mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-04-04 00:20:32 +00:00
Compare commits
54 Commits
sqlite
...
http-clien
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
de4f91af01 | ||
|
|
3385ce2586 | ||
|
|
f7b72b17f2 | ||
|
|
e868b553f7 | ||
|
|
778b7eb8c2 | ||
|
|
ca5fa2b866 | ||
|
|
a71ff521aa | ||
|
|
5a551607c2 | ||
|
|
13ea4d1ee3 | ||
|
|
dc600c953f | ||
|
|
e00d569754 | ||
|
|
3d760e4577 | ||
|
|
1e8bdd7e28 | ||
|
|
31bab4cc05 | ||
|
|
a1a301666f | ||
|
|
619a2653d1 | ||
|
|
0b9cae5354 | ||
|
|
f098a991a8 | ||
|
|
7b5e4d6f52 | ||
|
|
9ffc99d6a2 | ||
|
|
855c3290ff | ||
|
|
d65a4b09f3 | ||
|
|
6a57d69359 | ||
|
|
9c5e67fbf5 | ||
|
|
7edb24e54d | ||
|
|
a60932bbe0 | ||
|
|
77e9f5caf7 | ||
|
|
cedc894445 | ||
|
|
9d62e58c9a | ||
|
|
609983da87 | ||
|
|
65f77af84d | ||
|
|
cd3e6b2364 | ||
|
|
557a4458a4 | ||
|
|
ce620e208d | ||
|
|
2de35a9db2 | ||
|
|
3eb05fdd1a | ||
|
|
186fdee59b | ||
|
|
3c8bb5bc00 | ||
|
|
66d190c047 | ||
|
|
5c2207ecc3 | ||
|
|
18d347e247 | ||
|
|
29dfbbfdea | ||
|
|
02f611bbc8 | ||
|
|
349d5a0a0b | ||
|
|
647d989191 | ||
|
|
41a24623fa | ||
|
|
d9a3d912c0 | ||
|
|
070baa8f46 | ||
|
|
4f78f299a3 | ||
|
|
9568c86326 | ||
|
|
6633b6effc | ||
|
|
2962864f3d | ||
|
|
749f21816c | ||
|
|
6bb8bc8391 |
2
.github/workflows/e2e-integration-test.yml
vendored
2
.github/workflows/e2e-integration-test.yml
vendored
@@ -62,7 +62,7 @@ jobs:
|
||||
- name: run end to end integration tests
|
||||
continue-on-error: true
|
||||
run: |
|
||||
./lightpanda serve --log-level error & echo $! > LPD.pid
|
||||
./lightpanda serve --http-proxy ${{ secrets.MASSIVE_PROXY_RESIDENTIAL_US }} --log-level error & echo $! > LPD.pid
|
||||
go run integration/main.go |tee result.log
|
||||
kill `cat LPD.pid`
|
||||
|
||||
|
||||
@@ -55,7 +55,7 @@ pub fn init(allocator: Allocator, config: *const Config) !*App {
|
||||
.arena_pool = undefined,
|
||||
};
|
||||
|
||||
app.network = try Network.init(allocator, config);
|
||||
app.network = try Network.init(allocator, app, config);
|
||||
errdefer app.network.deinit();
|
||||
|
||||
app.platform = try Platform.init();
|
||||
|
||||
@@ -156,6 +156,13 @@ pub fn userAgentSuffix(self: *const Config) ?[]const u8 {
|
||||
};
|
||||
}
|
||||
|
||||
pub fn httpCacheDir(self: *const Config) ?[]const u8 {
|
||||
return switch (self.mode) {
|
||||
inline .serve, .fetch, .mcp => |opts| opts.common.http_cache_dir,
|
||||
else => null,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn cdpTimeout(self: *const Config) usize {
|
||||
return switch (self.mode) {
|
||||
.serve => |opts| if (opts.timeout > 604_800) 604_800_000 else @as(usize, opts.timeout) * 1000,
|
||||
@@ -273,6 +280,7 @@ pub const Common = struct {
|
||||
log_format: ?log.Format = null,
|
||||
log_filter_scopes: ?[]log.Scope = null,
|
||||
user_agent_suffix: ?[]const u8 = null,
|
||||
http_cache_dir: ?[]const u8 = null,
|
||||
|
||||
web_bot_auth_key_file: ?[]const u8 = null,
|
||||
web_bot_auth_keyid: ?[]const u8 = null,
|
||||
@@ -392,6 +400,11 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void {
|
||||
\\
|
||||
\\--web-bot-auth-domain
|
||||
\\ Your domain e.g. yourdomain.com
|
||||
\\
|
||||
\\--http-cache-dir
|
||||
\\ Path to a directory to use as a Filesystem Cache for network resources.
|
||||
\\ Omitting this will result is no caching.
|
||||
\\ Defaults to no caching.
|
||||
;
|
||||
|
||||
// MAX_HELP_LEN|
|
||||
@@ -1066,5 +1079,14 @@ fn parseCommonArg(
|
||||
return true;
|
||||
}
|
||||
|
||||
if (std.mem.eql(u8, "--http-cache-dir", opt)) {
|
||||
const str = args.next() orelse {
|
||||
log.fatal(.app, "missing argument value", .{ .arg = "--http-cache-dir" });
|
||||
return error.InvalidArgument;
|
||||
};
|
||||
common.http_cache_dir = try allocator.dupe(u8, str);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -260,14 +260,14 @@ pub const Client = struct {
|
||||
|
||||
fn start(self: *Client) void {
|
||||
const http = self.http;
|
||||
http.cdp_client = .{
|
||||
http.setCdpClient(.{
|
||||
.socket = self.ws.socket,
|
||||
.ctx = self,
|
||||
.blocking_read_start = Client.blockingReadStart,
|
||||
.blocking_read = Client.blockingRead,
|
||||
.blocking_read_end = Client.blockingReadStop,
|
||||
};
|
||||
defer http.cdp_client = null;
|
||||
});
|
||||
defer http.setCdpClient(null);
|
||||
|
||||
self.httpLoop(http) catch |err| {
|
||||
log.err(.app, "CDP client loop", .{ .err = err });
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -27,6 +27,9 @@ charset: [41]u8 = default_charset,
|
||||
charset_len: usize = default_charset_len,
|
||||
is_default_charset: bool = true,
|
||||
|
||||
type_buf: [127]u8 = @splat(0),
|
||||
sub_type_buf: [127]u8 = @splat(0),
|
||||
|
||||
/// String "UTF-8" continued by null characters.
|
||||
const default_charset = .{ 'U', 'T', 'F', '-', '8' } ++ .{0} ** 36;
|
||||
const default_charset_len = 5;
|
||||
@@ -61,7 +64,10 @@ pub const ContentType = union(ContentTypeEnum) {
|
||||
image_webp: void,
|
||||
application_json: void,
|
||||
unknown: void,
|
||||
other: struct { type: []const u8, sub_type: []const u8 },
|
||||
other: struct {
|
||||
type: []const u8,
|
||||
sub_type: []const u8,
|
||||
},
|
||||
};
|
||||
|
||||
pub fn contentTypeString(mime: *const Mime) []const u8 {
|
||||
@@ -112,17 +118,18 @@ fn parseCharset(value: []const u8) error{ CharsetTooBig, Invalid }![]const u8 {
|
||||
return value;
|
||||
}
|
||||
|
||||
pub fn parse(input: []u8) !Mime {
|
||||
pub fn parse(input: []const u8) !Mime {
|
||||
if (input.len > 255) {
|
||||
return error.TooBig;
|
||||
}
|
||||
|
||||
// Zig's trim API is broken. The return type is always `[]const u8`,
|
||||
// even if the input type is `[]u8`. @constCast is safe here.
|
||||
var normalized = @constCast(std.mem.trim(u8, input, &std.ascii.whitespace));
|
||||
var buf: [255]u8 = undefined;
|
||||
const normalized = std.ascii.lowerString(&buf, std.mem.trim(u8, input, &std.ascii.whitespace));
|
||||
_ = std.ascii.lowerString(normalized, normalized);
|
||||
|
||||
const content_type, const type_len = try parseContentType(normalized);
|
||||
var mime = Mime{ .content_type = undefined };
|
||||
|
||||
const content_type, const type_len = try parseContentType(normalized, &mime.type_buf, &mime.sub_type_buf);
|
||||
if (type_len >= normalized.len) {
|
||||
return .{ .content_type = content_type };
|
||||
}
|
||||
@@ -163,13 +170,12 @@ pub fn parse(input: []u8) !Mime {
|
||||
}
|
||||
}
|
||||
|
||||
return .{
|
||||
.params = params,
|
||||
.charset = charset,
|
||||
.charset_len = charset_len,
|
||||
.content_type = content_type,
|
||||
.is_default_charset = !has_explicit_charset,
|
||||
};
|
||||
mime.params = params;
|
||||
mime.charset = charset;
|
||||
mime.charset_len = charset_len;
|
||||
mime.content_type = content_type;
|
||||
mime.is_default_charset = !has_explicit_charset;
|
||||
return mime;
|
||||
}
|
||||
|
||||
/// Prescan the first 1024 bytes of an HTML document for a charset declaration.
|
||||
@@ -395,7 +401,7 @@ pub fn isText(mime: *const Mime) bool {
|
||||
}
|
||||
|
||||
// we expect value to be lowercase
|
||||
fn parseContentType(value: []const u8) !struct { ContentType, usize } {
|
||||
fn parseContentType(value: []const u8, type_buf: []u8, sub_type_buf: []u8) !struct { ContentType, usize } {
|
||||
const end = std.mem.indexOfScalarPos(u8, value, 0, ';') orelse value.len;
|
||||
const type_name = trimRight(value[0..end]);
|
||||
const attribute_start = end + 1;
|
||||
@@ -444,10 +450,18 @@ fn parseContentType(value: []const u8) !struct { ContentType, usize } {
|
||||
return error.Invalid;
|
||||
}
|
||||
|
||||
return .{ .{ .other = .{
|
||||
.type = main_type,
|
||||
.sub_type = sub_type,
|
||||
} }, attribute_start };
|
||||
@memcpy(type_buf[0..main_type.len], main_type);
|
||||
@memcpy(sub_type_buf[0..sub_type.len], sub_type);
|
||||
|
||||
return .{
|
||||
.{
|
||||
.other = .{
|
||||
.type = type_buf[0..main_type.len],
|
||||
.sub_type = sub_type_buf[0..sub_type.len],
|
||||
},
|
||||
},
|
||||
attribute_start,
|
||||
};
|
||||
}
|
||||
|
||||
const VALID_CODEPOINTS = blk: {
|
||||
@@ -461,6 +475,13 @@ const VALID_CODEPOINTS = blk: {
|
||||
break :blk v;
|
||||
};
|
||||
|
||||
pub fn typeString(self: *const Mime) []const u8 {
|
||||
return switch (self.content_type) {
|
||||
.other => |o| o.type[0..o.type_len],
|
||||
else => "",
|
||||
};
|
||||
}
|
||||
|
||||
fn validType(value: []const u8) bool {
|
||||
for (value) |b| {
|
||||
if (VALID_CODEPOINTS[b] == false) {
|
||||
|
||||
@@ -886,12 +886,10 @@ fn notifyParentLoadComplete(self: *Page) void {
|
||||
parent.iframeCompletedLoading(self.iframe.?);
|
||||
}
|
||||
|
||||
fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
|
||||
var self: *Page = @ptrCast(@alignCast(transfer.ctx));
|
||||
fn pageHeaderDoneCallback(response: HttpClient.Response) !bool {
|
||||
var self: *Page = @ptrCast(@alignCast(response.ctx));
|
||||
|
||||
const header = &transfer.response_header.?;
|
||||
|
||||
const response_url = std.mem.span(header.url);
|
||||
const response_url = response.url();
|
||||
if (std.mem.eql(u8, response_url, self.url) == false) {
|
||||
// would be different than self.url in the case of a redirect
|
||||
self.url = try self.arena.dupeZ(u8, response_url);
|
||||
@@ -905,8 +903,8 @@ fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
|
||||
if (comptime IS_DEBUG) {
|
||||
log.debug(.page, "navigate header", .{
|
||||
.url = self.url,
|
||||
.status = header.status,
|
||||
.content_type = header.contentType(),
|
||||
.status = response.status(),
|
||||
.content_type = response.contentType(),
|
||||
.type = self._type,
|
||||
});
|
||||
}
|
||||
@@ -927,14 +925,14 @@ fn pageHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
|
||||
return true;
|
||||
}
|
||||
|
||||
fn pageDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
|
||||
var self: *Page = @ptrCast(@alignCast(transfer.ctx));
|
||||
fn pageDataCallback(response: HttpClient.Response, data: []const u8) !void {
|
||||
var self: *Page = @ptrCast(@alignCast(response.ctx));
|
||||
|
||||
if (self._parse_state == .pre) {
|
||||
// we lazily do this, because we might need the first chunk of data
|
||||
// to sniff the content type
|
||||
var mime: Mime = blk: {
|
||||
if (transfer.response_header.?.contentType()) |ct| {
|
||||
if (response.contentType()) |ct| {
|
||||
break :blk try Mime.parse(ct);
|
||||
}
|
||||
break :blk Mime.sniff(data);
|
||||
|
||||
@@ -135,7 +135,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult {
|
||||
.pre, .raw, .text, .image => {
|
||||
// The main page hasn't started/finished navigating.
|
||||
// There's no JS to run, and no reason to run the scheduler.
|
||||
if (http_client.active == 0 and (comptime is_cdp) == false) {
|
||||
if (http_client.active() == 0 and (comptime is_cdp) == false) {
|
||||
// haven't started navigating, I guess.
|
||||
return .done;
|
||||
}
|
||||
@@ -169,8 +169,8 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult {
|
||||
// Each call to this runs scheduled load events.
|
||||
try page.dispatchLoad();
|
||||
|
||||
const http_active = http_client.active;
|
||||
const total_network_activity = http_active + http_client.intercepted;
|
||||
const http_active = http_client.active();
|
||||
const total_network_activity = http_active + http_client.intercepted();
|
||||
if (page._notified_network_almost_idle.check(total_network_activity <= 2)) {
|
||||
page.notifyNetworkAlmostIdle();
|
||||
}
|
||||
@@ -183,7 +183,7 @@ fn _tick(self: *Runner, comptime is_cdp: bool, opts: TickOpts) !CDPTickResult {
|
||||
// because is_cdp is true, and that can only be
|
||||
// the case when interception isn't possible.
|
||||
if (comptime IS_DEBUG) {
|
||||
std.debug.assert(http_client.intercepted == 0);
|
||||
std.debug.assert(http_client.intercepted() == 0);
|
||||
}
|
||||
|
||||
if (browser.hasBackgroundTasks()) {
|
||||
|
||||
@@ -273,6 +273,24 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
|
||||
// Let the outer errdefer handle releasing the arena if client.request fails
|
||||
}
|
||||
|
||||
if (comptime IS_DEBUG) {
|
||||
var ls: js.Local.Scope = undefined;
|
||||
page.js.localScope(&ls);
|
||||
defer ls.deinit();
|
||||
|
||||
log.debug(.http, "script queue", .{
|
||||
.ctx = ctx,
|
||||
.url = remote_url.?,
|
||||
.element = element,
|
||||
.stack = ls.local.stackTrace() catch "???",
|
||||
});
|
||||
}
|
||||
|
||||
{
|
||||
const was_evaluating = self.is_evaluating;
|
||||
self.is_evaluating = true;
|
||||
defer self.is_evaluating = was_evaluating;
|
||||
|
||||
try self.client.request(.{
|
||||
.url = url,
|
||||
.ctx = script,
|
||||
@@ -290,20 +308,9 @@ pub fn addFromElement(self: *ScriptManager, comptime from_parser: bool, script_e
|
||||
.done_callback = Script.doneCallback,
|
||||
.error_callback = Script.errorCallback,
|
||||
});
|
||||
handover = true;
|
||||
|
||||
if (comptime IS_DEBUG) {
|
||||
var ls: js.Local.Scope = undefined;
|
||||
page.js.localScope(&ls);
|
||||
defer ls.deinit();
|
||||
|
||||
log.debug(.http, "script queue", .{
|
||||
.ctx = ctx,
|
||||
.url = remote_url.?,
|
||||
.element = element,
|
||||
.stack = ls.local.stackTrace() catch "???",
|
||||
});
|
||||
}
|
||||
|
||||
handover = true;
|
||||
}
|
||||
|
||||
if (is_blocking == false) {
|
||||
@@ -694,32 +701,33 @@ pub const Script = struct {
|
||||
self.manager.page.releaseArena(self.arena);
|
||||
}
|
||||
|
||||
fn startCallback(transfer: *HttpClient.Transfer) !void {
|
||||
log.debug(.http, "script fetch start", .{ .req = transfer });
|
||||
fn startCallback(response: HttpClient.Response) !void {
|
||||
log.debug(.http, "script fetch start", .{ .req = response });
|
||||
}
|
||||
|
||||
fn headerCallback(transfer: *HttpClient.Transfer) !bool {
|
||||
const self: *Script = @ptrCast(@alignCast(transfer.ctx));
|
||||
const header = &transfer.response_header.?;
|
||||
self.status = header.status;
|
||||
if (header.status != 200) {
|
||||
fn headerCallback(response: HttpClient.Response) !bool {
|
||||
const self: *Script = @ptrCast(@alignCast(response.ctx));
|
||||
|
||||
self.status = response.status().?;
|
||||
if (response.status() != 200) {
|
||||
log.info(.http, "script header", .{
|
||||
.req = transfer,
|
||||
.status = header.status,
|
||||
.content_type = header.contentType(),
|
||||
.req = response,
|
||||
.status = response.status(),
|
||||
.content_type = response.contentType(),
|
||||
});
|
||||
return false;
|
||||
}
|
||||
|
||||
if (comptime IS_DEBUG) {
|
||||
log.debug(.http, "script header", .{
|
||||
.req = transfer,
|
||||
.status = header.status,
|
||||
.content_type = header.contentType(),
|
||||
.req = response,
|
||||
.status = response.status(),
|
||||
.content_type = response.contentType(),
|
||||
});
|
||||
}
|
||||
|
||||
{
|
||||
switch (response.inner) {
|
||||
.transfer => |transfer| {
|
||||
// temp debug, trying to figure out why the next assert sometimes
|
||||
// fails. Is the buffer just corrupt or is headerCallback really
|
||||
// being called twice?
|
||||
@@ -751,25 +759,28 @@ pub const Script = struct {
|
||||
self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state);
|
||||
self.debug_transfer_auth_challenge = transfer._auth_challenge != null;
|
||||
self.debug_transfer_easy_id = if (transfer._conn) |c| @intFromPtr(c._easy) else 0;
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
|
||||
lp.assert(self.source.remote.capacity == 0, "ScriptManager.Header buffer", .{ .capacity = self.source.remote.capacity });
|
||||
var buffer: std.ArrayList(u8) = .empty;
|
||||
if (transfer.getContentLength()) |cl| {
|
||||
if (response.contentLength()) |cl| {
|
||||
try buffer.ensureTotalCapacity(self.arena, cl);
|
||||
}
|
||||
self.source = .{ .remote = buffer };
|
||||
return true;
|
||||
}
|
||||
|
||||
fn dataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
|
||||
const self: *Script = @ptrCast(@alignCast(transfer.ctx));
|
||||
self._dataCallback(transfer, data) catch |err| {
|
||||
log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = transfer, .len = data.len });
|
||||
fn dataCallback(response: HttpClient.Response, data: []const u8) !void {
|
||||
const self: *Script = @ptrCast(@alignCast(response.ctx));
|
||||
self._dataCallback(response, data) catch |err| {
|
||||
log.err(.http, "SM.dataCallback", .{ .err = err, .transfer = response, .len = data.len });
|
||||
return err;
|
||||
};
|
||||
}
|
||||
fn _dataCallback(self: *Script, _: *HttpClient.Transfer, data: []const u8) !void {
|
||||
|
||||
fn _dataCallback(self: *Script, _: HttpClient.Response, data: []const u8) !void {
|
||||
try self.source.remote.appendSlice(self.arena, data);
|
||||
}
|
||||
|
||||
|
||||
@@ -127,16 +127,16 @@ fn handleBlobUrl(url: []const u8, resolver: js.PromiseResolver, page: *Page) !js
|
||||
return resolver.promise();
|
||||
}
|
||||
|
||||
fn httpStartCallback(transfer: *HttpClient.Transfer) !void {
|
||||
const self: *Fetch = @ptrCast(@alignCast(transfer.ctx));
|
||||
fn httpStartCallback(response: HttpClient.Response) !void {
|
||||
const self: *Fetch = @ptrCast(@alignCast(response.ctx));
|
||||
if (comptime IS_DEBUG) {
|
||||
log.debug(.http, "request start", .{ .url = self._url, .source = "fetch" });
|
||||
}
|
||||
self._response._transfer = transfer;
|
||||
self._response._http_response = response;
|
||||
}
|
||||
|
||||
fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
|
||||
const self: *Fetch = @ptrCast(@alignCast(transfer.ctx));
|
||||
fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
|
||||
const self: *Fetch = @ptrCast(@alignCast(response.ctx));
|
||||
|
||||
if (self._signal) |signal| {
|
||||
if (signal._aborted) {
|
||||
@@ -145,25 +145,24 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
|
||||
}
|
||||
|
||||
const arena = self._response._arena;
|
||||
if (transfer.getContentLength()) |cl| {
|
||||
if (response.contentLength()) |cl| {
|
||||
try self._buf.ensureTotalCapacity(arena, cl);
|
||||
}
|
||||
|
||||
const res = self._response;
|
||||
const header = transfer.response_header.?;
|
||||
|
||||
if (comptime IS_DEBUG) {
|
||||
log.debug(.http, "request header", .{
|
||||
.source = "fetch",
|
||||
.url = self._url,
|
||||
.status = header.status,
|
||||
.status = response.status(),
|
||||
});
|
||||
}
|
||||
|
||||
res._status = header.status;
|
||||
res._status_text = std.http.Status.phrase(@enumFromInt(header.status)) orelse "";
|
||||
res._url = try arena.dupeZ(u8, std.mem.span(header.url));
|
||||
res._is_redirected = header.redirect_count > 0;
|
||||
res._status = response.status().?;
|
||||
res._status_text = std.http.Status.phrase(@enumFromInt(response.status().?)) orelse "";
|
||||
res._url = try arena.dupeZ(u8, response.url());
|
||||
res._is_redirected = response.redirectCount().? > 0;
|
||||
|
||||
// Determine response type based on origin comparison
|
||||
const page_origin = URL.getOrigin(arena, self._page.url) catch null;
|
||||
@@ -183,7 +182,7 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
|
||||
res._type = .basic;
|
||||
}
|
||||
|
||||
var it = transfer.responseHeaderIterator();
|
||||
var it = response.headerIterator();
|
||||
while (it.next()) |hdr| {
|
||||
try res._headers.append(hdr.name, hdr.value, self._page);
|
||||
}
|
||||
@@ -191,8 +190,8 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
|
||||
return true;
|
||||
}
|
||||
|
||||
fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
|
||||
const self: *Fetch = @ptrCast(@alignCast(transfer.ctx));
|
||||
fn httpDataCallback(response: HttpClient.Response, data: []const u8) !void {
|
||||
const self: *Fetch = @ptrCast(@alignCast(response.ctx));
|
||||
|
||||
// Check if aborted
|
||||
if (self._signal) |signal| {
|
||||
@@ -207,7 +206,7 @@ fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
|
||||
fn httpDoneCallback(ctx: *anyopaque) !void {
|
||||
const self: *Fetch = @ptrCast(@alignCast(ctx));
|
||||
var response = self._response;
|
||||
response._transfer = null;
|
||||
response._http_response = null;
|
||||
response._body = self._buf.items;
|
||||
|
||||
log.info(.http, "request complete", .{
|
||||
@@ -230,7 +229,7 @@ fn httpErrorCallback(ctx: *anyopaque, _: anyerror) void {
|
||||
const self: *Fetch = @ptrCast(@alignCast(ctx));
|
||||
|
||||
var response = self._response;
|
||||
response._transfer = null;
|
||||
response._http_response = null;
|
||||
// the response is only passed on v8 on success, if we're here, it's safe to
|
||||
// clear this. (defer since `self is in the response's arena).
|
||||
|
||||
@@ -256,7 +255,7 @@ fn httpShutdownCallback(ctx: *anyopaque) void {
|
||||
|
||||
if (self._owns_response) {
|
||||
var response = self._response;
|
||||
response._transfer = null;
|
||||
response._http_response = null;
|
||||
response.deinit(self._page._session);
|
||||
// Do not access `self` after this point: the Fetch struct was
|
||||
// allocated from response._arena which has been released.
|
||||
|
||||
@@ -48,7 +48,7 @@ _type: Type,
|
||||
_status_text: []const u8,
|
||||
_url: [:0]const u8,
|
||||
_is_redirected: bool,
|
||||
_transfer: ?*HttpClient.Transfer = null,
|
||||
_http_response: ?HttpClient.Response = null,
|
||||
|
||||
const InitOpts = struct {
|
||||
status: u16 = 200,
|
||||
@@ -81,9 +81,9 @@ pub fn init(body_: ?[]const u8, opts_: ?InitOpts, page: *Page) !*Response {
|
||||
}
|
||||
|
||||
pub fn deinit(self: *Response, session: *Session) void {
|
||||
if (self._transfer) |transfer| {
|
||||
transfer.abort(error.Abort);
|
||||
self._transfer = null;
|
||||
if (self._http_response) |resp| {
|
||||
resp.abort(error.Abort);
|
||||
self._http_response = null;
|
||||
}
|
||||
session.releaseArena(self._arena);
|
||||
}
|
||||
@@ -191,7 +191,7 @@ pub fn clone(self: *const Response, page: *Page) !*Response {
|
||||
._type = self._type,
|
||||
._is_redirected = self._is_redirected,
|
||||
._headers = try Headers.init(.{ .obj = self._headers }, page),
|
||||
._transfer = null,
|
||||
._http_response = null,
|
||||
};
|
||||
return cloned;
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ _rc: lp.RC(u8) = .{},
|
||||
_page: *Page,
|
||||
_proto: *XMLHttpRequestEventTarget,
|
||||
_arena: Allocator,
|
||||
_transfer: ?*HttpClient.Transfer = null,
|
||||
_http_response: ?HttpClient.Response = null,
|
||||
_active_request: bool = false,
|
||||
|
||||
_url: [:0]const u8 = "",
|
||||
@@ -100,9 +100,9 @@ pub fn init(page: *Page) !*XMLHttpRequest {
|
||||
}
|
||||
|
||||
pub fn deinit(self: *XMLHttpRequest, session: *Session) void {
|
||||
if (self._transfer) |transfer| {
|
||||
transfer.abort(error.Abort);
|
||||
self._transfer = null;
|
||||
if (self._http_response) |resp| {
|
||||
resp.abort(error.Abort);
|
||||
self._http_response = null;
|
||||
}
|
||||
|
||||
if (self._on_ready_state_change) |func| {
|
||||
@@ -184,9 +184,9 @@ pub fn setWithCredentials(self: *XMLHttpRequest, value: bool) !void {
|
||||
// TODO: url should be a union, as it can be multiple things
|
||||
pub fn open(self: *XMLHttpRequest, method_: []const u8, url: [:0]const u8) !void {
|
||||
// Abort any in-progress request
|
||||
if (self._transfer) |transfer| {
|
||||
if (self._http_response) |transfer| {
|
||||
transfer.abort(error.Abort);
|
||||
self._transfer = null;
|
||||
self._http_response = null;
|
||||
}
|
||||
|
||||
// Reset internal state
|
||||
@@ -402,34 +402,32 @@ pub fn getResponseXML(self: *XMLHttpRequest, page: *Page) !?*Node.Document {
|
||||
};
|
||||
}
|
||||
|
||||
fn httpStartCallback(transfer: *HttpClient.Transfer) !void {
|
||||
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx));
|
||||
fn httpStartCallback(response: HttpClient.Response) !void {
|
||||
const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
|
||||
if (comptime IS_DEBUG) {
|
||||
log.debug(.http, "request start", .{ .method = self._method, .url = self._url, .source = "xhr" });
|
||||
}
|
||||
self._transfer = transfer;
|
||||
self._http_response = response;
|
||||
}
|
||||
|
||||
fn httpHeaderCallback(transfer: *HttpClient.Transfer, header: http.Header) !void {
|
||||
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx));
|
||||
fn httpHeaderCallback(response: HttpClient.Response, header: http.Header) !void {
|
||||
const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
|
||||
const joined = try std.fmt.allocPrint(self._arena, "{s}: {s}", .{ header.name, header.value });
|
||||
try self._response_headers.append(self._arena, joined);
|
||||
}
|
||||
|
||||
fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
|
||||
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx));
|
||||
|
||||
const header = &transfer.response_header.?;
|
||||
fn httpHeaderDoneCallback(response: HttpClient.Response) !bool {
|
||||
const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
|
||||
|
||||
if (comptime IS_DEBUG) {
|
||||
log.debug(.http, "request header", .{
|
||||
.source = "xhr",
|
||||
.url = self._url,
|
||||
.status = header.status,
|
||||
.status = response.status(),
|
||||
});
|
||||
}
|
||||
|
||||
if (header.contentType()) |ct| {
|
||||
if (response.contentType()) |ct| {
|
||||
self._response_mime = Mime.parse(ct) catch |e| {
|
||||
log.info(.http, "invalid content type", .{
|
||||
.content_Type = ct,
|
||||
@@ -440,18 +438,18 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
|
||||
};
|
||||
}
|
||||
|
||||
var it = transfer.responseHeaderIterator();
|
||||
var it = response.headerIterator();
|
||||
while (it.next()) |hdr| {
|
||||
const joined = try std.fmt.allocPrint(self._arena, "{s}: {s}", .{ hdr.name, hdr.value });
|
||||
try self._response_headers.append(self._arena, joined);
|
||||
}
|
||||
|
||||
self._response_status = header.status;
|
||||
if (transfer.getContentLength()) |cl| {
|
||||
self._response_status = response.status().?;
|
||||
if (response.contentLength()) |cl| {
|
||||
self._response_len = cl;
|
||||
try self._response_data.ensureTotalCapacity(self._arena, cl);
|
||||
}
|
||||
self._response_url = try self._arena.dupeZ(u8, std.mem.span(header.url));
|
||||
self._response_url = try self._arena.dupeZ(u8, response.url());
|
||||
|
||||
const page = self._page;
|
||||
|
||||
@@ -466,8 +464,8 @@ fn httpHeaderDoneCallback(transfer: *HttpClient.Transfer) !bool {
|
||||
return true;
|
||||
}
|
||||
|
||||
fn httpDataCallback(transfer: *HttpClient.Transfer, data: []const u8) !void {
|
||||
const self: *XMLHttpRequest = @ptrCast(@alignCast(transfer.ctx));
|
||||
fn httpDataCallback(response: HttpClient.Response, data: []const u8) !void {
|
||||
const self: *XMLHttpRequest = @ptrCast(@alignCast(response.ctx));
|
||||
try self._response_data.appendSlice(self._arena, data);
|
||||
|
||||
const page = self._page;
|
||||
@@ -490,7 +488,7 @@ fn httpDoneCallback(ctx: *anyopaque) !void {
|
||||
|
||||
// Not that the request is done, the http/client will free the transfer
|
||||
// object. It isn't safe to keep it around.
|
||||
self._transfer = null;
|
||||
self._http_response = null;
|
||||
|
||||
const page = self._page;
|
||||
|
||||
@@ -513,23 +511,23 @@ fn httpErrorCallback(ctx: *anyopaque, err: anyerror) void {
|
||||
const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx));
|
||||
// http client will close it after an error, it isn't safe to keep around
|
||||
self.handleError(err);
|
||||
if (self._transfer != null) {
|
||||
self._transfer = null;
|
||||
if (self._http_response != null) {
|
||||
self._http_response = null;
|
||||
}
|
||||
self.releaseSelfRef();
|
||||
}
|
||||
|
||||
fn httpShutdownCallback(ctx: *anyopaque) void {
|
||||
const self: *XMLHttpRequest = @ptrCast(@alignCast(ctx));
|
||||
self._transfer = null;
|
||||
self._http_response = null;
|
||||
self.releaseSelfRef();
|
||||
}
|
||||
|
||||
pub fn abort(self: *XMLHttpRequest) void {
|
||||
self.handleError(error.Abort);
|
||||
if (self._transfer) |transfer| {
|
||||
self._transfer = null;
|
||||
transfer.abort(error.Abort);
|
||||
if (self._http_response) |resp| {
|
||||
self._http_response = null;
|
||||
resp.abort(error.Abort);
|
||||
}
|
||||
self.releaseSelfRef();
|
||||
}
|
||||
|
||||
@@ -139,8 +139,8 @@ fn setLifecycleEventsEnabled(cmd: *CDP.Command) !void {
|
||||
try sendPageLifecycle(bc, "load", now, frame_id, loader_id);
|
||||
|
||||
const http_client = page._session.browser.http_client;
|
||||
const http_active = http_client.active;
|
||||
const total_network_activity = http_active + http_client.intercepted;
|
||||
const http_active = http_client.active();
|
||||
const total_network_activity = http_active + http_client.intercepted();
|
||||
if (page._notified_network_almost_idle.check(total_network_activity <= 2)) {
|
||||
try sendPageLifecycle(bc, "networkAlmostIdle", now, frame_id, loader_id);
|
||||
}
|
||||
|
||||
@@ -39,6 +39,7 @@ pub const Scope = enum {
|
||||
telemetry,
|
||||
unknown_prop,
|
||||
mcp,
|
||||
cache,
|
||||
};
|
||||
|
||||
const Opts = struct {
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
const std = @import("std");
|
||||
const log = @import("../log.zig");
|
||||
const builtin = @import("builtin");
|
||||
const net = std.net;
|
||||
const posix = std.posix;
|
||||
@@ -30,6 +31,10 @@ const http = @import("http.zig");
|
||||
const RobotStore = @import("Robots.zig").RobotStore;
|
||||
const WebBotAuth = @import("WebBotAuth.zig");
|
||||
|
||||
const Cache = @import("cache/Cache.zig");
|
||||
const FsCache = @import("cache/FsCache.zig");
|
||||
|
||||
const App = @import("../App.zig");
|
||||
const Network = @This();
|
||||
|
||||
const Listener = struct {
|
||||
@@ -45,10 +50,12 @@ const MAX_TICK_CALLBACKS = 16;
|
||||
|
||||
allocator: Allocator,
|
||||
|
||||
app: *App,
|
||||
config: *const Config,
|
||||
ca_blob: ?http.Blob,
|
||||
robot_store: RobotStore,
|
||||
web_bot_auth: ?WebBotAuth,
|
||||
cache: ?Cache,
|
||||
|
||||
connections: []http.Connection,
|
||||
available: std.DoublyLinkedList = .{},
|
||||
@@ -200,7 +207,7 @@ fn globalDeinit() void {
|
||||
libcurl.curl_global_cleanup();
|
||||
}
|
||||
|
||||
pub fn init(allocator: Allocator, config: *const Config) !Network {
|
||||
pub fn init(allocator: Allocator, app: *App, config: *const Config) !Network {
|
||||
globalInit(allocator);
|
||||
errdefer globalDeinit();
|
||||
|
||||
@@ -233,6 +240,22 @@ pub fn init(allocator: Allocator, config: *const Config) !Network {
|
||||
else
|
||||
null;
|
||||
|
||||
const cache = if (config.httpCacheDir()) |cache_dir_path|
|
||||
Cache{
|
||||
.kind = .{
|
||||
.fs = FsCache.init(cache_dir_path) catch |e| {
|
||||
log.err(.cache, "failed to init", .{
|
||||
.kind = "FsCache",
|
||||
.path = cache_dir_path,
|
||||
.err = e,
|
||||
});
|
||||
return e;
|
||||
},
|
||||
},
|
||||
}
|
||||
else
|
||||
null;
|
||||
|
||||
return .{
|
||||
.allocator = allocator,
|
||||
.config = config,
|
||||
@@ -244,8 +267,10 @@ pub fn init(allocator: Allocator, config: *const Config) !Network {
|
||||
.available = available,
|
||||
.connections = connections,
|
||||
|
||||
.app = app,
|
||||
.robot_store = RobotStore.init(allocator),
|
||||
.web_bot_auth = web_bot_auth,
|
||||
.cache = cache,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -278,6 +303,8 @@ pub fn deinit(self: *Network) void {
|
||||
wba.deinit(self.allocator);
|
||||
}
|
||||
|
||||
if (self.cache) |*cache| cache.deinit();
|
||||
|
||||
globalDeinit();
|
||||
}
|
||||
|
||||
|
||||
213
src/network/cache/Cache.zig
vendored
Normal file
213
src/network/cache/Cache.zig
vendored
Normal file
@@ -0,0 +1,213 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
const std = @import("std");
|
||||
const log = @import("../../log.zig");
|
||||
const Http = @import("../http.zig");
|
||||
const FsCache = @import("FsCache.zig");
|
||||
|
||||
/// Browser-wide HTTP resource cache shared across the network layer.
/// Behavior mostly follows RFC 9111; `kind` selects the storage backend.
pub const Cache = @This();

// Storage backend. Only a filesystem-backed implementation exists today,
// but the inline-else dispatch below lets new backends slot in.
kind: union(enum) {
    fs: FsCache,
},

/// Release all resources held by the active backend.
pub fn deinit(self: *Cache) void {
    switch (self.kind) {
        inline else => |*backend| backend.deinit(),
    }
}

/// Look up a cached response for `req`. Returns null on a miss.
/// Memory for the returned metadata is allocated from `arena`.
pub fn get(self: *Cache, arena: std.mem.Allocator, req: CacheRequest) ?CachedResponse {
    switch (self.kind) {
        inline else => |*backend| return backend.get(arena, req),
    }
}

/// Store `body` together with its `metadata` in the active backend.
pub fn put(self: *Cache, metadata: CachedMetadata, body: []const u8) !void {
    switch (self.kind) {
        inline else => |*backend| return backend.put(metadata, body),
    }
}
|
||||
|
||||
pub const CacheControl = struct {
    max_age: u64,

    /// Parse a Cache-Control response header value into our (reduced)
    /// caching policy. Returns null whenever the policy forbids storing
    /// the response: any `no-store`/`no-cache` directive, a missing
    /// `public`, or a missing/zero freshness lifetime. `s-maxage` takes
    /// precedence over `max-age` regardless of the order the directives
    /// appear in; unparseable numeric values are ignored.
    pub fn parse(value: []const u8) ?CacheControl {
        var max_age: ?u64 = null;
        var shared_max_age: ?u64 = null;
        var is_public = false;

        var directives = std.mem.splitScalar(u8, value, ',');
        while (directives.next()) |raw| {
            const directive = std.mem.trim(u8, raw, &std.ascii.whitespace);

            if (std.ascii.eqlIgnoreCase(directive, "no-store") or
                std.ascii.eqlIgnoreCase(directive, "no-cache"))
            {
                // Both mean "do not serve from cache without the origin".
                return null;
            }

            if (std.ascii.eqlIgnoreCase(directive, "public")) {
                is_public = true;
            } else if (std.ascii.startsWithIgnoreCase(directive, "max-age=")) {
                if (std.fmt.parseInt(u64, directive["max-age=".len..], 10) catch null) |age| {
                    max_age = age;
                }
            } else if (std.ascii.startsWithIgnoreCase(directive, "s-maxage=")) {
                if (std.fmt.parseInt(u64, directive["s-maxage=".len..], 10) catch null) |age| {
                    shared_max_age = age;
                }
            }
        }

        if (!is_public) return null;
        // s-maxage wins over max-age; no lifetime at all means no caching.
        const effective = shared_max_age orelse max_age orelse return null;
        if (effective == 0) return null;

        return .{ .max_age = effective };
    }
};
|
||||
|
||||
/// Everything needed to persist and later validate a cached response,
/// minus the body itself.
pub const CachedMetadata = struct {
    url: [:0]const u8,
    content_type: []const u8,

    status: u16,
    // Unix timestamp (seconds) at which the entry was written.
    stored_at: i64,
    // Value of the response's Age header at store time (0 if absent).
    age_at_store: u64,

    cache_control: CacheControl,
    /// Response Headers
    headers: []const Http.Header,

    /// These are Request Headers used by Vary.
    vary_headers: []const Http.Header,

    pub fn format(self: CachedMetadata, writer: *std.Io.Writer) !void {
        try writer.print("url={s} | status={d} | content_type={s} | max_age={d} | vary=[", .{
            self.url,
            self.status,
            self.content_type,
            self.cache_control.max_age,
        });

        // Dumping every stored header would be noisy; only the Vary
        // headers matter for cache-key matching, so log just those.
        for (self.vary_headers, 0..) |hdr, i| {
            if (i > 0) try writer.print(", ", .{});
            try writer.print("{s}: {s}", .{ hdr.name, hdr.value });
        }
        try writer.print("]", .{});
    }
};
|
||||
|
||||
/// Lookup key for a cache probe: the URL plus whatever request headers
/// might participate in Vary matching.
pub const CacheRequest = struct {
    url: []const u8,
    // Unix timestamp (seconds) used for expiration checks.
    timestamp: i64,
    request_headers: []const Http.Header,
};

/// Where a cached body lives: either already in memory, or a span of an
/// open file (offset/len bracket the body within it).
pub const CachedData = union(enum) {
    buffer: []const u8,
    file: struct {
        file: std.fs.File,
        offset: usize,
        len: usize,
    },

    pub fn format(self: CachedData, writer: *std.Io.Writer) !void {
        switch (self) {
            .buffer => |bytes| try writer.print("buffer({d} bytes)", .{bytes.len}),
            .file => |f| try writer.print("file(offset={d}, len={d} bytes)", .{ f.offset, f.len }),
        }
    }
};

/// A successful cache lookup: the stored metadata plus its body.
pub const CachedResponse = struct {
    metadata: CachedMetadata,
    data: CachedData,

    pub fn format(self: *const CachedResponse, writer: *std.Io.Writer) !void {
        try writer.print("metadata=(", .{});
        try self.metadata.format(writer);
        try writer.print("), data=", .{});
        try self.data.format(writer);
    }
};
|
||||
|
||||
/// Decide whether a response may be stored, per our simplified RFC 9111
/// rules. Returns the metadata to store (strings duplicated into
/// `arena`), or null when the response must not be cached.
/// NOTE: `headers` and `vary_headers` are left empty here; the caller is
/// expected to fill them in before the entry is written.
pub fn tryCache(
    arena: std.mem.Allocator,
    timestamp: i64,
    url: [:0]const u8,
    status: u16,
    content_type: ?[]const u8,
    cache_control: ?[]const u8,
    vary: ?[]const u8,
    age: ?[]const u8,
    has_set_cookie: bool,
    has_authorization: bool,
) !?CachedMetadata {
    // Only plain 200s are cached.
    if (status != 200) {
        log.debug(.cache, "no store", .{ .url = url, .code = status, .reason = "status" });
        return null;
    }
    // Responses that set cookies, or that answered an authorized request,
    // are user-specific and must never be replayed from a shared cache.
    if (has_set_cookie) {
        log.debug(.cache, "no store", .{ .url = url, .reason = "has_cookies" });
        return null;
    }
    if (has_authorization) {
        log.debug(.cache, "no store", .{ .url = url, .reason = "has_authorization" });
        return null;
    }
    // "Vary: *" means no future request can ever match this entry.
    if (vary) |v| if (std.mem.eql(u8, v, "*")) {
        log.debug(.cache, "no store", .{ .url = url, .vary = v, .reason = "vary" });
        return null;
    };

    const raw_cc = cache_control orelse {
        log.debug(.cache, "no store", .{ .url = url, .reason = "no cache control" });
        return null;
    };
    const cc = CacheControl.parse(raw_cc) orelse {
        log.debug(.cache, "no store", .{ .url = url, .cache_control = raw_cc, .reason = "cache control" });
        return null;
    };

    return .{
        .url = try arena.dupeZ(u8, url),
        .content_type = if (content_type) |ct| try arena.dupe(u8, ct) else "application/octet-stream",
        .status = status,
        .stored_at = timestamp,
        // A malformed Age header is treated as "fresh from origin".
        .age_at_store = if (age) |a| std.fmt.parseInt(u64, a, 10) catch 0 else 0,
        .cache_control = cc,
        .headers = &.{},
        .vary_headers = &.{},
    };
}
|
||||
612
src/network/cache/FsCache.zig
vendored
Normal file
612
src/network/cache/FsCache.zig
vendored
Normal file
@@ -0,0 +1,612 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
const std = @import("std");
|
||||
const log = @import("../../log.zig");
|
||||
const Cache = @import("Cache.zig");
|
||||
const Http = @import("../http.zig");
|
||||
const CacheRequest = Cache.CacheRequest;
|
||||
const CachedMetadata = Cache.CachedMetadata;
|
||||
const CachedResponse = Cache.CachedResponse;
|
||||
|
||||
/// Bump when the on-disk entry layout changes; mismatched entries are
/// evicted on read.
const CACHE_VERSION: usize = 1;

/// Number of mutexes used to stripe-lock entries by key hash.
/// Must be a power of two so stripe selection can mask instead of mod.
const LOCK_STRIPES = 16;
comptime {
    std.debug.assert(std.math.isPowerOfTwo(LOCK_STRIPES));
}

pub const FsCache = @This();

// Directory holding all cache entry files.
dir: std.fs.Dir,
// Striped locks serializing concurrent get/put on the same entry.
locks: [LOCK_STRIPES]std.Thread.Mutex = .{std.Thread.Mutex{}} ** LOCK_STRIPES,

// On-disk JSON envelope: the version gate plus the entry metadata.
const CacheMetadataJson = struct {
    version: usize,
    metadata: CachedMetadata,
};

/// Pick the stripe lock responsible for a hashed cache key.
fn getLockPtr(self: *FsCache, key: *const [HASHED_KEY_LEN]u8) *std.Thread.Mutex {
    const stripe = std.hash.Wyhash.hash(0, key[0..]) & (LOCK_STRIPES - 1);
    return &self.locks[stripe];
}
|
||||
|
||||
// On-disk entry layout constants:
//   [8-byte little-endian body length][body][JSON metadata]
const BODY_LEN_HEADER_LEN = 8;
// Hex-encoded SHA-256 of the URL (2 chars per digest byte).
const HASHED_KEY_LEN = 64;
// hash + ".cache"
const HASHED_PATH_LEN = HASHED_KEY_LEN + 6;
// hash + ".cache.tmp"
const HASHED_TMP_PATH_LEN = HASHED_PATH_LEN + 4;

/// Hash a cache key (the URL) to a fixed-width lowercase hex string.
fn hashKey(key: []const u8) [HASHED_KEY_LEN]u8 {
    var digest: [std.crypto.hash.sha2.Sha256.digest_length]u8 = undefined;
    std.crypto.hash.sha2.Sha256.hash(key, &digest, .{});
    // bytesToHex already returns the [64]u8 hex array; the previous
    // bufPrint "{s}" round-trip through a second buffer was redundant.
    return std.fmt.bytesToHex(&digest, .lower);
}

/// File name of the final cache entry for a hashed key.
fn cachePath(hashed_key: *const [HASHED_KEY_LEN]u8) [HASHED_PATH_LEN]u8 {
    var path: [HASHED_PATH_LEN]u8 = undefined;
    _ = std.fmt.bufPrint(&path, "{s}.cache", .{hashed_key}) catch unreachable;
    return path;
}

/// File name of the temporary file written before the atomic rename.
fn cacheTmpPath(hashed_key: *const [HASHED_KEY_LEN]u8) [HASHED_TMP_PATH_LEN]u8 {
    var path: [HASHED_TMP_PATH_LEN]u8 = undefined;
    _ = std.fmt.bufPrint(&path, "{s}.cache.tmp", .{hashed_key}) catch unreachable;
    return path;
}
|
||||
|
||||
/// Open the cache directory at `path`, creating it if necessary.
pub fn init(path: []const u8) !FsCache {
    const cwd = std.fs.cwd();

    // An already-existing directory is fine; anything else is fatal.
    cwd.makeDir(path) catch |err| switch (err) {
        error.PathAlreadyExists => {},
        else => return err,
    };

    return .{ .dir = try cwd.openDir(path, .{ .iterate = true }) };
}

/// Close the cache directory handle.
pub fn deinit(self: *FsCache) void {
    self.dir.close();
}
|
||||
|
||||
/// Look up `req.url`. On a hit, returns the stored metadata plus an OPEN
/// file handle (with offset/len bracketing the body); the caller owns the
/// handle and must close it. Returns null on any miss; corrupt, stale or
/// collided entries are deleted on the way out.
pub fn get(self: *FsCache, arena: std.mem.Allocator, req: CacheRequest) ?Cache.CachedResponse {
    const hashed_key = hashKey(req.url);
    const cache_p = cachePath(&hashed_key);

    const lock = self.getLockPtr(&hashed_key);
    lock.lock();
    defer lock.unlock();

    const file = self.dir.openFile(&cache_p, .{ .mode = .read_only }) catch |e| {
        switch (e) {
            std.fs.File.OpenError.FileNotFound => {
                log.debug(.cache, "miss", .{ .url = req.url, .hash = &hashed_key, .reason = "missing" });
            },
            else => |err| {
                log.warn(.cache, "open file err", .{ .url = req.url, .err = err });
            },
        }
        return null;
    };

    // On every miss path the handle must be closed; when the entry is
    // also known-bad (corrupt, stale, collided) it is deleted as well.
    // Previously the vary-mismatch path returned without closing, leaking
    // a file descriptor per mismatch.
    var close_file = false;
    var delete_file = false;
    defer {
        if (close_file) file.close();
        if (delete_file) {
            self.dir.deleteFile(&cache_p) catch |e| {
                log.err(.cache, "clean fail", .{ .url = req.url, .file = &cache_p, .err = e });
            };
        }
    }

    var file_buf: [1024]u8 = undefined;
    var len_buf: [BODY_LEN_HEADER_LEN]u8 = undefined;

    var file_reader = file.reader(&file_buf);
    const file_reader_iface = &file_reader.interface;

    // Entry layout: [8-byte LE body length][body][JSON metadata].
    file_reader_iface.readSliceAll(&len_buf) catch |e| {
        log.warn(.cache, "read header", .{ .url = req.url, .err = e });
        close_file = true;
        delete_file = true;
        return null;
    };
    const body_len = std.mem.readInt(u64, &len_buf, .little);

    // Skip over the body to reach the metadata trailer.
    file_reader.seekTo(body_len + BODY_LEN_HEADER_LEN) catch |e| {
        log.warn(.cache, "seek metadata", .{ .url = req.url, .err = e });
        close_file = true;
        delete_file = true;
        return null;
    };

    var json_reader = std.json.Reader.init(arena, file_reader_iface);
    const cache_file: CacheMetadataJson = std.json.parseFromTokenSourceLeaky(
        CacheMetadataJson,
        arena,
        &json_reader,
        .{ .allocate = .alloc_always },
    ) catch |e| {
        // Warn because malformed metadata can be a deeper symptom.
        log.warn(.cache, "miss", .{ .url = req.url, .err = e, .reason = "malformed metadata" });
        close_file = true;
        delete_file = true;
        return null;
    };

    if (cache_file.version != CACHE_VERSION) {
        log.debug(.cache, "miss", .{
            .url = req.url,
            .reason = "version mismatch",
            .expected = CACHE_VERSION,
            .got = cache_file.version,
        });
        close_file = true;
        delete_file = true;
        return null;
    }

    const metadata = cache_file.metadata;

    // Expiration: elapsed wall time plus the Age the origin reported.
    const age = (req.timestamp - metadata.stored_at) + @as(i64, @intCast(metadata.age_at_store));
    if (age < 0 or @as(u64, @intCast(age)) >= metadata.cache_control.max_age) {
        log.debug(.cache, "miss", .{ .url = req.url, .reason = "expired" });
        close_file = true;
        delete_file = true;
        return null;
    }

    // Vary: every stored request header must match the incoming request.
    // A mismatch is a miss for THIS request only — the entry stays valid
    // for other requests, so close the handle but keep the file.
    for (metadata.vary_headers) |vary_hdr| {
        const incoming = for (req.request_headers) |h| {
            if (std.ascii.eqlIgnoreCase(h.name, vary_hdr.name)) break h.value;
        } else "";

        if (!std.ascii.eqlIgnoreCase(vary_hdr.value, incoming)) {
            log.debug(.cache, "miss", .{
                .url = req.url,
                .reason = "vary mismatch",
                .header = vary_hdr.name,
                .expected = vary_hdr.value,
                .got = incoming,
            });
            close_file = true;
            return null;
        }
    }

    // Guard against the (practically impossible) SHA-256 collision.
    if (!std.ascii.eqlIgnoreCase(metadata.url, req.url)) {
        log.warn(.cache, "collision", .{ .url = req.url, .expected = metadata.url, .got = req.url });
        close_file = true;
        delete_file = true;
        return null;
    }

    log.debug(.cache, "hit", .{ .url = req.url, .hash = &hashed_key });

    return .{
        .metadata = metadata,
        .data = .{
            .file = .{
                .file = file,
                .offset = BODY_LEN_HEADER_LEN,
                .len = body_len,
            },
        },
    };
}
|
||||
|
||||
/// Atomically store an entry: write body + metadata to a temp file, then
/// rename it over the final path so readers never observe a partially
/// written entry.
pub fn put(self: *FsCache, meta: CachedMetadata, body: []const u8) !void {
    const hashed_key = hashKey(meta.url);
    const cache_p = cachePath(&hashed_key);
    const cache_tmp_p = cacheTmpPath(&hashed_key);

    const lock = self.getLockPtr(&hashed_key);
    lock.lock();
    defer lock.unlock();

    const file = self.dir.createFile(&cache_tmp_p, .{ .truncate = true }) catch |e| {
        log.err(.cache, "create file", .{ .url = meta.url, .file = &cache_tmp_p, .err = e });
        return e;
    };
    // Don't leave a half-written temp file behind when anything below
    // fails (previously it lingered until the next put for the same key).
    // Declared before the close so LIFO order closes first, then deletes.
    errdefer self.dir.deleteFile(&cache_tmp_p) catch {};
    defer file.close();

    var writer_buf: [1024]u8 = undefined;
    var file_writer = file.writer(&writer_buf);
    var file_writer_iface = &file_writer.interface;

    // Entry layout: [8-byte LE body length][body][JSON metadata].
    var len_buf: [BODY_LEN_HEADER_LEN]u8 = undefined;
    std.mem.writeInt(u64, &len_buf, body.len, .little);

    file_writer_iface.writeAll(&len_buf) catch |e| {
        log.err(.cache, "write body len", .{ .url = meta.url, .err = e });
        return e;
    };
    file_writer_iface.writeAll(body) catch |e| {
        log.err(.cache, "write body", .{ .url = meta.url, .err = e });
        return e;
    };
    std.json.Stringify.value(
        CacheMetadataJson{ .version = CACHE_VERSION, .metadata = meta },
        .{ .whitespace = .minified },
        file_writer_iface,
    ) catch |e| {
        log.err(.cache, "write metadata", .{ .url = meta.url, .err = e });
        return e;
    };
    file_writer_iface.flush() catch |e| {
        log.err(.cache, "flush", .{ .url = meta.url, .err = e });
        return e;
    };
    self.dir.rename(&cache_tmp_p, &cache_p) catch |e| {
        log.err(.cache, "rename", .{ .url = meta.url, .from = &cache_tmp_p, .to = &cache_p, .err = e });
        return e;
    };

    log.debug(.cache, "put", .{ .url = meta.url, .hash = &hashed_key, .body_len = body.len });
}
|
||||
|
||||
const testing = std.testing;

// Builds a Cache backed by a fresh temporary directory.
// Callers must deinit the cache and clean up the tmp dir.
fn setupCache() !struct { tmp: testing.TmpDir, cache: Cache } {
    var tmp = testing.tmpDir(.{});
    errdefer tmp.cleanup();

    const path = try tmp.dir.realpathAlloc(testing.allocator, ".");
    defer testing.allocator.free(path);

    return .{
        .tmp = tmp,
        .cache = Cache{ .kind = .{ .fs = try FsCache.init(path) } },
    };
}

test "FsCache: basic put and get" {
    var setup = try setupCache();
    defer {
        setup.cache.deinit();
        setup.tmp.cleanup();
    }
    const cache = &setup.cache;

    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();

    const now = std.time.timestamp();
    const body = "hello world";

    try cache.put(.{
        .url = "https://example.com",
        .content_type = "text/html",
        .status = 200,
        .stored_at = now,
        .age_at_store = 0,
        .cache_control = .{ .max_age = 600 },
        .headers = &.{},
        .vary_headers = &.{},
    }, body);

    const hit = cache.get(arena.allocator(), .{
        .url = "https://example.com",
        .timestamp = now,
        .request_headers = &.{},
    }) orelse return error.CacheMiss;

    const stored = hit.data.file;
    defer stored.file.close();

    // Read the body back out of the returned file span and compare.
    var read_buf: [64]u8 = undefined;
    var reader = stored.file.reader(&read_buf);
    try reader.seekTo(stored.offset);

    const got = try reader.interface.readAlloc(arena.allocator(), stored.len);
    try testing.expectEqualStrings(body, got);
}
|
||||
|
||||
test "FsCache: get expiration" {
|
||||
var setup = try setupCache();
|
||||
defer {
|
||||
setup.cache.deinit();
|
||||
setup.tmp.cleanup();
|
||||
}
|
||||
|
||||
const cache = &setup.cache;
|
||||
|
||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
||||
defer arena.deinit();
|
||||
|
||||
const now = 5000;
|
||||
const max_age = 1000;
|
||||
|
||||
const meta = CachedMetadata{
|
||||
.url = "https://example.com",
|
||||
.content_type = "text/html",
|
||||
.status = 200,
|
||||
.stored_at = now,
|
||||
.age_at_store = 900,
|
||||
.cache_control = .{ .max_age = max_age },
|
||||
.headers = &.{},
|
||||
.vary_headers = &.{},
|
||||
};
|
||||
|
||||
const body = "hello world";
|
||||
try cache.put(meta, body);
|
||||
|
||||
const result = cache.get(
|
||||
arena.allocator(),
|
||||
.{
|
||||
.url = "https://example.com",
|
||||
.timestamp = now + 50,
|
||||
.request_headers = &.{},
|
||||
},
|
||||
) orelse return error.CacheMiss;
|
||||
result.data.file.file.close();
|
||||
|
||||
try testing.expectEqual(null, cache.get(
|
||||
arena.allocator(),
|
||||
.{
|
||||
.url = "https://example.com",
|
||||
.timestamp = now + 200,
|
||||
.request_headers = &.{},
|
||||
},
|
||||
));
|
||||
|
||||
try testing.expectEqual(null, cache.get(
|
||||
arena.allocator(),
|
||||
.{
|
||||
.url = "https://example.com",
|
||||
.timestamp = now,
|
||||
.request_headers = &.{},
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
test "FsCache: put override" {
|
||||
var setup = try setupCache();
|
||||
defer {
|
||||
setup.cache.deinit();
|
||||
setup.tmp.cleanup();
|
||||
}
|
||||
|
||||
const cache = &setup.cache;
|
||||
|
||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
||||
defer arena.deinit();
|
||||
|
||||
{
|
||||
const now = 5000;
|
||||
const max_age = 1000;
|
||||
|
||||
const meta = CachedMetadata{
|
||||
.url = "https://example.com",
|
||||
.content_type = "text/html",
|
||||
.status = 200,
|
||||
.stored_at = now,
|
||||
.age_at_store = 900,
|
||||
.cache_control = .{ .max_age = max_age },
|
||||
.headers = &.{},
|
||||
.vary_headers = &.{},
|
||||
};
|
||||
|
||||
const body = "hello world";
|
||||
try cache.put(meta, body);
|
||||
|
||||
const result = cache.get(
|
||||
arena.allocator(),
|
||||
.{
|
||||
.url = "https://example.com",
|
||||
.timestamp = now,
|
||||
.request_headers = &.{},
|
||||
},
|
||||
) orelse return error.CacheMiss;
|
||||
const f = result.data.file;
|
||||
const file = f.file;
|
||||
defer file.close();
|
||||
|
||||
var buf: [64]u8 = undefined;
|
||||
var file_reader = file.reader(&buf);
|
||||
try file_reader.seekTo(f.offset);
|
||||
|
||||
const read_buf = try file_reader.interface.readAlloc(testing.allocator, f.len);
|
||||
defer testing.allocator.free(read_buf);
|
||||
|
||||
try testing.expectEqualStrings(body, read_buf);
|
||||
}
|
||||
|
||||
{
|
||||
const now = 10000;
|
||||
const max_age = 2000;
|
||||
|
||||
const meta = CachedMetadata{
|
||||
.url = "https://example.com",
|
||||
.content_type = "text/html",
|
||||
.status = 200,
|
||||
.stored_at = now,
|
||||
.age_at_store = 0,
|
||||
.cache_control = .{ .max_age = max_age },
|
||||
.headers = &.{},
|
||||
.vary_headers = &.{},
|
||||
};
|
||||
|
||||
const body = "goodbye world";
|
||||
try cache.put(meta, body);
|
||||
|
||||
const result = cache.get(
|
||||
arena.allocator(),
|
||||
.{
|
||||
.url = "https://example.com",
|
||||
.timestamp = now,
|
||||
.request_headers = &.{},
|
||||
},
|
||||
) orelse return error.CacheMiss;
|
||||
const f = result.data.file;
|
||||
const file = f.file;
|
||||
defer file.close();
|
||||
|
||||
var buf: [64]u8 = undefined;
|
||||
var file_reader = file.reader(&buf);
|
||||
try file_reader.seekTo(f.offset);
|
||||
|
||||
const read_buf = try file_reader.interface.readAlloc(testing.allocator, f.len);
|
||||
defer testing.allocator.free(read_buf);
|
||||
|
||||
try testing.expectEqualStrings(body, read_buf);
|
||||
}
|
||||
}
|
||||
|
||||
test "FsCache: garbage file" {
|
||||
var setup = try setupCache();
|
||||
defer {
|
||||
setup.cache.deinit();
|
||||
setup.tmp.cleanup();
|
||||
}
|
||||
|
||||
const hashed_key = hashKey("https://example.com");
|
||||
const cache_p = cachePath(&hashed_key);
|
||||
const file = try setup.cache.kind.fs.dir.createFile(&cache_p, .{});
|
||||
try file.writeAll("this is not a valid cache file !@#$%");
|
||||
file.close();
|
||||
|
||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
||||
defer arena.deinit();
|
||||
|
||||
try testing.expectEqual(
|
||||
null,
|
||||
setup.cache.get(arena.allocator(), .{
|
||||
.url = "https://example.com",
|
||||
.timestamp = 5000,
|
||||
.request_headers = &.{},
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
test "FsCache: vary hit and miss" {
|
||||
var setup = try setupCache();
|
||||
defer {
|
||||
setup.cache.deinit();
|
||||
setup.tmp.cleanup();
|
||||
}
|
||||
|
||||
const cache = &setup.cache;
|
||||
|
||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
||||
defer arena.deinit();
|
||||
|
||||
const now = std.time.timestamp();
|
||||
const meta = CachedMetadata{
|
||||
.url = "https://example.com",
|
||||
.content_type = "text/html",
|
||||
.status = 200,
|
||||
.stored_at = now,
|
||||
.age_at_store = 0,
|
||||
.cache_control = .{ .max_age = 600 },
|
||||
.headers = &.{},
|
||||
.vary_headers = &.{
|
||||
.{ .name = "Accept-Encoding", .value = "gzip" },
|
||||
},
|
||||
};
|
||||
|
||||
try cache.put(meta, "hello world");
|
||||
|
||||
const result = cache.get(arena.allocator(), .{
|
||||
.url = "https://example.com",
|
||||
.timestamp = now,
|
||||
.request_headers = &.{
|
||||
.{ .name = "Accept-Encoding", .value = "gzip" },
|
||||
},
|
||||
}) orelse return error.CacheMiss;
|
||||
result.data.file.file.close();
|
||||
|
||||
try testing.expectEqual(null, cache.get(arena.allocator(), .{
|
||||
.url = "https://example.com",
|
||||
.timestamp = now,
|
||||
.request_headers = &.{
|
||||
.{ .name = "Accept-Encoding", .value = "br" },
|
||||
},
|
||||
}));
|
||||
|
||||
try testing.expectEqual(null, cache.get(arena.allocator(), .{
|
||||
.url = "https://example.com",
|
||||
.timestamp = now,
|
||||
.request_headers = &.{},
|
||||
}));
|
||||
|
||||
const result2 = cache.get(arena.allocator(), .{
|
||||
.url = "https://example.com",
|
||||
.timestamp = now,
|
||||
.request_headers = &.{
|
||||
.{ .name = "Accept-Encoding", .value = "gzip" },
|
||||
},
|
||||
}) orelse return error.CacheMiss;
|
||||
result2.data.file.file.close();
|
||||
}
|
||||
|
||||
test "FsCache: vary multiple headers" {
|
||||
var setup = try setupCache();
|
||||
defer {
|
||||
setup.cache.deinit();
|
||||
setup.tmp.cleanup();
|
||||
}
|
||||
|
||||
const cache = &setup.cache;
|
||||
|
||||
var arena = std.heap.ArenaAllocator.init(testing.allocator);
|
||||
defer arena.deinit();
|
||||
|
||||
const now = std.time.timestamp();
|
||||
const meta = CachedMetadata{
|
||||
.url = "https://example.com",
|
||||
.content_type = "text/html",
|
||||
.status = 200,
|
||||
.stored_at = now,
|
||||
.age_at_store = 0,
|
||||
.cache_control = .{ .max_age = 600 },
|
||||
.headers = &.{},
|
||||
.vary_headers = &.{
|
||||
.{ .name = "Accept-Encoding", .value = "gzip" },
|
||||
.{ .name = "Accept-Language", .value = "en" },
|
||||
},
|
||||
};
|
||||
|
||||
try cache.put(meta, "hello world");
|
||||
|
||||
const result = cache.get(arena.allocator(), .{
|
||||
.url = "https://example.com",
|
||||
.timestamp = now,
|
||||
.request_headers = &.{
|
||||
.{ .name = "Accept-Encoding", .value = "gzip" },
|
||||
.{ .name = "Accept-Language", .value = "en" },
|
||||
},
|
||||
}) orelse return error.CacheMiss;
|
||||
result.data.file.file.close();
|
||||
|
||||
try testing.expectEqual(null, cache.get(arena.allocator(), .{
|
||||
.url = "https://example.com",
|
||||
.timestamp = now,
|
||||
.request_headers = &.{
|
||||
.{ .name = "Accept-Encoding", .value = "gzip" },
|
||||
.{ .name = "Accept-Language", .value = "fr" },
|
||||
},
|
||||
}));
|
||||
}
|
||||
@@ -79,7 +79,7 @@ pub const Headers = struct {
|
||||
self.headers = updated_headers;
|
||||
}
|
||||
|
||||
fn parseHeader(header_str: []const u8) ?Header {
|
||||
pub fn parseHeader(header_str: []const u8) ?Header {
|
||||
const colon_pos = std.mem.indexOfScalar(u8, header_str, ':') orelse return null;
|
||||
|
||||
const name = std.mem.trim(u8, header_str[0..colon_pos], " \t");
|
||||
@@ -88,22 +88,9 @@ pub const Headers = struct {
|
||||
return .{ .name = name, .value = value };
|
||||
}
|
||||
|
||||
pub fn iterator(self: *Headers) Iterator {
|
||||
return .{
|
||||
.header = self.headers,
|
||||
};
|
||||
pub fn iterator(self: Headers) HeaderIterator {
|
||||
return .{ .curl_slist = .{ .header = self.headers } };
|
||||
}
|
||||
|
||||
const Iterator = struct {
|
||||
header: [*c]libcurl.CurlSList,
|
||||
|
||||
pub fn next(self: *Iterator) ?Header {
|
||||
const h = self.header orelse return null;
|
||||
|
||||
self.header = h.*.next;
|
||||
return parseHeader(std.mem.span(@as([*:0]const u8, @ptrCast(h.*.data))));
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
// In normal cases, the header iterator comes from the curl linked list.
|
||||
@@ -112,6 +99,7 @@ pub const Headers = struct {
|
||||
// This union, is an iterator that exposes the same API for either case.
|
||||
pub const HeaderIterator = union(enum) {
|
||||
curl: CurlHeaderIterator,
|
||||
curl_slist: CurlSListIterator,
|
||||
list: ListHeaderIterator,
|
||||
|
||||
pub fn next(self: *HeaderIterator) ?Header {
|
||||
@@ -120,6 +108,19 @@ pub const HeaderIterator = union(enum) {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn collect(self: *HeaderIterator, allocator: std.mem.Allocator) !std.ArrayList(Header) {
|
||||
var list: std.ArrayList(Header) = .empty;
|
||||
|
||||
while (self.next()) |hdr| {
|
||||
try list.append(allocator, .{
|
||||
.name = try allocator.dupe(u8, hdr.name),
|
||||
.value = try allocator.dupe(u8, hdr.value),
|
||||
});
|
||||
}
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
const CurlHeaderIterator = struct {
|
||||
conn: *const Connection,
|
||||
prev: ?*libcurl.CurlHeader = null,
|
||||
@@ -136,6 +137,16 @@ pub const HeaderIterator = union(enum) {
|
||||
}
|
||||
};
|
||||
|
||||
const CurlSListIterator = struct {
|
||||
header: [*c]libcurl.CurlSList,
|
||||
|
||||
pub fn next(self: *CurlSListIterator) ?Header {
|
||||
const h = self.header orelse return null;
|
||||
self.header = h.*.next;
|
||||
return Headers.parseHeader(std.mem.span(@as([*:0]const u8, @ptrCast(h.*.data))));
|
||||
}
|
||||
};
|
||||
|
||||
const ListHeaderIterator = struct {
|
||||
index: usize = 0,
|
||||
list: []const Header,
|
||||
|
||||
240
src/network/layer/CacheLayer.zig
Normal file
240
src/network/layer/CacheLayer.zig
Normal file
@@ -0,0 +1,240 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
const std = @import("std");
|
||||
const log = @import("../../log.zig");
|
||||
|
||||
const http = @import("../http.zig");
|
||||
const Transfer = @import("../../browser/HttpClient.zig").Transfer;
|
||||
const Context = @import("../../browser/HttpClient.zig").Context;
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Response = @import("../../browser/HttpClient.zig").Response;
|
||||
const Layer = @import("../../browser/HttpClient.zig").Layer;
|
||||
|
||||
const Cache = @import("../cache/Cache.zig");
|
||||
const CachedMetadata = @import("../cache/Cache.zig").CachedMetadata;
|
||||
const CachedResponse = @import("../cache/Cache.zig").CachedResponse;
|
||||
const Forward = @import("Forward.zig");
|
||||
|
||||
const CacheLayer = @This();

// The downstream layer requests are forwarded to on a cache miss.
next: Layer = undefined,

/// Type-erase this layer into the generic Layer interface.
pub fn layer(self: *CacheLayer) Layer {
    return .{
        .ptr = self,
        .vtable = &.{
            .request = request,
        },
    };
}
|
||||
|
||||
/// Layer entry point: serve GET requests from the cache when possible,
/// otherwise wrap the request so the response can be captured for
/// storage, and forward it down the chain.
fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void {
    const self: *CacheLayer = @ptrCast(@alignCast(ptr));
    const network = ctx.network;

    // Only GETs are cacheable, and only when a cache is configured.
    if (network.cache == null or req.method != .GET) {
        return self.next.request(ctx, req);
    }

    const arena = try network.app.arena_pool.acquire(.{ .debug = "CacheLayer" });
    errdefer network.app.arena_pool.release(arena);

    var header_it = req.headers.iterator();
    const request_headers = try header_it.collect(arena);

    if (network.cache.?.get(arena, .{
        .url = req.url,
        .timestamp = std.time.timestamp(),
        .request_headers = request_headers.items,
    })) |cached| {
        // Cache hit: the request never goes downstream, so its header
        // list and our scratch arena are released right here.
        defer req.headers.deinit();
        defer network.app.arena_pool.release(arena);
        return serveFromCache(req, &cached);
    }

    // Cache miss: wrap the callbacks so the response can be observed
    // and, if cacheable, stored. Ownership of `arena` moves into the
    // CacheContext, which outlives this call.
    const cache_ctx = try arena.create(CacheContext);
    cache_ctx.* = .{
        .arena = arena,
        .context = ctx,
        .forward = Forward.fromRequest(req),
        .req_url = req.url,
        .req_headers = req.headers,
    };

    const wrapped = cache_ctx.forward.wrapRequest(
        req,
        cache_ctx,
        "forward",
        .{
            .start = CacheContext.startCallback,
            .header = CacheContext.headerCallback,
            .done = CacheContext.doneCallback,
            .shutdown = CacheContext.shutdownCallback,
            .err = CacheContext.errorCallback,
        },
    );

    return self.next.request(ctx, wrapped);
}
|
||||
|
||||
/// Replays a previously cached response through the request's callbacks in
/// the same order a live transfer would: start, header, data chunks, done.
/// Closes the cache entry's backing file (if any) before returning.
fn serveFromCache(req: Request, cached: *const CachedResponse) !void {
    const response = Response.fromCached(req.ctx, cached);
    // Ensure the backing file is closed on every exit path; in-memory
    // entries need no cleanup.
    defer switch (cached.data) {
        .buffer => |_| {},
        .file => |f| f.file.close(),
    };

    if (req.start_callback) |cb| {
        try cb(response);
    }

    // The header callback may decline the response; surface that as an abort
    // through the error callback and stop (done_callback is intentionally
    // not called on this path).
    const proceed = try req.header_callback(response);
    if (!proceed) {
        req.error_callback(req.ctx, error.Abort);
        return;
    }

    switch (cached.data) {
        .buffer => |data| {
            // Deliver the whole in-memory body as a single chunk.
            if (data.len > 0) {
                try req.data_callback(response, data);
            }
        },
        .file => |f| {
            // Stream the body from disk in 1 KiB chunks, starting at the
            // entry's offset and delivering at most f.len bytes.
            const file = f.file;
            var buf: [1024]u8 = undefined;
            var file_reader = file.reader(&buf);
            try file_reader.seekTo(f.offset);
            const reader = &file_reader.interface;
            var read_buf: [1024]u8 = undefined;
            var remaining = f.len;
            while (remaining > 0) {
                const read_len = @min(read_buf.len, remaining);
                const n = try reader.readSliceShort(read_buf[0..read_len]);
                // Short read of zero means EOF before f.len bytes; stop early.
                if (n == 0) break;
                remaining -= n;
                try req.data_callback(response, read_buf[0..n]);
            }
        },
    }

    try req.done_callback(req.ctx);
}
|
||||
|
||||
/// Per-request state for a cache miss: relays the live response to the
/// original callbacks (via `forward`) while deciding whether to store it.
/// Lives on the arena acquired in `request`; whichever terminal callback
/// fires (done/shutdown/error) releases that arena.
const CacheContext = struct {
    arena: std.mem.Allocator,
    context: Context,
    // Set by startCallback; doneCallback reads the buffered body from it.
    transfer: ?*Transfer = null,
    forward: Forward,
    req_url: [:0]const u8,
    req_headers: http.Headers,
    // Non-null once headerCallback has decided the response is cacheable.
    pending_metadata: ?*CachedMetadata = null,

    fn startCallback(response: Response) anyerror!void {
        const self: *CacheContext = @ptrCast(@alignCast(response.ctx));
        self.transfer = response.inner.transfer;
        return self.forward.forwardStart(response);
    }

    // Inspects response headers and, when the response is cacheable, builds
    // the CachedMetadata that doneCallback will persist.
    fn headerCallback(response: Response) anyerror!bool {
        const self: *CacheContext = @ptrCast(@alignCast(response.ctx));
        const allocator = self.arena;

        const transfer = response.inner.transfer;
        const rh = &transfer.response_header.?;

        const conn = transfer._conn.?;

        const vary = if (conn.getResponseHeader("vary", 0)) |h| h.value else null;

        const maybe_cm = try Cache.tryCache(
            allocator,
            std.time.timestamp(),
            transfer.url,
            rh.status,
            rh.contentType(),
            if (conn.getResponseHeader("cache-control", 0)) |h| h.value else null,
            vary,
            if (conn.getResponseHeader("age", 0)) |h| h.value else null,
            conn.getResponseHeader("set-cookie", 0) != null,
            conn.getResponseHeader("authorization", 0) != null,
        );

        if (maybe_cm) |cm| {
            var iter = transfer.responseHeaderIterator();
            var header_list = try iter.collect(allocator);
            const end_of_response = header_list.items.len;

            // When the response varies, append the request headers named by
            // Vary so a later lookup can match against them. They land after
            // end_of_response in header_list.
            if (vary) |vary_str| {
                var req_it = self.req_headers.iterator();
                while (req_it.next()) |hdr| {
                    var vary_iter = std.mem.splitScalar(u8, vary_str, ',');
                    while (vary_iter.next()) |part| {
                        const name = std.mem.trim(u8, part, &std.ascii.whitespace);
                        if (std.ascii.eqlIgnoreCase(hdr.name, name)) {
                            try header_list.append(allocator, .{
                                .name = try allocator.dupe(u8, hdr.name),
                                .value = try allocator.dupe(u8, hdr.value),
                            });
                        }
                    }
                }
            }

            // FIX: this block used to live inside `if (vary)`, so responses
            // without a Vary header were never recorded even though tryCache
            // accepted them. Build the metadata for every cacheable response;
            // vary_headers is simply empty when there was no Vary header.
            const metadata = try allocator.create(CachedMetadata);
            metadata.* = cm;
            metadata.headers = header_list.items[0..end_of_response];
            metadata.vary_headers = header_list.items[end_of_response..];
            self.pending_metadata = metadata;
        }

        return self.forward.forwardHeader(response);
    }

    // Transfer finished: persist the buffered body if headerCallback marked
    // it cacheable, then relay completion to the original callbacks.
    fn doneCallback(ctx: *anyopaque) anyerror!void {
        const self: *CacheContext = @ptrCast(@alignCast(ctx));
        defer self.context.network.app.arena_pool.release(self.arena);

        const transfer = self.transfer orelse @panic("Start Callback didn't set CacheLayer.transfer");

        if (self.pending_metadata) |metadata| {
            const cache = &self.context.network.cache.?;

            log.debug(.browser, "http cache", .{ .key = self.req_url, .metadata = metadata });
            // A failed put only costs us the cache entry; never fail the
            // request because of it.
            cache.put(metadata.*, transfer._stream_buffer.items) catch |err| {
                log.warn(.http, "cache put failed", .{ .err = err });
            };
            log.debug(.browser, "http.cache.put", .{ .url = self.req_url });
        }

        return self.forward.forwardDone();
    }

    fn shutdownCallback(ctx: *anyopaque) void {
        const self: *CacheContext = @ptrCast(@alignCast(ctx));
        defer self.context.network.app.arena_pool.release(self.arena);
        self.forward.forwardShutdown();
    }

    fn errorCallback(ctx: *anyopaque, e: anyerror) void {
        const self: *CacheContext = @ptrCast(@alignCast(ctx));
        defer self.context.network.app.arena_pool.release(self.arena);
        self.forward.forwardErr(e);
    }
};
|
||||
135
src/network/layer/Forward.zig
Normal file
135
src/network/layer/Forward.zig
Normal file
@@ -0,0 +1,135 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Response = @import("../../browser/HttpClient.zig").Response;
|
||||
|
||||
const Forward = @This();
|
||||
|
||||
// The original caller's context, restored onto every forwarded callback.
ctx: *anyopaque,
// Callbacks captured from the wrapped request; optional ones may be null.
start: ?Request.StartCallback,
header: Request.HeaderCallback,
data: Request.DataCallback,
done: Request.DoneCallback,
err: Request.ErrorCallback,
shutdown: ?Request.ShutdownCallback,
|
||||
|
||||
/// Captures a request's context and every callback so they can be restored
/// later, after a layer has substituted its own.
pub fn fromRequest(req: Request) Forward {
    return Forward{
        .ctx = req.ctx,
        .shutdown = req.shutdown_callback,
        .err = req.error_callback,
        .done = req.done_callback,
        .data = req.data_callback,
        .header = req.header_callback,
        .start = req.start_callback,
    };
}
|
||||
|
||||
/// Callback overrides for wrapRequest. Any field left null keeps the
/// generated passthrough behavior, which relays straight to the captured
/// Forward callbacks.
pub const Overrides = struct {
    start: ?Request.StartCallback = null,
    header: ?Request.HeaderCallback = null,
    data: ?Request.DataCallback = null,
    done: ?Request.DoneCallback = null,
    err: ?Request.ErrorCallback = null,
    shutdown: ?Request.ShutdownCallback = null,
};
|
||||
|
||||
/// Returns a copy of `req` whose ctx is `new_ctx` and whose callbacks are
/// either the supplied overrides or comptime-generated passthroughs that
/// relay to the Forward stored at `@field(new_ctx.*, field)`.
/// The optional callbacks (start/shutdown) are only installed when the
/// original request actually had them, preserving their null/non-null state.
pub fn wrapRequest(
    self: *Forward,
    req: Request,
    new_ctx: anytype,
    comptime field: []const u8,
    overrides: Overrides,
) Request {
    const T = @TypeOf(new_ctx.*);
    const PassthroughT = makePassthrough(T, field);
    var wrapped = req;
    wrapped.ctx = new_ctx;
    wrapped.start_callback = overrides.start orelse if (self.start != null) PassthroughT.start else null;
    wrapped.header_callback = overrides.header orelse PassthroughT.header;
    wrapped.data_callback = overrides.data orelse PassthroughT.data;
    wrapped.done_callback = overrides.done orelse PassthroughT.done;
    wrapped.error_callback = overrides.err orelse PassthroughT.err;
    wrapped.shutdown_callback = overrides.shutdown orelse if (self.shutdown != null) PassthroughT.shutdown else null;
    return wrapped;
}
|
||||
|
||||
/// Generates a namespace of callbacks that cast the callback ctx back to *T
/// and relay each event to the Forward stored in the named field of T.
/// Used by wrapRequest for any callback a layer does not override.
fn makePassthrough(comptime T: type, comptime field: []const u8) type {
    return struct {
        pub fn start(response: Response) anyerror!void {
            const self: *T = @ptrCast(@alignCast(response.ctx));
            return @field(self, field).forwardStart(response);
        }

        pub fn header(response: Response) anyerror!bool {
            const self: *T = @ptrCast(@alignCast(response.ctx));
            return @field(self, field).forwardHeader(response);
        }

        pub fn data(response: Response, chunk: []const u8) anyerror!void {
            const self: *T = @ptrCast(@alignCast(response.ctx));
            return @field(self, field).forwardData(response, chunk);
        }

        pub fn done(ctx_ptr: *anyopaque) anyerror!void {
            const self: *T = @ptrCast(@alignCast(ctx_ptr));
            return @field(self, field).forwardDone();
        }

        pub fn err(ctx_ptr: *anyopaque, e: anyerror) void {
            const self: *T = @ptrCast(@alignCast(ctx_ptr));
            @field(self, field).forwardErr(e);
        }

        pub fn shutdown(ctx_ptr: *anyopaque) void {
            const self: *T = @ptrCast(@alignCast(ctx_ptr));
            @field(self, field).forwardShutdown();
        }
    };
}
|
||||
|
||||
/// Invokes the captured start callback, if any, with the original ctx restored.
pub fn forwardStart(self: Forward, response: Response) anyerror!void {
    const cb = self.start orelse return;
    var restored = response;
    restored.ctx = self.ctx;
    try cb(restored);
}
|
||||
|
||||
/// Relays the header callback with the original ctx restored; returns its
/// proceed/abort decision unchanged.
pub fn forwardHeader(self: Forward, response: Response) anyerror!bool {
    var restored = response;
    restored.ctx = self.ctx;
    return self.header(restored);
}
|
||||
|
||||
/// Relays a body chunk to the captured data callback with the original ctx
/// restored.
pub fn forwardData(self: Forward, response: Response, chunk: []const u8) anyerror!void {
    var restored = response;
    restored.ctx = self.ctx;
    try self.data(restored, chunk);
}
|
||||
|
||||
/// Relays completion to the captured done callback with the original ctx.
pub fn forwardDone(self: Forward) anyerror!void {
    return self.done(self.ctx);
}
|
||||
|
||||
/// Relays an error to the captured error callback with the original ctx.
pub fn forwardErr(self: Forward, e: anyerror) void {
    self.err(self.ctx, e);
}
|
||||
|
||||
/// Relays shutdown to the captured shutdown callback, if one was set.
pub fn forwardShutdown(self: Forward) void {
    const cb = self.shutdown orelse return;
    cb(self.ctx);
}
|
||||
255
src/network/layer/RobotsLayer.zig
Normal file
255
src/network/layer/RobotsLayer.zig
Normal file
@@ -0,0 +1,255 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
const std = @import("std");
|
||||
const log = @import("../../log.zig");
|
||||
|
||||
const URL = @import("../../browser/URL.zig");
|
||||
const Robots = @import("../Robots.zig");
|
||||
const Context = @import("../../browser/HttpClient.zig").Context;
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Response = @import("../../browser/HttpClient.zig").Response;
|
||||
const Layer = @import("../../browser/HttpClient.zig").Layer;
|
||||
const Forward = @import("Forward.zig");
|
||||
|
||||
const RobotsLayer = @This();
|
||||
|
||||
// Next layer in the network stack; assigned when the layer chain is wired up.
next: Layer = undefined,
// When false, robots.txt is ignored entirely and requests pass straight through.
obey_robots: bool,
allocator: std.mem.Allocator,
// Requests parked per robots.txt URL while that robots.txt is being fetched.
// Keys are the heap-allocated robots URLs; ownership notes are in
// fetchRobotsThenRequest.
pending: std.StringHashMapUnmanaged(std.ArrayListUnmanaged(Request)) = .empty,
|
||||
|
||||
/// Adapts this RobotsLayer into the generic Layer interface used by the stack.
pub fn layer(self: *RobotsLayer) Layer {
    return Layer{
        .ptr = self,
        .vtable = &.{ .request = request },
    };
}
|
||||
|
||||
/// Frees every parked request queue, then the map itself.
pub fn deinit(self: *RobotsLayer, allocator: std.mem.Allocator) void {
    var lists = self.pending.valueIterator();
    while (lists.next()) |list| {
        list.deinit(allocator);
    }
    self.pending.deinit(allocator);
}
|
||||
|
||||
/// Layer entry point: enforces robots.txt when obey_robots is set.
/// Known origins are decided immediately; unknown ones trigger a robots.txt
/// fetch with the request parked until the verdict arrives.
fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void {
    const self: *RobotsLayer = @ptrCast(@alignCast(ptr));

    if (!self.obey_robots) {
        return self.next.request(ctx, req);
    }

    const robots_url = try URL.getRobotsUrl(self.allocator, req.url);

    // Fast path: we already have a verdict for this origin's robots.txt.
    if (ctx.network.robot_store.get(robots_url)) |robot_entry| {
        // FIX: the defer below is the sole owner of robots_url in this
        // branch. The original code declared the errdefer before this
        // branch, so a failure in self.next.request here freed robots_url
        // twice. Declaring the errdefer only after the branch removes that
        // double free.
        defer self.allocator.free(robots_url);
        switch (robot_entry) {
            .present => |robots| {
                const path = URL.getPathname(req.url);
                if (!robots.isAllowed(path)) {
                    log.warn(.http, "blocked by robots", .{ .url = req.url });
                    req.error_callback(req.ctx, error.RobotsBlocked);
                    return;
                }
            },
            .absent => {},
        }
        return self.next.request(ctx, req);
    }

    // Slow path: robots.txt unknown. On success, ownership of robots_url
    // moves into fetchRobotsThenRequest (map key / RobotsContext).
    errdefer self.allocator.free(robots_url);
    return self.fetchRobotsThenRequest(ctx, robots_url, req);
}
|
||||
|
||||
/// Parks `req` until robots.txt at `robots_url` has been fetched.
/// The first caller for a given robots_url creates the queue entry and
/// issues the fetch; subsequent callers just enqueue. On the non-first path
/// this duplicate robots_url string is freed (the map already owns an
/// identical key); on the first path ownership moves to the map key and the
/// RobotsContext, which frees it when the fetch completes.
fn fetchRobotsThenRequest(self: *RobotsLayer, ctx: Context, robots_url: [:0]const u8, req: Request) !void {
    const entry = try self.pending.getOrPut(self.allocator, robots_url);

    if (!entry.found_existing) {
        // NOTE(review): if anything below fails, this errdefer frees
        // robots_url while the just-created map entry still holds it as its
        // key (and the caller's errdefer may free it again). The entry
        // should probably be removed here too — verify the error path.
        errdefer self.allocator.free(robots_url);
        entry.value_ptr.* = .empty;

        const robots_ctx = try self.allocator.create(RobotsContext);
        errdefer self.allocator.destroy(robots_ctx);
        robots_ctx.* = .{
            .layer = self,
            .ctx = ctx,
            .robots_url = robots_url,
            .buffer = .empty,
        };

        const headers = try ctx.newHeaders();
        log.debug(.browser, "fetching robots.txt", .{ .robots_url = robots_url });

        // Issue the robots.txt fetch itself; it bypasses this layer by going
        // straight to `next`, so it cannot recurse into the robots check.
        try self.next.request(ctx, .{
            .ctx = robots_ctx,
            .url = robots_url,
            .method = .GET,
            .headers = headers,
            .blocking = false,
            .frame_id = req.frame_id,
            .cookie_jar = req.cookie_jar,
            .cookie_origin = req.cookie_origin,
            .notification = req.notification,
            .resource_type = .fetch,
            .header_callback = RobotsContext.headerCallback,
            .data_callback = RobotsContext.dataCallback,
            .done_callback = RobotsContext.doneCallback,
            .error_callback = RobotsContext.errorCallback,
            .shutdown_callback = RobotsContext.shutdownCallback,
        });
    } else {
        // Duplicate key: the map already owns an equal string.
        self.allocator.free(robots_url);
    }

    try entry.value_ptr.append(self.allocator, req);
}
|
||||
|
||||
/// Drains the queue for `robots_url`, either re-dispatching each parked
/// request down the stack (allowed) or rejecting it with RobotsBlocked.
/// NOTE(review): `allowed` is one verdict applied to every queued request,
/// but doneCallback derives it from the first queued request's path only —
/// queued requests for other paths inherit that same verdict. Confirm this
/// is intended.
fn flushPending(self: *RobotsLayer, ctx: Context, robots_url: [:0]const u8, allowed: bool) void {
    var queued = self.pending.fetchRemove(robots_url) orelse
        @panic("RobotsLayer.flushPending: missing queue");
    defer queued.value.deinit(self.allocator);

    for (queued.value.items) |queued_req| {
        if (!allowed) {
            log.warn(.http, "blocked by robots", .{ .url = queued_req.url });
            defer queued_req.headers.deinit();
            queued_req.error_callback(queued_req.ctx, error.RobotsBlocked);
        } else {
            // On dispatch failure, report through the request's own error
            // callback rather than propagating (this function cannot fail).
            self.next.request(ctx, queued_req) catch |e| {
                defer queued_req.headers.deinit();
                queued_req.error_callback(queued_req.ctx, e);
            };
        }
    }
}
|
||||
|
||||
/// Drains the queue for `robots_url` after a shutdown: notifies each parked
/// request's shutdown callback (if any) and releases its headers.
fn flushPendingShutdown(self: *RobotsLayer, robots_url: [:0]const u8) void {
    var removed = self.pending.fetchRemove(robots_url) orelse
        @panic("RobotsLayer.flushPendingShutdown: missing queue");
    defer removed.value.deinit(self.allocator);

    for (removed.value.items) |parked| {
        if (parked.shutdown_callback) |cb| cb(parked.ctx);
        parked.headers.deinit();
    }
}
|
||||
|
||||
/// Callback state for one in-flight robots.txt fetch. Heap-allocated by
/// fetchRobotsThenRequest; freed (together with robots_url) by whichever
/// terminal callback fires: done, error, or shutdown.
const RobotsContext = struct {
    layer: *RobotsLayer,
    ctx: Context,
    robots_url: [:0]const u8,
    // Accumulates the robots.txt body across data callbacks.
    buffer: std.ArrayListUnmanaged(u8),
    // HTTP status of the robots.txt response; stays 0 until headerCallback
    // sees a live transfer header. NOTE(review): a `.cached` response never
    // sets this, so it falls into the `else` branch of doneCallback — verify
    // cached robots responses are handled as intended.
    status: u16 = 0,

    fn deinit(self: *RobotsContext) void {
        self.buffer.deinit(self.layer.allocator);
        self.layer.allocator.destroy(self);
    }

    // Records the status and pre-sizes the body buffer from Content-Length.
    fn headerCallback(response: Response) anyerror!bool {
        const self: *RobotsContext = @ptrCast(@alignCast(response.ctx));
        switch (response.inner) {
            .transfer => |t| {
                if (t.response_header) |hdr| {
                    log.debug(.browser, "robots status", .{ .status = hdr.status, .robots_url = self.robots_url });
                    self.status = hdr.status;
                }
                if (t.getContentLength()) |cl| {
                    try self.buffer.ensureTotalCapacity(self.layer.allocator, cl);
                }
            },
            .cached => {},
        }
        return true;
    }

    fn dataCallback(response: Response, data: []const u8) anyerror!void {
        const self: *RobotsContext = @ptrCast(@alignCast(response.ctx));
        try self.buffer.appendSlice(self.layer.allocator, data);
    }

    // Fetch finished: record the verdict in the robot store, then flush the
    // parked requests. robots_url and self are freed on exit (after
    // flushPending runs, since defers fire last).
    fn doneCallback(ctx_ptr: *anyopaque) anyerror!void {
        const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr));
        const l = self.layer;
        const ctx = self.ctx;
        const robots_url = self.robots_url;
        defer l.allocator.free(robots_url);
        defer self.deinit();

        // Default to allowed: an unparseable/missing robots.txt must not
        // block crawling.
        var allowed = true;
        const network = ctx.network;

        // NOTE(review): the `try`s on putAbsent/put below propagate before
        // flushPending runs, leaving the parked queue in the map with freed
        // robots_url/context — verify that error path.
        switch (self.status) {
            200 => {
                // NOTE(review): a 200 with an empty body records nothing in
                // the store, so the next request re-fetches robots.txt.
                if (self.buffer.items.len > 0) {
                    const robots: ?Robots = network.robot_store.robotsFromBytes(
                        network.config.http_headers.user_agent,
                        self.buffer.items,
                    ) catch blk: {
                        log.warn(.browser, "failed to parse robots", .{ .robots_url = robots_url });
                        try network.robot_store.putAbsent(robots_url);
                        break :blk null;
                    };
                    if (robots) |r| {
                        // NOTE(review): robots_url is freed on exit; assumes
                        // robot_store.put copies its key — confirm.
                        try network.robot_store.put(robots_url, r);
                        // NOTE(review): only the FIRST parked request's path
                        // decides `allowed` for the whole queue.
                        const path = URL.getPathname(self.layer.pending.get(robots_url).?.items[0].url);
                        allowed = r.isAllowed(path);
                    }
                }
            },
            404 => {
                log.debug(.http, "robots not found", .{ .url = robots_url });
                try network.robot_store.putAbsent(robots_url);
            },
            else => {
                log.debug(.http, "unexpected status on robots", .{
                    .url = robots_url,
                    .status = self.status,
                });
                try network.robot_store.putAbsent(robots_url);
            },
        }

        l.flushPending(ctx, robots_url, allowed);
    }

    // Fetch failed: fail open (treat everything as allowed) so a broken
    // robots.txt endpoint does not block the parked requests.
    fn errorCallback(ctx_ptr: *anyopaque, err: anyerror) void {
        const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr));
        const l = self.layer;
        const ctx = self.ctx;
        const robots_url = self.robots_url;
        defer l.allocator.free(robots_url);
        defer self.deinit();
        log.warn(.http, "robots fetch failed", .{ .err = err });
        l.flushPending(ctx, robots_url, true);
    }

    // Client shutting down: propagate shutdown to every parked request.
    fn shutdownCallback(ctx_ptr: *anyopaque) void {
        const self: *RobotsContext = @ptrCast(@alignCast(ctx_ptr));
        const l = self.layer;
        const robots_url = self.robots_url;
        defer l.allocator.free(robots_url);
        defer self.deinit();
        log.debug(.http, "robots fetch shutdown", .{});
        l.flushPendingShutdown(robots_url);
    }
};
|
||||
54
src/network/layer/WebBotAuthLayer.zig
Normal file
54
src/network/layer/WebBotAuthLayer.zig
Normal file
@@ -0,0 +1,54 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
const std = @import("std");
|
||||
const log = @import("../../log.zig");
|
||||
|
||||
const URL = @import("../../browser/URL.zig");
|
||||
const WebBotAuth = @import("../WebBotAuth.zig");
|
||||
const Context = @import("../../browser/HttpClient.zig").Context;
|
||||
const Request = @import("../../browser/HttpClient.zig").Request;
|
||||
const Layer = @import("../../browser/HttpClient.zig").Layer;
|
||||
|
||||
const WebBotAuthLayer = @This();
|
||||
|
||||
// Next layer in the network stack; assigned when the layer chain is wired up.
next: Layer = undefined,
|
||||
|
||||
/// Adapts this WebBotAuthLayer into the generic Layer interface.
pub fn layer(self: *WebBotAuthLayer) Layer {
    return Layer{
        .ptr = self,
        .vtable = &.{ .request = request },
    };
}
|
||||
|
||||
/// Nothing to release; present so the layer matches the common deinit shape.
pub fn deinit(_: *WebBotAuthLayer, _: std.mem.Allocator) void {}
|
||||
|
||||
/// Signs the outgoing request with Web Bot Auth headers when the network has
/// a signer configured, then passes the (possibly signed) request down the
/// stack unchanged otherwise.
fn request(ptr: *anyopaque, ctx: Context, req: Request) anyerror!void {
    const self: *WebBotAuthLayer = @ptrCast(@alignCast(ptr));
    var signed = req;

    if (ctx.network.web_bot_auth) |*wba| {
        // Scratch arena for signature material; released as soon as the
        // headers have been signed.
        const scratch = try ctx.network.app.arena_pool.acquire(.{ .debug = "WebBotAuthLayer" });
        defer ctx.network.app.arena_pool.release(scratch);

        try wba.signRequest(scratch, &signed.headers, URL.getHost(req.url));
    }

    return self.next.request(ctx, signed);
}
|
||||
Reference in New Issue
Block a user