Mirror of https://github.com/lightpanda-io/browser.git (synced 2025-10-29 15:13:28 +00:00)
Merge pull request #601 from lightpanda-io/http_gzip
Support gzip compressed content for the synchronous http client
@@ -214,6 +214,7 @@ pub const Request = struct {
         if (opts.tls_verify_host) |override| {
             self._tls_verify_host = override;
         }
+
         try self.prepareInitialSend();
         return self.doSendSync();
     }
@@ -905,6 +906,37 @@ const SyncHandler = struct {
         std.debug.assert(result.done or reader.body_reader != null);
         std.debug.assert(result.data == null);

+        // See CompressedReader for an explanation. This isn't great code. Sorry.
+        if (reader.response.get("content-encoding")) |ce| {
+            if (std.ascii.eqlIgnoreCase(ce, "gzip") == false) {
+                log.err("unsupported content encoding '{s}' for: {}", .{ ce, request.uri });
+                return error.UnsupportedContentEncoding;
+            }
+
+            var compress_reader = CompressedReader{
+                .over = "",
+                .inner = &reader,
+                .done = result.done,
+                .buffer = state.read_buf,
+                .data = result.unprocessed,
+                .connection = connection,
+            };
+
+            var body: std.ArrayListUnmanaged(u8) = .{};
+            var decompressor = std.compress.gzip.decompressor(compress_reader.reader());
+            try decompressor.decompress(body.writer(request.arena));
+
+            return .{
+                .header = reader.response,
+                ._done = true,
+                ._request = request,
+                ._peek_buf = body.items,
+                ._peek_len = body.items.len,
+                ._buf = undefined,
+                ._reader = undefined,
+                ._connection = undefined,
+            };
+        }
+
         return .{
             ._buf = buf,
             ._request = request,
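The decompression above is pull-based: std.compress.gzip.decompressor wraps a reader and pulls compressed bytes through it on demand, which is why the commit needs the CompressedReader shim introduced further down. Below is a minimal standalone sketch of that pull model, assuming the same Zig 0.12-era std.compress API the diff itself calls; the compressor/finish write side and all names here are this sketch's assumptions, not part of the commit, and an in-memory buffer stands in for the live connection.

const std = @import("std");

test "pull-based gzip round trip (sketch)" {
    const allocator = std.testing.allocator;

    // Compress a known payload into an in-memory buffer. In the diff, the
    // compressed bytes come off the socket instead.
    var compressed: std.ArrayListUnmanaged(u8) = .{};
    defer compressed.deinit(allocator);
    var compressor = try std.compress.gzip.compressor(compressed.writer(allocator), .{});
    try compressor.writer().writeAll("A new browser built for machines\n");
    try compressor.finish();

    // Pull model: the decompressor is handed a reader and drains it on
    // demand. In the diff, CompressedReader plays the role that
    // fbs.reader() plays here.
    var fbs = std.io.fixedBufferStream(compressed.items);
    var decompressor = std.compress.gzip.decompressor(fbs.reader());
    const body = try decompressor.reader().readAllAlloc(allocator, 4096);
    defer allocator.free(body);

    try std.testing.expectEqualStrings("A new browser built for machines\n", body);
}

The diff drains the decompressor with decompressor.decompress(writer) into an arena-backed list; readAllAlloc over decompressor.reader() is an equivalent way to consume the same pull-style stream.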
@@ -996,6 +1028,90 @@ const SyncHandler = struct {
             }
         }
     };
+
+    // We don't ask for encoding, but some providers (CloudFront!!)
+    // encode anyways. This is an issue for our async-path because Zig's
+    // decompressors aren't async-friendly - they want to pull data in
+    // rather than being given data when it's available. Unfortunately
+    // this is a problem for our own Reader, which is shared by both our
+    // sync and async handlers, but has an async-ish API. It's hard to
+    // use our Reader with Zig's decompressors. Given the way our Reader
+    // is written, this is a problem even for our sync requests. For now, we
+    // just read the entire body into memory, which makes things manageable.
+    // Finally, we leverage the existing `peek` logic in the Response to make
+    // this fully-read content available.
+    // If you think about it, this CompressedReader is just a fancy "peek" over
+    // the entire body.
+    const CompressedReader = struct {
+        done: bool,
+        buffer: []u8,
+        inner: *Reader,
+        connection: Connection,
+
+        // Represents data directly from the socket. It hasn't been processed
+        // by the body reader. It could, for example, have chunk information in it.
+        // It needs to be processed by `inner` before it can be returned.
+        data: ?[]u8,
+
+        // Represents data that _was_ processed by the body reader, but couldn't
+        // fit in the destination buffer given to read.
+        // This adds complexity, but the reality is that we can read more data
+        // from the socket than space we have in the given `dest`. Think of
+        // this as doing something like a BufferedReader. We _could_ limit
+        // our reads to dest.len, but we can overread when initially reading
+        // the header/response, and at that point, we don't know anything about
+        // this Compression stuff.
+        over: []const u8,

+        const IOReader = std.io.Reader(*CompressedReader, anyerror, read);
+
+        pub fn reader(self: *CompressedReader) IOReader {
+            return .{ .context = self };
+        }
+
+        fn read(self: *CompressedReader, dest: []u8) anyerror!usize {
+            if (self.over.len > 0) {
+                // data from a previous `read` which is ready to go as-is, i.e.
+                // it's already been processed by inner (the body reader).
+                const l = @min(self.over.len, dest.len);
+                @memcpy(dest[0..l], self.over[0..l]);
+                self.over = self.over[l..];
+                return l;
+            }
+
+            var buffer = self.buffer;
+            buffer = buffer[0..@min(dest.len, buffer.len)];
+
+            while (true) {
+                if (try self.processData()) |data| {
+                    const l = @min(data.len, dest.len);
+                    @memcpy(dest[0..l], data[0..l]);
+
+                    // if we processed more data than fits into dest, we store
+                    // it in `over` for the next call to `read`
+                    self.over = data[l..];
+                    return l;
+                }
+
+                if (self.done) {
+                    return 0;
+                }
+
+                const n = try self.connection.read(self.buffer);
+                self.data = self.buffer[0..n];
+            }
+        }
+
+        fn processData(self: *CompressedReader) !?[]u8 {
+            const data = self.data orelse return null;
+            const result = try self.inner.process(data);
+
+            self.done = result.done;
+            self.data = result.unprocessed; // for the next call
+
+            return result.data;
+        }
+    };
 };

 // Used for reading the response (both the header and the body)
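The IOReader alias in the hunk above is the classic (pre-0.15) std.io.Reader adapter: supply a context pointer type, an error set, and a read function, and you get back a generic reader that any consumer, including the gzip decompressor, can drain. Here is a toy sketch of the same wiring, with every name invented for illustration:

const std = @import("std");

// Illustrative only: a reader that yields `remaining` copies of 'x'.
// Same adapter shape as CompressedReader: context type, error set, read fn.
const RepeatReader = struct {
    remaining: usize,

    const IOReader = std.io.Reader(*RepeatReader, anyerror, read);

    pub fn reader(self: *RepeatReader) IOReader {
        return .{ .context = self };
    }

    fn read(self: *RepeatReader, dest: []u8) anyerror!usize {
        // Returning 0 signals end-of-stream to generic consumers,
        // exactly like CompressedReader's `if (self.done) return 0;`.
        const n = @min(self.remaining, dest.len);
        @memset(dest[0..n], 'x');
        self.remaining -= n;
        return n;
    }
};

test "std.io.Reader adapter (sketch)" {
    var rr = RepeatReader{ .remaining = 5 };
    const out = try rr.reader().readAllAlloc(std.testing.allocator, 16);
    defer std.testing.allocator.free(out);
    try std.testing.expectEqualStrings("xxxxx", out);
}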
@@ -1576,6 +1692,13 @@ pub const Response = struct {
     }

     pub fn peek(self: *Response) ![]u8 {
+        if (self._peek_len > 0) {
+            // Under normal usage, this is only possible when we're dealing
+            // with a compressed response (despite not asking for it). We handle
+            // these responses by essentially peeking the entire body.
+            return self._peek_buf[0..self._peek_len];
+        }
+
         if (try self.processData()) |data| {
             // We already have some or all of the body. This happens because
             // we always read as much as we can, so getting the header and
@@ -1906,6 +2029,23 @@ test "HttpClient: sync with body" {
     }
 }

+test "HttpClient: sync with gzip body" {
+    for (0..2) |i| {
+        var client = try testClient();
+        defer client.deinit();
+
+        const uri = try Uri.parse("http://127.0.0.1:9582/http_client/gzip");
+        var req = try client.request(.GET, &uri);
+        var res = try req.sendSync(.{});
+
+        if (i == 0) {
+            try testing.expectEqual("A new browser built for machines\n", try res.peek());
+        }
+        try testing.expectEqual("A new browser built for machines\n", try res.next());
+        try testing.expectEqual("gzip", res.header.get("content-encoding"));
+    }
+}
+
 test "HttpClient: sync tls with body" {
     var arr: std.ArrayListUnmanaged(u8) = .{};
     defer arr.deinit(testing.allocator);
@@ -494,6 +494,11 @@ fn serveHTTP(address: std.net.Address) !void {
             .status = .moved_permanently,
             .extra_headers = &.{.{ .name = "LOCATION", .value = "https://127.0.0.1:9581/http_client/body" }},
         });
+    } else if (std.mem.eql(u8, path, "/http_client/gzip")) {
+        const body = &.{ 0x1f, 0x8b, 0x08, 0x08, 0x01, 0xc6, 0x19, 0x68, 0x00, 0x03, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x68, 0x74, 0x6d, 0x6c, 0x00, 0x73, 0x54, 0xc8, 0x4b, 0x2d, 0x57, 0x48, 0x2a, 0xca, 0x2f, 0x2f, 0x4e, 0x2d, 0x52, 0x48, 0x2a, 0xcd, 0xcc, 0x29, 0x51, 0x48, 0xcb, 0x2f, 0x52, 0xc8, 0x4d, 0x4c, 0xce, 0xc8, 0xcc, 0x4b, 0x2d, 0xe6, 0x02, 0x00, 0xe7, 0xc3, 0x4b, 0x27, 0x21, 0x00, 0x00, 0x00 };
+        try request.respond(body, .{
+            .extra_headers = &.{.{ .name = "Content-Encoding", .value = "gzip" }},
+        });
     } else if (std.mem.eql(u8, path, "/http_client/echo")) {
         var headers: std.ArrayListUnmanaged(std.http.Header) = .{};
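The fixture is one complete gzip member: 0x1f 0x8b is the magic, the first 0x08 selects DEFLATE, the second 0x08 is the FLG byte with FNAME set, the next four bytes are a little-endian mtime, then XFL (0x00) and OS (0x03, Unix), and the NUL-terminated bytes 74 65 73 74 2e 68 74 6d 6c spell the embedded original filename "test.html". The trailing eight bytes are the CRC32 and the uncompressed size (0x21 = 33 bytes). A quick standalone check that these bytes inflate to the string the new test expects, reusing the same decompressor call as the diff (a sketch, assuming Zig 0.12-era std APIs):

const std = @import("std");

test "gzip fixture inflates to the expected body" {
    // Same bytes as the /http_client/gzip fixture above.
    const fixture = [_]u8{
        0x1f, 0x8b, 0x08, 0x08, 0x01, 0xc6, 0x19, 0x68, 0x00, 0x03,
        0x74, 0x65, 0x73, 0x74, 0x2e, 0x68, 0x74, 0x6d, 0x6c, 0x00,
        0x73, 0x54, 0xc8, 0x4b, 0x2d, 0x57, 0x48, 0x2a, 0xca, 0x2f,
        0x2f, 0x4e, 0x2d, 0x52, 0x48, 0x2a, 0xcd, 0xcc, 0x29, 0x51,
        0x48, 0xcb, 0x2f, 0x52, 0xc8, 0x4d, 0x4c, 0xce, 0xc8, 0xcc,
        0x4b, 0x2d, 0xe6, 0x02, 0x00, 0xe7, 0xc3, 0x4b, 0x27, 0x21,
        0x00, 0x00, 0x00,
    };

    var fbs = std.io.fixedBufferStream(&fixture);
    var decompressor = std.compress.gzip.decompressor(fbs.reader());
    const body = try decompressor.reader().readAllAlloc(std.testing.allocator, 4096);
    defer std.testing.allocator.free(body);
    try std.testing.expectEqualStrings("A new browser built for machines\n", body);
}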