Merge pull request #553 from lightpanda-io/mime_sniffing

Try to sniff the mime type based on the body content
This commit is contained in:
Pierre Tachoire
2025-04-22 17:25:29 +02:00
committed by GitHub
5 changed files with 343 additions and 160 deletions

View File

@@ -435,24 +435,19 @@ pub const Page = struct {
log.info("GET {any} {d}", .{ url, header.status }); log.info("GET {any} {d}", .{ url, header.status });
const ct = blk: { const content_type = header.get("content-type");
break :blk header.get("content-type") orelse {
// no content type in HTTP headers.
// TODO try to sniff mime type from the body.
log.info("no content-type HTTP header", .{});
// Assume it's HTML for now. const mime: Mime = blk: {
break :blk "text/html; charset=utf-8"; if (content_type) |ct| {
}; break :blk try Mime.parse(arena, ct);
}; }
break :blk Mime.sniff(try response.peek());
log.debug("header content-type: {s}", .{ct}); } orelse .unknown;
var mime = try Mime.parse(arena, ct);
if (mime.isHTML()) { if (mime.isHTML()) {
try self.loadHTMLDoc(&response, mime.charset orelse "utf-8"); try self.loadHTMLDoc(&response, mime.charset orelse "utf-8");
} else { } else {
log.info("non-HTML document: {s}", .{ct}); log.info("non-HTML document: {s}", .{content_type orelse "null"});
var arr: std.ArrayListUnmanaged(u8) = .{}; var arr: std.ArrayListUnmanaged(u8) = .{};
while (try response.next()) |data| { while (try response.next()) |data| {
try arr.appendSlice(arena, try arena.dupe(u8, data)); try arr.appendSlice(arena, try arena.dupe(u8, data));

View File

@@ -24,10 +24,17 @@ pub const Mime = struct {
params: []const u8 = "", params: []const u8 = "",
charset: ?[]const u8 = null, charset: ?[]const u8 = null,
pub const unknown = Mime{
.params = "",
.charset = "",
.content_type = .{ .unknown = {} },
};
pub const ContentTypeEnum = enum { pub const ContentTypeEnum = enum {
text_xml, text_xml,
text_html, text_html,
text_plain, text_plain,
unknown,
other, other,
}; };
@@ -35,21 +42,26 @@ pub const Mime = struct {
text_xml: void, text_xml: void,
text_html: void, text_html: void,
text_plain: void, text_plain: void,
unknown: void,
other: struct { type: []const u8, sub_type: []const u8 }, other: struct { type: []const u8, sub_type: []const u8 },
}; };
pub fn parse(arena: Allocator, input: []const u8) !Mime { pub fn parse(arena: Allocator, input: []u8) !Mime {
if (input.len > 255) { if (input.len > 255) {
return error.TooBig; return error.TooBig;
} }
var trimmed = trim(input);
const content_type, const type_len = try parseContentType(trimmed); // Zig's trim API is broken. The return type is always `[]const u8`,
if (type_len >= trimmed.len) { // even if the input type is `[]u8`. @constCast is safe here.
var normalized = @constCast(std.mem.trim(u8, input, &std.ascii.whitespace));
_ = std.ascii.lowerString(normalized, normalized);
const content_type, const type_len = try parseContentType(normalized);
if (type_len >= normalized.len) {
return .{ .content_type = content_type }; return .{ .content_type = content_type };
} }
const params = trimLeft(trimmed[type_len..]); const params = trimLeft(normalized[type_len..]);
var charset: ?[]const u8 = null; var charset: ?[]const u8 = null;
@@ -63,11 +75,12 @@ pub const Mime = struct {
return error.Invalid; return error.Invalid;
} }
switch (name.len) { const attribute_name = std.meta.stringToEnum(enum {
7 => if (isCaseEqual("charset", name)) { charset,
charset = try parseValue(arena, value); }, name) orelse continue;
},
else => {}, switch (attribute_name) {
.charset => charset = try parseAttributeValue(arena, value),
} }
} }
@@ -78,66 +91,113 @@ pub const Mime = struct {
}; };
} }
/// Guesses a MIME type from the leading bytes of `body`.
///
/// Leading whitespace (space, tab, LF, CR, form feed) is skipped first.
/// Returns:
///   - `text_plain` when the content starts with a UTF-8 or UTF-16 byte order
///     mark,
///   - `text_html` / `text_xml` when the content starts with one of the known
///     `<...` signatures (matched case-insensitively),
///   - `null` when the body is empty or nothing matches.
///
/// The returned Mime only has `content_type` set; `params` and `charset` keep
/// their defaults. No allocation is performed.
pub fn sniff(body: []const u8) ?Mime {
    // 0x0C is form feed
    const content = std.mem.trimLeft(u8, body, &.{ ' ', '\t', '\n', '\r', 0x0C });
    if (content.len == 0) {
        return null;
    }

    if (content[0] != '<') {
        if (std.mem.startsWith(u8, content, &.{ 0xEF, 0xBB, 0xBF })) {
            // UTF-8 BOM
            return .{ .content_type = .{ .text_plain = {} } };
        }
        if (std.mem.startsWith(u8, content, &.{ 0xFE, 0xFF })) {
            // UTF-16 big-endian BOM
            return .{ .content_type = .{ .text_plain = {} } };
        }
        if (std.mem.startsWith(u8, content, &.{ 0xFF, 0xFE })) {
            // UTF-16 little-endian BOM
            return .{ .content_type = .{ .text_plain = {} } };
        }
        return null;
    }

    // The longest thing we need to look at is "<!doctype html" followed by one
    // tag-terminating byte: 15 bytes. We already know content[0] == '<', so we
    // skip it and only need to lowercase (at most) the 14 bytes that follow.
    var buf: [14]u8 = undefined;

    const stripped = content[1..];
    const prefix_len = @min(stripped.len, buf.len);
    const prefix = std.ascii.lowerString(&buf, stripped[0..prefix_len]);

    const Signature = struct {
        // Lowercase pattern, without the leading '<'.
        pattern: []const u8,
        content_type: ContentType,
        // Whether the pattern must be followed by a tag-terminating byte
        // (' ' or '>'). Tag names need it so that "<a" does not match
        // "<abbr>"; "<!--" and "<?xml" are complete signatures on their own.
        terminated: bool = true,
    };

    const signatures = [_]Signature{
        .{ .pattern = "!doctype html", .content_type = .{ .text_html = {} } },
        .{ .pattern = "html", .content_type = .{ .text_html = {} } },
        .{ .pattern = "script", .content_type = .{ .text_html = {} } },
        .{ .pattern = "iframe", .content_type = .{ .text_html = {} } },
        .{ .pattern = "h1", .content_type = .{ .text_html = {} } },
        .{ .pattern = "div", .content_type = .{ .text_html = {} } },
        .{ .pattern = "font", .content_type = .{ .text_html = {} } },
        .{ .pattern = "table", .content_type = .{ .text_html = {} } },
        .{ .pattern = "a", .content_type = .{ .text_html = {} } },
        .{ .pattern = "style", .content_type = .{ .text_html = {} } },
        .{ .pattern = "title", .content_type = .{ .text_html = {} } },
        .{ .pattern = "b", .content_type = .{ .text_html = {} } },
        .{ .pattern = "body", .content_type = .{ .text_html = {} } },
        .{ .pattern = "br", .content_type = .{ .text_html = {} } },
        .{ .pattern = "p", .content_type = .{ .text_html = {} } },
        .{ .pattern = "!--", .content_type = .{ .text_html = {} }, .terminated = false },
        // An XML declaration starts with "<?xml", not "<xml".
        .{ .pattern = "?xml", .content_type = .{ .text_xml = {} }, .terminated = false },
    };

    inline for (signatures) |signature| {
        if (std.mem.startsWith(u8, prefix, signature.pattern)) {
            if (!signature.terminated) {
                return .{ .content_type = signature.content_type };
            }
            if (prefix.len > signature.pattern.len) {
                // a "tag-terminating-byte"
                const next = prefix[signature.pattern.len];
                if (next == ' ' or next == '>') {
                    return .{ .content_type = signature.content_type };
                }
            }
        }
    }

    return null;
}
pub fn isHTML(self: *const Mime) bool { pub fn isHTML(self: *const Mime) bool {
return self.content_type == .text_html; return self.content_type == .text_html;
} }
// we expect value to be lowercase
fn parseContentType(value: []const u8) !struct { ContentType, usize } { fn parseContentType(value: []const u8) !struct { ContentType, usize } {
const separator = std.mem.indexOfScalarPos(u8, value, 0, '/') orelse { const end = std.mem.indexOfScalarPos(u8, value, 0, ';') orelse value.len;
return error.Invalid; const type_name = trimRight(value[0..end]);
}; const attribute_start = end + 1;
const end = std.mem.indexOfScalarPos(u8, value, separator, ';') orelse blk: {
break :blk value.len; if (std.meta.stringToEnum(enum {
}; @"text/xml",
@"text/html",
@"text/plain",
}, type_name)) |known_type| {
const ct: ContentType = switch (known_type) {
.@"text/xml" => .{ .text_xml = {} },
.@"text/html" => .{ .text_html = {} },
.@"text/plain" => .{ .text_plain = {} },
};
return .{ ct, attribute_start };
}
const separator = std.mem.indexOfScalarPos(u8, type_name, 0, '/') orelse return error.Invalid;
const main_type = value[0..separator]; const main_type = value[0..separator];
const sub_type = trimRight(value[separator + 1 .. end]); const sub_type = trimRight(value[separator + 1 .. end]);
if (parseCommonContentType(main_type, sub_type)) |content_type| { if (main_type.len == 0 or validType(main_type) == false) {
return .{ content_type, end + 1 };
}
if (main_type.len == 0) {
return error.Invalid; return error.Invalid;
} }
if (validType(main_type) == false) { if (sub_type.len == 0 or validType(sub_type) == false) {
return error.Invalid; return error.Invalid;
} }
if (sub_type.len == 0) { return .{ .{ .other = .{
return error.Invalid;
}
if (validType(sub_type) == false) {
return error.Invalid;
}
const content_type = ContentType{ .other = .{
.type = main_type, .type = main_type,
.sub_type = sub_type, .sub_type = sub_type,
} }; } }, attribute_start };
return .{ content_type, end + 1 };
}
fn parseCommonContentType(main_type: []const u8, sub_type: []const u8) ?ContentType {
switch (main_type.len) {
4 => if (isCaseEqual("text", main_type)) {
switch (sub_type.len) {
3 => if (isCaseEqual("xml", sub_type)) {
return .{ .text_xml = {} };
},
4 => if (isCaseEqual("html", sub_type)) {
return .{ .text_html = {} };
},
5 => if (isCaseEqual("plain", sub_type)) {
return .{ .text_plain = {} };
},
else => {},
}
},
else => {},
}
return null;
} }
const T_SPECIAL = blk: { const T_SPECIAL = blk: {
@@ -148,7 +208,7 @@ pub const Mime = struct {
break :blk v; break :blk v;
}; };
fn parseValue(arena: Allocator, value: []const u8) ![]const u8 { fn parseAttributeValue(arena: Allocator, value: []const u8) ![]const u8 {
if (value[0] != '"') { if (value[0] != '"') {
return value; return value;
} }
@@ -218,10 +278,6 @@ pub const Mime = struct {
return true; return true;
} }
// Returns the sub-slice of `s` with every leading and trailing byte found in
// std.ascii.whitespace removed. Nothing is copied or allocated.
fn trim(s: []const u8) []const u8 {
return std.mem.trim(u8, s, &std.ascii.whitespace);
}
fn trimLeft(s: []const u8) []const u8 { fn trimLeft(s: []const u8) []const u8 {
return std.mem.trimLeft(u8, s, &std.ascii.whitespace); return std.mem.trimLeft(u8, s, &std.ascii.whitespace);
} }
@@ -229,28 +285,12 @@ pub const Mime = struct {
fn trimRight(s: []const u8) []const u8 { fn trimRight(s: []const u8) []const u8 {
return std.mem.trimRight(u8, s, &std.ascii.whitespace); return std.mem.trimRight(u8, s, &std.ascii.whitespace);
} }
// Case-insensitive comparison of `value` against the comptime string `target`.
//
// Fast path: the bytes of `target` and the first `byte_len` bytes of `value`
// are each reinterpreted as one unsigned integer and compared; an exact match
// returns true immediately. Otherwise falls back to std.ascii.eqlIgnoreCase.
//
// `value[0..byte_len]` requires `value.len >= byte_len`; the call sites
// visible in this file only call this after switching on the length.
fn isCaseEqual(comptime target: anytype, value: []const u8) bool {
// - 8 because we don't care about the sentinel
const bit_len = @bitSizeOf(@TypeOf(target.*)) - 8;
const byte_len = bit_len / 8;
// An unsigned integer type exactly as wide as the target string.
const T = @Type(.{ .int = .{
.bits = bit_len,
.signedness = .unsigned,
} });
const bit_target: T = @bitCast(@as(*const [byte_len]u8, target).*);
if (@as(T, @bitCast(value[0..byte_len].*)) == bit_target) {
return true;
}
// Not byte-identical: do the slower per-byte case-insensitive compare.
return std.ascii.eqlIgnoreCase(value, target);
}
}; };
const testing = std.testing; const testing = @import("../testing.zig");
test "Mime: invalid " { test "Mime: invalid " {
defer testing.reset();
const invalids = [_][]const u8{ const invalids = [_][]const u8{
"", "",
"text", "text",
@@ -270,11 +310,14 @@ test "Mime: invalid " {
}; };
for (invalids) |invalid| { for (invalids) |invalid| {
try testing.expectError(error.Invalid, Mime.parse(undefined, invalid)); const mutable_input = try testing.arena_allocator.dupe(u8, invalid);
try testing.expectError(error.Invalid, Mime.parse(undefined, mutable_input));
} }
} }
test "Mime: parse common" { test "Mime: parse common" {
defer testing.reset();
try expect(.{ .content_type = .{ .text_xml = {} } }, "text/xml"); try expect(.{ .content_type = .{ .text_xml = {} } }, "text/xml");
try expect(.{ .content_type = .{ .text_html = {} } }, "text/html"); try expect(.{ .content_type = .{ .text_html = {} } }, "text/html");
try expect(.{ .content_type = .{ .text_plain = {} } }, "text/plain"); try expect(.{ .content_type = .{ .text_plain = {} } }, "text/plain");
@@ -297,6 +340,8 @@ test "Mime: parse common" {
} }
test "Mime: parse uncommon" { test "Mime: parse uncommon" {
defer testing.reset();
const text_javascript = Expectation{ const text_javascript = Expectation{
.content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } }, .content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } },
}; };
@@ -306,12 +351,14 @@ test "Mime: parse uncommon" {
try expect(text_javascript, " text/javascript\t ;"); try expect(text_javascript, " text/javascript\t ;");
try expect( try expect(
.{ .content_type = .{ .other = .{ .type = "Text", .sub_type = "Javascript" } } }, .{ .content_type = .{ .other = .{ .type = "text", .sub_type = "javascript" } } },
"Text/Javascript", "Text/Javascript",
); );
} }
test "Mime: parse charset" { test "Mime: parse charset" {
defer testing.reset();
try expect(.{ try expect(.{
.content_type = .{ .text_xml = {} }, .content_type = .{ .text_xml = {} },
.charset = "utf-8", .charset = "utf-8",
@@ -332,11 +379,12 @@ test "Mime: parse charset" {
} }
test "Mime: isHTML" { test "Mime: isHTML" {
defer testing.reset();
const isHTML = struct { const isHTML = struct {
fn isHTML(expected: bool, input: []const u8) !void { fn isHTML(expected: bool, input: []const u8) !void {
var arena = std.heap.ArenaAllocator.init(testing.allocator); const mutable_input = try testing.arena_allocator.dupe(u8, input);
defer arena.deinit(); var mime = try Mime.parse(testing.arena_allocator, mutable_input);
var mime = try Mime.parse(arena.allocator(), input);
try testing.expectEqual(expected, mime.isHTML()); try testing.expectEqual(expected, mime.isHTML());
} }
}.isHTML; }.isHTML;
@@ -348,6 +396,71 @@ test "Mime: isHTML" {
try isHTML(false, "over/9000"); try isHTML(false, "over/9000");
} }
// Exercises Mime.sniff: first the inputs that must not be recognised, then
// every HTML signature in lower and mixed case, with and without leading
// whitespace.
test "Mime: sniff" {
// Empty body, truncated tag names, and tag names followed by a byte that is
// neither ' ' nor '>' must not be sniffed.
try testing.expectEqual(null, Mime.sniff(""));
try testing.expectEqual(null, Mime.sniff("<htm"));
try testing.expectEqual(null, Mime.sniff("<html!"));
try testing.expectEqual(null, Mime.sniff("<a_"));
// No tag-terminating byte after "html".
try testing.expectEqual(null, Mime.sniff("<!doctype html"));
// NOTE(review): as rendered here the next two inputs read "<!doctype html>",
// which sniff would accept. The original literals presumably contain extra
// whitespace that this view collapsed - confirm against the repository.
try testing.expectEqual(null, Mime.sniff("<!doctype html>"));
try testing.expectEqual(null, Mime.sniff("\n <!doctype html>"));
// '/' is not a tag-terminating byte.
try testing.expectEqual(null, Mime.sniff("\n \t <font/>"));
// Asserts that `input` is sniffed and that the result is tagged text_html.
const expectHTML = struct {
fn expect(input: []const u8) !void {
try testing.expectEqual(.text_html, std.meta.activeTag(Mime.sniff(input).?.content_type));
}
}.expect;
try expectHTML("<!doctype html ");
try expectHTML("\n \t <!DOCTYPE HTML ");
try expectHTML("<html ");
try expectHTML("\n \t <HtmL> even more stufff");
try expectHTML("<script>");
try expectHTML("\n \t <SCRIpt >alert(document.cookies)</script>");
try expectHTML("<iframe>");
try expectHTML(" \t <ifRAME >");
try expectHTML("<h1>");
try expectHTML(" <H1>");
try expectHTML("<div>");
try expectHTML("\n\r\r <DiV>");
try expectHTML("<font>");
try expectHTML(" <fonT>");
try expectHTML("<table>");
try expectHTML("\t\t<TAblE>");
try expectHTML("<a>");
try expectHTML("\n\n<A>");
try expectHTML("<style>");
try expectHTML(" \n\t <STyLE>");
try expectHTML("<title>");
try expectHTML(" \n\t <TITLE>");
try expectHTML("<b>");
try expectHTML(" \n\t <B>");
try expectHTML("<body>");
try expectHTML(" \n\t <BODY>");
try expectHTML("<br>");
try expectHTML(" \n\t <BR>");
try expectHTML("<p>");
try expectHTML(" \n\t <P>");
try expectHTML("<!-->");
try expectHTML(" \n\t <!-->");
}
const Expectation = struct { const Expectation = struct {
content_type: Mime.ContentType, content_type: Mime.ContentType,
params: []const u8 = "", params: []const u8 = "",
@@ -355,11 +468,9 @@ const Expectation = struct {
}; };
fn expect(expected: Expectation, input: []const u8) !void { fn expect(expected: Expectation, input: []const u8) !void {
var arena = std.heap.ArenaAllocator.init(testing.allocator); const mutable_input = try testing.arena_allocator.dupe(u8, input);
defer arena.deinit();
const actual = try Mime.parse(arena.allocator(), input);
const actual = try Mime.parse(testing.arena_allocator, mutable_input);
try testing.expectEqual( try testing.expectEqual(
std.meta.activeTag(expected.content_type), std.meta.activeTag(expected.content_type),
std.meta.activeTag(actual.content_type), std.meta.activeTag(actual.content_type),
@@ -368,16 +479,16 @@ fn expect(expected: Expectation, input: []const u8) !void {
switch (expected.content_type) { switch (expected.content_type) {
.other => |e| { .other => |e| {
const a = actual.content_type.other; const a = actual.content_type.other;
try testing.expectEqualStrings(e.type, a.type); try testing.expectEqual(e.type, a.type);
try testing.expectEqualStrings(e.sub_type, a.sub_type); try testing.expectEqual(e.sub_type, a.sub_type);
}, },
else => {}, // already asserted above else => {}, // already asserted above
} }
try testing.expectEqualStrings(expected.params, actual.params); try testing.expectEqual(expected.params, actual.params);
if (expected.charset) |ec| { if (expected.charset) |ec| {
try testing.expectEqualStrings(ec, actual.charset.?); try testing.expectEqual(ec, actual.charset.?);
} else { } else {
try testing.expectEqual(null, actual.charset); try testing.expectEqual(null, actual.charset);
} }

View File

@@ -254,7 +254,7 @@ pub const XMLHttpRequest = struct {
}; };
const ResponseObj = union(ResponseObjTag) { const ResponseObj = union(ResponseObjTag) {
Document: *parser.Document, Document: *parser.Document,
Failure: bool, Failure: void,
JSON: std.json.Parsed(JSONValue), JSON: std.json.Parsed(JSONValue),
fn deinit(self: ResponseObj) void { fn deinit(self: ResponseObj) void {
@@ -511,12 +511,8 @@ pub const XMLHttpRequest = struct {
} }
// extract a mime type from headers. // extract a mime type from headers.
{ if (header.get("content-type")) |ct| {
var raw: []const u8 = "text/xml"; self.response_mime = Mime.parse(self.arena, ct) catch |e| {
if (header.get("content-type")) |ct| {
raw = try self.arena.dupe(u8, ct);
}
self.response_mime = Mime.parse(self.arena, raw) catch |e| {
return self.onErr(e); return self.onErr(e);
}; };
} }
@@ -724,26 +720,24 @@ pub const XMLHttpRequest = struct {
// TODO parse XML. // TODO parse XML.
// https://xhr.spec.whatwg.org/#response-object // https://xhr.spec.whatwg.org/#response-object
fn setResponseObjDocument(self: *XMLHttpRequest) void { fn setResponseObjDocument(self: *XMLHttpRequest) void {
const response_mime = &self.response_mime.?; const mime = self.response_mime orelse return;
const isHTML = response_mime.isHTML(); if (mime.isHTML() == false) {
// TODO If finalMIME is not an HTML MIME type or an XML MIME type, then
// return.
if (!isHTML) {
return; return;
} }
var ccharset: [:0]const u8 = "utf-8"; var ccharset: [:0]const u8 = "utf-8";
if (response_mime.charset) |rc| { if (mime.charset) |rc| {
ccharset = self.arena.dupeZ(u8, rc) catch { if (std.mem.eql(u8, rc, "utf-8") == false) {
self.response_obj = .{ .Failure = true }; ccharset = self.arena.dupeZ(u8, rc) catch {
return; self.response_obj = .{ .Failure = {} };
}; return;
};
}
} }
var fbs = std.io.fixedBufferStream(self.response_bytes.items); var fbs = std.io.fixedBufferStream(self.response_bytes.items);
const doc = parser.documentHTMLParse(fbs.reader(), ccharset) catch { const doc = parser.documentHTMLParse(fbs.reader(), ccharset) catch {
self.response_obj = .{ .Failure = true }; self.response_obj = .{ .Failure = {} };
return; return;
}; };
@@ -766,7 +760,7 @@ pub const XMLHttpRequest = struct {
.{}, .{},
) catch |e| { ) catch |e| {
log.err("parse JSON: {}", .{e}); log.err("parse JSON: {}", .{e});
self.response_obj = .{ .Failure = true }; self.response_obj = .{ .Failure = {} };
return; return;
}; };

View File

@@ -32,9 +32,13 @@ const Loop = @import("../runtime/loop.zig").Loop;
const log = std.log.scoped(.http_client); const log = std.log.scoped(.http_client);
// We might need to peek at the body to try and sniff the content-type.
// While we only need a few bytes, in most cases we need to ignore leading
// whitespace, so we want to get a reasonable-sized chunk.
const PEEK_BUF_LEN = 1024;
const BUFFER_LEN = 32 * 1024; const BUFFER_LEN = 32 * 1024;
// The longest individual header line that we support
const MAX_HEADER_LINE_LEN = 4096; const MAX_HEADER_LINE_LEN = 4096;
// Thread-safe. Holds our root certificate, connection pool and state pool // Thread-safe. Holds our root certificate, connection pool and state pool
@@ -900,6 +904,7 @@ const SyncHandler = struct {
// object which can be iterated to get the body. // object which can be iterated to get the body.
std.debug.assert(result.done or reader.body_reader != null); std.debug.assert(result.done or reader.body_reader != null);
std.debug.assert(result.data == null); std.debug.assert(result.data == null);
return .{ return .{
._buf = buf, ._buf = buf,
._request = request, ._request = request,
@@ -907,6 +912,8 @@ const SyncHandler = struct {
._done = result.done, ._done = result.done,
._connection = connection, ._connection = connection,
._data = result.unprocessed, ._data = result.unprocessed,
._peek_len = 0,
._peek_buf = state.peek_buf,
.header = reader.response, .header = reader.response,
}; };
} }
@@ -1046,7 +1053,7 @@ const Reader = struct {
// Still parsing the header // Still parsing the header
// what data do we have leftover in `data`. // What data do we have leftover in `data`?
// When header_done == true, then this is part (or all) of the body // When header_done == true, then this is part (or all) of the body
// When header_done == false, then this is a header line that we didn't // When header_done == false, then this is a header line that we didn't
// have enough data for. // have enough data for.
@@ -1504,23 +1511,49 @@ pub const Progress = struct {
header: ResponseHeader, header: ResponseHeader,
}; };
// The value that we return from a synchronous requst. // The value that we return from a synchronous request.
pub const Response = struct { pub const Response = struct {
_reader: Reader, _reader: Reader,
_request: *Request, _request: *Request,
_buf: []u8,
_connection: SyncHandler.Connection, _connection: SyncHandler.Connection,
// the buffer to read the peeked data into
_peek_buf: []u8,
// the length of data we've peeked. The peeked_data is _peek_buf[0.._peek_len].
// It's possible for peek_len > 0 and _done == true, in which case, the
// _peeked data should be emitted once and subsequent calls to `next` should
// return null.
_peek_len: usize,
// What we'll read from the socket into. This is the State's read_buf
_buf: []u8,
// Whether or not we're done reading the response. When true, next will
// return null.
_done: bool, _done: bool,
// Any data we over-read while parsing the header. This will be returned on // Data that we've read. This can be set when the Response is first created
// the first call to next(); // from extra data received while parsing the body. Or, it can be set
// when `next` is called and we read more data from the socket.
_data: ?[]u8 = null, _data: ?[]u8 = null,
header: ResponseHeader, header: ResponseHeader,
pub fn next(self: *Response) !?[]u8 { pub fn next(self: *Response) !?[]u8 {
var buf = self._buf; // it's possible for peek_len > 0 and done == true. This would happen
// when, while peeking, we reached the end of the data. In that case,
// we return the peeked data once, and on subsequent call, we'll return
// null normally, because done == true;
const pl = self._peek_len;
if (pl > 0) {
self._peek_len = 0;
return self._peek_buf[0..pl];
}
return self._nextIgnorePeek(self._buf);
}
fn _nextIgnorePeek(self: *Response, buf: []u8) !?[]u8 {
while (true) { while (true) {
if (try self.processData()) |data| { if (try self.processData()) |data| {
return data; return data;
@@ -1541,14 +1574,38 @@ pub const Response = struct {
self._data = result.unprocessed; // for the next call self._data = result.unprocessed; // for the next call
return result.data; return result.data;
} }
/// Reads ahead in the body without consuming it, so the caller can inspect
/// the first bytes (e.g. to sniff a MIME type).
///
/// Body data is accumulated into `_peek_buf` until more than 100 bytes are
/// buffered or the body ends, whichever comes first. The returned slice is
/// everything peeked so far, `_peek_buf[0.._peek_len]`; it is empty when the
/// body is empty. `_peek_len` is left set so that `next` hands the same bytes
/// out before any further data.
///
/// Errors from `_nextIgnorePeek` are propagated.
pub fn peek(self: *Response) ![]u8 {
    const peek_buf = self._peek_buf;
    while (true) {
        const peek_len = self._peek_len;

        // NOTE(review): assumes a single chunk never exceeds the space left in
        // the peek buffer; otherwise the slicing below is out of bounds.
        // Confirm against _nextIgnorePeek / processData.
        const data = (try self._nextIgnorePeek(peek_buf[peek_len..])) orelse {
            // End of body: return whatever we managed to peek (possibly nothing).
            return peek_buf[0..peek_len];
        };

        const peek_end = peek_len + data.len;
        // `data` may already live in peek_buf (when it was read straight into
        // the slice we passed in), so the copy has to tolerate aliasing, which
        // @memcpy does not.
        std.mem.copyForwards(u8, peek_buf[peek_len..peek_end], data);
        self._peek_len = peek_end;

        if (peek_end > 100) {
            // Return everything accumulated, not just the last chunk.
            return peek_buf[0..peek_end];
        }
    }
}
}; };
// Pooled and re-used when creating a request // Pooled and re-used when creating a request
const State = struct { const State = struct {
// used for reading chunks of payload data. // We might be asked to peek at the response, i.e. to sniff the mime type.
// This will require storing any peeked data so that, later, if we stream
// the body, we can present a cohesive body.
peek_buf: []u8,
// Used for reading chunks of payload data.
read_buf: []u8, read_buf: []u8,
// use for writing data. If you're wondering why BOTH a read_buf and a // Used for writing data. If you're wondering why BOTH a read_buf and a
// write_buf, even though HTTP is req -> resp, it's for TLS, which has // write_buf, even though HTTP is req -> resp, it's for TLS, which has
// bidirectional data. // bidirectional data.
write_buf: []u8, write_buf: []u8,
@@ -1561,7 +1618,10 @@ const State = struct {
// response headers. // response headers.
arena: ArenaAllocator, arena: ArenaAllocator,
fn init(allocator: Allocator, header_size: usize, buf_size: usize) !State { fn init(allocator: Allocator, header_size: usize, peek_size: usize, buf_size: usize) !State {
const peek_buf = try allocator.alloc(u8, peek_size);
errdefer allocator.free(peek_buf);
const read_buf = try allocator.alloc(u8, buf_size); const read_buf = try allocator.alloc(u8, buf_size);
errdefer allocator.free(read_buf); errdefer allocator.free(read_buf);
@@ -1572,6 +1632,7 @@ const State = struct {
errdefer allocator.free(header_buf); errdefer allocator.free(header_buf);
return .{ return .{
.peek_buf = peek_buf,
.read_buf = read_buf, .read_buf = read_buf,
.write_buf = write_buf, .write_buf = write_buf,
.header_buf = header_buf, .header_buf = header_buf,
@@ -1585,6 +1646,7 @@ const State = struct {
fn deinit(self: *State) void { fn deinit(self: *State) void {
const allocator = self.arena.child_allocator; const allocator = self.arena.child_allocator;
allocator.free(self.peek_buf);
allocator.free(self.read_buf); allocator.free(self.read_buf);
allocator.free(self.write_buf); allocator.free(self.write_buf);
allocator.free(self.header_buf); allocator.free(self.header_buf);
@@ -1611,7 +1673,7 @@ const StatePool = struct {
for (0..count) |i| { for (0..count) |i| {
const state = try allocator.create(State); const state = try allocator.create(State);
errdefer allocator.destroy(state); errdefer allocator.destroy(state);
state.* = try State.init(allocator, MAX_HEADER_LINE_LEN, BUFFER_LEN); state.* = try State.init(allocator, MAX_HEADER_LINE_LEN, PEEK_BUF_LEN, BUFFER_LEN);
states[i] = state; states[i] = state;
started += 1; started += 1;
} }
@@ -1662,7 +1724,7 @@ const StatePool = struct {
const testing = @import("../testing.zig"); const testing = @import("../testing.zig");
test "HttpClient Reader: fuzz" { test "HttpClient Reader: fuzz" {
var state = try State.init(testing.allocator, 1024, 1024); var state = try State.init(testing.allocator, 1024, 1024, 100);
defer state.deinit(); defer state.deinit();
var res = TestResponse.init(); var res = TestResponse.init();
@@ -1773,18 +1835,23 @@ test "HttpClient: sync connect error" {
} }
test "HttpClient: sync no body" { test "HttpClient: sync no body" {
var client = try testClient(); for (0..2) |i| {
defer client.deinit(); var client = try testClient();
defer client.deinit();
const uri = try Uri.parse("http://127.0.0.1:9582/http_client/simple"); const uri = try Uri.parse("http://127.0.0.1:9582/http_client/simple");
var req = try client.request(.GET, &uri); var req = try client.request(.GET, &uri);
var res = try req.sendSync(.{}); var res = try req.sendSync(.{});
try testing.expectEqual(null, try res.next()); if (i == 0) {
try testing.expectEqual(200, res.header.status); try testing.expectEqual("", try res.peek());
try testing.expectEqual(2, res.header.count()); }
try testing.expectEqual("close", res.header.get("connection")); try testing.expectEqual(null, try res.next());
try testing.expectEqual("0", res.header.get("content-length")); try testing.expectEqual(200, res.header.status);
try testing.expectEqual(2, res.header.count());
try testing.expectEqual("close", res.header.get("connection"));
try testing.expectEqual("0", res.header.get("content-length"));
}
} }
test "HttpClient: sync tls no body" { test "HttpClient: sync tls no body" {
@@ -1804,21 +1871,26 @@ test "HttpClient: sync tls no body" {
} }
test "HttpClient: sync with body" { test "HttpClient: sync with body" {
var client = try testClient(); for (0..2) |i| {
defer client.deinit(); var client = try testClient();
defer client.deinit();
const uri = try Uri.parse("http://127.0.0.1:9582/http_client/echo"); const uri = try Uri.parse("http://127.0.0.1:9582/http_client/echo");
var req = try client.request(.GET, &uri); var req = try client.request(.GET, &uri);
var res = try req.sendSync(.{}); var res = try req.sendSync(.{});
try testing.expectEqual("over 9000!", try res.next()); if (i == 0) {
try testing.expectEqual(201, res.header.status); try testing.expectEqual("over 9000!", try res.peek());
try testing.expectEqual(5, res.header.count()); }
try testing.expectEqual("close", res.header.get("connection")); try testing.expectEqual("over 9000!", try res.next());
try testing.expectEqual("10", res.header.get("content-length")); try testing.expectEqual(201, res.header.status);
try testing.expectEqual("127.0.0.1", res.header.get("_host")); try testing.expectEqual(5, res.header.count());
try testing.expectEqual("Close", res.header.get("_connection")); try testing.expectEqual("close", res.header.get("connection"));
try testing.expectEqual("Lightpanda/1.0", res.header.get("_user-agent")); try testing.expectEqual("10", res.header.get("content-length"));
try testing.expectEqual("127.0.0.1", res.header.get("_host"));
try testing.expectEqual("Close", res.header.get("_connection"));
try testing.expectEqual("Lightpanda/1.0", res.header.get("_user-agent"));
}
} }
test "HttpClient: sync tls with body" { test "HttpClient: sync tls with body" {

View File

@@ -24,6 +24,17 @@ pub const expectError = std.testing.expectError;
pub const expectString = std.testing.expectEqualStrings; pub const expectString = std.testing.expectEqualStrings;
pub const expectEqualSlices = std.testing.expectEqualSlices; pub const expectEqualSlices = std.testing.expectEqualSlices;
// sometimes it's super useful to have an arena you don't really care about
// in a test. Like, you need a mutable string, so you just want to dupe a
// string literal. It has nothing to do with the code under test, it's just
// infrastructure for the test itself.
pub var arena_instance = std.heap.ArenaAllocator.init(std.heap.c_allocator);
pub const arena_allocator = arena_instance.allocator();
/// Resets the shared test arena, keeping its capacity for the next test.
/// Tests that use `arena_allocator` are expected to `defer testing.reset()`.
pub fn reset() void {
    // The result of reset is deliberately discarded.
    _ = arena_instance.reset(.retain_capacity);
}
const App = @import("app.zig").App; const App = @import("app.zig").App;
const parser = @import("browser/netsurf.zig"); const parser = @import("browser/netsurf.zig");