mirror of
https://github.com/lightpanda-io/browser.git
synced 2025-10-29 07:03:29 +00:00
Merge pull request #613 from lightpanda-io/css_selector_parsing_tweaks
"Improve" css selector parsing
This commit is contained in:
@@ -29,6 +29,8 @@ const PseudoClass = selector.PseudoClass;
|
|||||||
const AttributeOP = selector.AttributeOP;
|
const AttributeOP = selector.AttributeOP;
|
||||||
const Combinator = selector.Combinator;
|
const Combinator = selector.Combinator;
|
||||||
|
|
||||||
|
const REPLACEMENT_CHARACTER = &.{ 239, 191, 189 };
|
||||||
|
|
||||||
pub const ParseError = error{
|
pub const ParseError = error{
|
||||||
ExpectedSelector,
|
ExpectedSelector,
|
||||||
ExpectedIdentifier,
|
ExpectedIdentifier,
|
||||||
@@ -217,22 +219,31 @@ pub const Parser = struct {
|
|||||||
// parseName parses a name (which is like an identifier, but doesn't have
|
// parseName parses a name (which is like an identifier, but doesn't have
|
||||||
// extra restrictions on the first character).
|
// extra restrictions on the first character).
|
||||||
fn parseName(p: *Parser, w: anytype) ParseError!void {
|
fn parseName(p: *Parser, w: anytype) ParseError!void {
|
||||||
|
const sel = p.s;
|
||||||
|
const sel_len = sel.len;
|
||||||
|
|
||||||
var i = p.i;
|
var i = p.i;
|
||||||
var ok = false;
|
var ok = false;
|
||||||
|
|
||||||
while (i < p.s.len) {
|
while (i < sel_len) {
|
||||||
const c = p.s[i];
|
const c = sel[i];
|
||||||
|
|
||||||
if (nameChar(c)) {
|
if (nameChar(c)) {
|
||||||
const start = i;
|
const start = i;
|
||||||
while (i < p.s.len and nameChar(p.s[i])) i += 1;
|
while (i < sel_len and nameChar(sel[i])) i += 1;
|
||||||
w.writeAll(p.s[start..i]) catch return ParseError.WriteError;
|
w.writeAll(sel[start..i]) catch return ParseError.WriteError;
|
||||||
ok = true;
|
ok = true;
|
||||||
} else if (c == '\\') {
|
} else if (c == '\\') {
|
||||||
p.i = i;
|
p.i = i;
|
||||||
try p.parseEscape(w);
|
try p.parseEscape(w);
|
||||||
i = p.i;
|
i = p.i;
|
||||||
ok = true;
|
ok = true;
|
||||||
|
} else if (c == 0) {
|
||||||
|
w.writeAll(REPLACEMENT_CHARACTER) catch return ParseError.WriteError;
|
||||||
|
i += 1;
|
||||||
|
if (i == sel_len) {
|
||||||
|
ok = true;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// default:
|
// default:
|
||||||
break;
|
break;
|
||||||
@@ -246,33 +257,52 @@ pub const Parser = struct {
|
|||||||
// parseEscape parses a backslash escape.
|
// parseEscape parses a backslash escape.
|
||||||
// The returned string is owned by the caller.
|
// The returned string is owned by the caller.
|
||||||
fn parseEscape(p: *Parser, w: anytype) ParseError!void {
|
fn parseEscape(p: *Parser, w: anytype) ParseError!void {
|
||||||
if (p.s.len < p.i + 2 or p.s[p.i] != '\\') {
|
const sel = p.s;
|
||||||
return ParseError.InvalidEscape;
|
const sel_len = sel.len;
|
||||||
|
|
||||||
|
if (sel_len < p.i + 2 or sel[p.i] != '\\') {
|
||||||
|
p.i += 1;
|
||||||
|
w.writeAll(REPLACEMENT_CHARACTER) catch return ParseError.WriteError;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const start = p.i + 1;
|
const start = p.i + 1;
|
||||||
const c = p.s[start];
|
const c = sel[start];
|
||||||
if (ascii.isWhitespace(c)) return ParseError.EscapeLineEndingOutsideString;
|
|
||||||
|
|
||||||
// unicode escape (hex)
|
// unicode escape (hex)
|
||||||
if (ascii.isHex(c)) {
|
if (ascii.isHex(c)) {
|
||||||
var i: usize = start;
|
var i: usize = start;
|
||||||
while (i < start + 6 and i < p.s.len and ascii.isHex(p.s[i])) {
|
while (i < start + 6 and i < sel_len and ascii.isHex(sel[i])) {
|
||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
const v = std.fmt.parseUnsigned(u21, p.s[start..i], 16) catch return ParseError.InvalidUnicode;
|
|
||||||
if (p.s.len > i) {
|
const v = std.fmt.parseUnsigned(u21, sel[start..i], 16) catch {
|
||||||
switch (p.s[i]) {
|
p.i = i;
|
||||||
'\r' => {
|
w.writeAll(REPLACEMENT_CHARACTER) catch return ParseError.WriteError;
|
||||||
i += 1;
|
return;
|
||||||
if (p.s.len > i and p.s[i] == '\n') i += 1;
|
};
|
||||||
},
|
|
||||||
' ', '\t', '\n', std.ascii.control_code.ff => i += 1,
|
if (sel_len >= i) {
|
||||||
else => {},
|
if (sel_len > i) {
|
||||||
|
switch (sel[i]) {
|
||||||
|
'\r' => {
|
||||||
|
i += 1;
|
||||||
|
if (sel_len > i and sel[i] == '\n') i += 1;
|
||||||
|
},
|
||||||
|
' ', '\t', '\n', std.ascii.control_code.ff => i += 1,
|
||||||
|
else => {},
|
||||||
|
}
|
||||||
}
|
}
|
||||||
p.i = i;
|
p.i = i;
|
||||||
|
if (v == 0) {
|
||||||
|
w.writeAll(REPLACEMENT_CHARACTER) catch return ParseError.WriteError;
|
||||||
|
return;
|
||||||
|
}
|
||||||
var buf: [4]u8 = undefined;
|
var buf: [4]u8 = undefined;
|
||||||
const ln = std.unicode.utf8Encode(v, &buf) catch return ParseError.InvalidUnicode;
|
const ln = std.unicode.utf8Encode(v, &buf) catch {
|
||||||
|
w.writeAll(REPLACEMENT_CHARACTER) catch return ParseError.WriteError;
|
||||||
|
return;
|
||||||
|
};
|
||||||
w.writeAll(buf[0..ln]) catch return ParseError.WriteError;
|
w.writeAll(buf[0..ln]) catch return ParseError.WriteError;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -280,7 +310,7 @@ pub const Parser = struct {
|
|||||||
|
|
||||||
// Return the literal character after the backslash.
|
// Return the literal character after the backslash.
|
||||||
p.i += 2;
|
p.i += 2;
|
||||||
w.writeAll(p.s[start .. start + 1]) catch return ParseError.WriteError;
|
w.writeByte(sel[start]) catch return ParseError.WriteError;
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseIDSelector parses a selector that matches by id attribute.
|
// parseIDSelector parses a selector that matches by id attribute.
|
||||||
@@ -383,20 +413,23 @@ pub const Parser = struct {
|
|||||||
|
|
||||||
// parseString parses a single- or double-quoted string.
|
// parseString parses a single- or double-quoted string.
|
||||||
fn parseString(p: *Parser, writer: anytype) ParseError!void {
|
fn parseString(p: *Parser, writer: anytype) ParseError!void {
|
||||||
var i = p.i;
|
const sel = p.s;
|
||||||
if (p.s.len < i + 2) return ParseError.ExpectedString;
|
const sel_len = sel.len;
|
||||||
|
|
||||||
const quote = p.s[i];
|
var i = p.i;
|
||||||
|
if (sel_len < i + 2) return ParseError.ExpectedString;
|
||||||
|
|
||||||
|
const quote = sel[i];
|
||||||
i += 1;
|
i += 1;
|
||||||
|
|
||||||
loop: while (i < p.s.len) {
|
loop: while (i < sel_len) {
|
||||||
switch (p.s[i]) {
|
switch (sel[i]) {
|
||||||
'\\' => {
|
'\\' => {
|
||||||
if (p.s.len > i + 1) {
|
if (sel_len > i + 1) {
|
||||||
const c = p.s[i + 1];
|
const c = sel[i + 1];
|
||||||
switch (c) {
|
switch (c) {
|
||||||
'\r' => {
|
'\r' => {
|
||||||
if (p.s.len > i + 2 and p.s[i + 2] == '\n') {
|
if (sel_len > i + 2 and sel[i + 2] == '\n') {
|
||||||
i += 3;
|
i += 3;
|
||||||
continue :loop;
|
continue :loop;
|
||||||
}
|
}
|
||||||
@@ -418,17 +451,17 @@ pub const Parser = struct {
|
|||||||
else => |c| {
|
else => |c| {
|
||||||
if (c == quote) break :loop;
|
if (c == quote) break :loop;
|
||||||
const start = i;
|
const start = i;
|
||||||
while (i < p.s.len) {
|
while (i < sel_len) {
|
||||||
const cc = p.s[i];
|
const cc = sel[i];
|
||||||
if (cc == quote or cc == '\\' or c == '\r' or c == '\n' or c == std.ascii.control_code.ff) break;
|
if (cc == quote or cc == '\\' or c == '\r' or c == '\n' or c == std.ascii.control_code.ff) break;
|
||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
writer.writeAll(p.s[start..i]) catch return ParseError.WriteError;
|
writer.writeAll(sel[start..i]) catch return ParseError.WriteError;
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i >= p.s.len) return ParseError.InvalidString;
|
if (i >= sel_len) return ParseError.InvalidString;
|
||||||
|
|
||||||
// Consume the final quote.
|
// Consume the final quote.
|
||||||
i += 1;
|
i += 1;
|
||||||
|
|||||||
Reference in New Issue
Block a user