Change to linear scaling for renderer.

With the previous exponential approach, a deep site (the deepest element on
Amazon's product page is 36 levels deep) would produce unrealistically large positions.
This commit is contained in:
Karl Seguin
2025-12-10 16:39:27 +08:00
parent 27e58181fb
commit 9c8299f13f
2 changed files with 28 additions and 28 deletions

View File

@@ -765,8 +765,8 @@ fn getElementDimensions(self: *Element, page: *Page) !struct { width: f64, heigh
const tag = self.getTag(); const tag = self.getTag();
// Root containers get large default size to contain descendant positions. // Root containers get large default size to contain descendant positions.
// With calculateDocumentPosition using 10x multipliers per level, deep trees // With calculateDocumentPosition using linear depth scaling (100px per level),
// can position elements at y=millions, so we need a large container height. // even very deep trees (100 levels) stay within 10,000px.
// 100M pixels is plausible for very long documents. // 100M pixels is plausible for very long documents.
if (tag == .html or tag == .body) { if (tag == .html or tag == .body) {
if (width == 5.0) width = 1920.0; if (width == 5.0) width = 1920.0;
@@ -843,51 +843,51 @@ pub fn getClientRects(self: *Element, page: *Page) ![]DOMRect {
return ptr[0..1]; return ptr[0..1];
} }
// Calculates a pseudo-position in the document using an efficient heuristic. // Calculates a pseudo-position in the document using linear depth scaling.
// //
// Instead of walking the entire DOM tree (which would be O(total_nodes)), this // This approach uses a fixed pixel offset per depth level (100px) plus sibling
// function walks UP the tree counting previous siblings at each level. Each level // position within that level. This keeps positions reasonable even for very deep
// uses exponential weighting (10x per depth level) to preserve document order. // DOM trees (e.g., Amazon product pages can be 36+ levels deep).
//
// This gives O(depth * avg_siblings) complexity while maintaining relative positioning
// that's useful for scraping and understanding element flow in the document.
// //
// Example: // Example:
// <body> → position 0 // <body> → position 0 (depth 0)
// <div> → position 0 (0 siblings at level 1) // <div> → position 100 (depth 1, 0 siblings)
// <span></span> → position 0 (0 siblings at level 2) // <span></span> → position 200 (depth 2, 0 siblings)
// <span></span> → position 1 (1 sibling at level 2) // <span></span> → position 201 (depth 2, 1 sibling)
// </div> // </div>
// <div> → position 10 (1 sibling at level 1, weighted by 10) // <div> → position 101 (depth 1, 1 sibling)
// <p></p> → position 10 (0 siblings at level 2, parent has 10) // <p></p> → position 200 (depth 2, 0 siblings)
// </div> // </div>
// </body> // </body>
// //
// Trade-offs: // Trade-offs:
// - Much faster than full tree-walking for deep/large DOMs // - O(depth) complexity, very fast
// - Positions reflect document order and parent-child relationships // - Linear scaling: 36 levels ≈ 3,600px, 100 levels ≈ 10,000px
// - Keeps positions within reasonable bounds (10-level deep tree → ~10M pixels) // - Rough document order preserved (depth dominates, siblings differentiate)
// - Not pixel-accurate, but sufficient for layout heuristics // - Fits comfortably in realistic document heights
fn calculateDocumentPosition(node: *Node) f64 { fn calculateDocumentPosition(node: *Node) f64 {
var position: f64 = 0.0; var depth: f64 = 0.0;
var multiplier: f64 = 1.0; var sibling_offset: f64 = 0.0;
var current = node; var current = node;
while (current.parentNode()) |parent| { // Count siblings at the immediate level
var count: f64 = 0.0; if (current.parentNode()) |parent| {
var sibling = parent.firstChild(); var sibling = parent.firstChild();
while (sibling) |s| { while (sibling) |s| {
if (s == current) break; if (s == current) break;
count += 1.0; sibling_offset += 1.0;
sibling = s.nextSibling(); sibling = s.nextSibling();
} }
}
position += count * multiplier; // Count depth from root
multiplier *= 10.0; while (current.parentNode()) |parent| {
depth += 1.0;
current = parent; current = parent;
} }
return position; // Each depth level = 100px, siblings add within that level
return (depth * 100.0) + sibling_offset;
} }
const GetElementsByTagNameResult = union(enum) { const GetElementsByTagNameResult = union(enum) {

View File

@@ -650,7 +650,7 @@ test "cdp.dom: getBoxModel" {
.params = .{ .nodeId = 6 }, .params = .{ .nodeId = 6 },
}); });
try ctx.expectSentResult(.{ .model = BoxModel{ try ctx.expectSentResult(.{ .model = BoxModel{
.content = Quad{ 0.0, 0.0, 5.0, 0.0, 5.0, 5.0, 0.0, 5.0 }, .content = Quad{ 0.0, 200.0, 5.0, 200.0, 5.0, 205.0, 0.0, 205.0 },
.padding = Quad{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, .padding = Quad{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
.border = Quad{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, .border = Quad{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
.margin = Quad{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, .margin = Quad{ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 },