From bcedbc845e7f5f0f92d2b0227d51edbe5b96f3a5 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 17 Feb 2025 16:39:15 +0100 Subject: [PATCH 01/21] ci: add puppeteer regression test --- .github/workflows/zig-test.yml | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/.github/workflows/zig-test.yml b/.github/workflows/zig-test.yml index ec061993..97d6c014 100644 --- a/.github/workflows/zig-test.yml +++ b/.github/workflows/zig-test.yml @@ -69,9 +69,6 @@ jobs: zig-build-release: name: zig build release - # Don't run the CI on PR - if: github.event_name != 'pull_request' - runs-on: ubuntu-latest steps: @@ -86,6 +83,14 @@ jobs: - name: zig build release run: zig build -Doptimize=ReleaseSafe -Dengine=v8 + - name: upload artifact + uses: actions/upload-artifact@v4 + with: + name: lightpanda-build-release + path: | + zig-out/bin/lightpanda + retention-days: 1 + zig-test: name: zig test @@ -146,6 +151,10 @@ jobs: name: demo-puppeteer needs: zig-build-dev + env: + MAX_MEMORY: 23456 + MAX_AVG_DURATION: 22 + runs-on: ubuntu-latest steps: @@ -159,12 +168,21 @@ jobs: - name: download artifact uses: actions/download-artifact@v4 with: - name: lightpanda-build-dev + name: lightpanda-build-release - run: chmod a+x ./lightpanda - name: run puppeteer run: | - python3 -m http.server 1234 -d ./public & - ./lightpanda & - RUNS=2 npm run bench-puppeteer-cdp + go run ws/main.go & + /usr/bin/time -f'%M' -omrs.out ./lightpanda & + RUNS=100 npm run bench-puppeteer-cdp |tee output + + - name: memory regression + run: | + test "`cat mrs.out`" -le "$MAX_MEMORY" + + - name: duration regression + run: | + test "`cat output|grep 'avg run'|sed 's/avg run duration (ms) //'`" -le "$MAX_AVG_DURATION" + From 9894cceeaa02a0a1a216e842e4feeacbc0da5bd3 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Mon, 17 Feb 2025 16:42:25 +0100 Subject: [PATCH 02/21] ci: extract end-to-end test on its own file --- .github/workflows/e2e-test.yml | 107 +++++++++++++++++++++++++++++++++ .github/workflows/zig-test.yml | 65 -------------------- 2 files changed, 107 insertions(+), 65 deletions(-) create mode 100644 .github/workflows/e2e-test.yml diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml new file mode 100644 index 00000000..0f21efe8 --- /dev/null +++ b/.github/workflows/e2e-test.yml @@ -0,0 +1,107 @@ +name: e2e-test + +on: + push: + branches: + - main + paths: + - "build.zig" + - "src/**/*.zig" + - "src/*.zig" + - "vendor/zig-js-runtime" + - ".github/**" + - "vendor/**" + pull_request: + + # By default GH trigger on types opened, synchronize and reopened. + # see https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request + # Since we skip the job when the PR is in draft state, we want to force CI + # running when the PR is marked ready_for_review w/o other change. + # see https://github.com/orgs/community/discussions/25722#discussioncomment-3248917 + types: [opened, synchronize, reopened, ready_for_review] + + paths: + - ".github/**" + - "build.zig" + - "src/**/*.zig" + - "src/*.zig" + - "vendor/**" + - ".github/**" + - "vendor/**" + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + zig-build-release: + name: zig build release + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + # fetch submodules recusively, to get zig-js-runtime submodules also. + submodules: recursive + + - uses: ./.github/actions/install + + - name: zig build release + run: zig build -Doptimize=ReleaseSafe -Dengine=v8 + + - name: upload artifact + uses: actions/upload-artifact@v4 + with: + name: lightpanda-build-release + path: | + zig-out/bin/lightpanda + retention-days: 1 + + puppeteer: + name: puppeteer + needs: zig-build-release + + env: + MAX_MEMORY: 24000 + MAX_AVG_DURATION: 23 + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + repository: 'lightpanda-io/demo' + fetch-depth: 0 + + - run: npm install + + - name: download artifact + uses: actions/download-artifact@v4 + with: + name: lightpanda-build-release + + - run: chmod a+x ./lightpanda + + - name: run puppeteer + run: | + python3 -m http.server 1234 -d ./public & echo $! > PYTHON.pid + ./lightpanda & echo $! > LPD.pid + RUNS=100 npm run bench-puppeteer-cdp > puppeteer.out || exit 1 + cat /proc/`cat LPD.pid`/status |grep VmHWM|grep -oP '\d+' > LPD.VmHWM + kill `cat LPD.pid` `cat PYTHON.pid` + + - name: puppeteer result + run: cat puppeteer.out + + - name: memory regression + run: | + export LPD_VmHWM=`cat LPD.VmHWM` + echo "Peak resident set size: $LPD_VmHWM" + test "$LPD_VmHWM" -le "$MAX_MEMORY" + + - name: duration regression + run: | + export PUPPETEER_AVG_DURATION=`cat puppeteer.out|grep 'avg run'|sed 's/avg run duration (ms) //'` + echo "puppeteer avg duration: $PUPPETEER_AVG_DURATION" + test "$PUPPETEER_AVG_DURATION" -le "$MAX_AVG_DURATION" + diff --git a/.github/workflows/zig-test.yml b/.github/workflows/zig-test.yml index 97d6c014..ef75a006 100644 --- a/.github/workflows/zig-test.yml +++ b/.github/workflows/zig-test.yml @@ -66,31 +66,6 @@ jobs: zig-out/bin/lightpanda retention-days: 1 - zig-build-release: - name: zig build release - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - # fetch submodules recusively, to get zig-js-runtime submodules also. - submodules: recursive - - - uses: ./.github/actions/install - - - name: zig build release - run: zig build -Doptimize=ReleaseSafe -Dengine=v8 - - - name: upload artifact - uses: actions/upload-artifact@v4 - with: - name: lightpanda-build-release - path: | - zig-out/bin/lightpanda - retention-days: 1 - zig-test: name: zig test @@ -146,43 +121,3 @@ jobs: - name: format and send json result run: /perf-fmt bench-browser ${{ github.sha }} bench.json - - demo-puppeteer: - name: demo-puppeteer - needs: zig-build-dev - - env: - MAX_MEMORY: 23456 - MAX_AVG_DURATION: 22 - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - with: - repository: 'lightpanda-io/demo' - fetch-depth: 0 - - - run: npm install - - - name: download artifact - uses: actions/download-artifact@v4 - with: - name: lightpanda-build-release - - - run: chmod a+x ./lightpanda - - - name: run puppeteer - run: | - go run ws/main.go & - /usr/bin/time -f'%M' -omrs.out ./lightpanda & - RUNS=100 npm run bench-puppeteer-cdp |tee output - - - name: memory regression - run: | - test "`cat mrs.out`" -le "$MAX_MEMORY" - - - name: duration regression - run: | - test "`cat output|grep 'avg run'|sed 's/avg run duration (ms) //'`" -le "$MAX_AVG_DURATION" - From dc1d5930199411812d2ea445b708482f56ffd418 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 18 Feb 2025 10:57:02 +0100 Subject: [PATCH 03/21] ci: adjust memory regression max values --- .github/workflows/e2e-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 0f21efe8..49b93869 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -62,8 +62,8 @@ jobs: needs: zig-build-release env: - MAX_MEMORY: 24000 - MAX_AVG_DURATION: 23 + MAX_MEMORY: 25000 + MAX_AVG_DURATION: 24 runs-on: ubuntu-latest From 03355f6a4ac73acfb5241fa6ef4c8e5fae2df1a3 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 18 Feb 2025 11:01:52 +0100 Subject: [PATCH 04/21] readme: remove useless badges --- README.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/README.md b/README.md index f8471029..c43894f6 100644 --- a/README.md +++ b/README.md @@ -7,20 +7,12 @@

lightpanda.io

- -[![Commit Activity](https://img.shields.io/github/commit-activity/m/lightpanda-io/browser)](https://github.com/lightpanda-io/browser/commits/main) [![License](https://img.shields.io/github/license/lightpanda-io/browser)](https://github.com/lightpanda-io/browser/blob/main/LICENSE) [![Twitter Follow](https://img.shields.io/twitter/follow/lightpanda_io)](https://twitter.com/lightpanda_io) [![GitHub stars](https://img.shields.io/github/stars/lightpanda-io/browser)](https://github.com/lightpanda-io/browser)
-
- -lightpanda-io%2Fbrowser | Trendshift - -
- Lightpanda is the open-source browser made for headless usage: - Javascript execution From cd429f59353a474d50df77dc7ec3150c6166be95 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 18 Feb 2025 11:06:51 +0100 Subject: [PATCH 05/21] readme: fix binary name --- README.md | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index c43894f6..ebd04875 100644 --- a/README.md +++ b/README.md @@ -36,29 +36,22 @@ You can download the last binary from the [nightly builds](https://github.com/lightpanda-io/browser/releases/tag/nightly) for Linux x86_64 and MacOS aarch64. +*For linux* ```console -# Download the binary -$ wget https://github.com/lightpanda-io/browser/releases/download/nightly/lightpanda-x86_64-linux -$ chmod a+x ./lightpanda-x86_64-linux -$ ./lightpanda-x86_64-linux -h -usage: ./lightpanda-x86_64-linux [options] [URL] +$ wget -O lightpanda https://github.com/lightpanda-io/browser/releases/download/nightly/lightpanda-x86_64-linux +$ chmod a+x ./lightpanda +``` - start Lightpanda browser - - * if an url is provided the browser will fetch the page and exit - * otherwhise the browser starts a CDP server - - -h, --help Print this help message and exit. - --host Host of the CDP server (default "127.0.0.1") - --port Port of the CDP server (default "9222") - --timeout Timeout for incoming connections of the CDP server (in seconds, default "3") - --dump Dump document in stdout (fetch mode only) +*For MacOS* +```console +$ wget -O lightpanda https://github.com/lightpanda-io/browser/releases/download/nightly/lightpanda-aarch64-macos +$ chmod a+x ./lightpanda ``` ### Dump an URL ```console -$ ./lightpanda-x86_64-linux --dump https://lightpanda.io +$ ./lightpanda --dump https://lightpanda.io info(browser): GET https://lightpanda.io/ http.Status.ok info(browser): fetch script https://api.website.lightpanda.io/js/script.js: http.Status.ok info(browser): eval remote https://api.website.lightpanda.io/js/script.js: TypeError: Cannot read properties of undefined (reading 'pushState') @@ -68,7 +61,7 @@ info(browser): eval remote https://api.website.lightpanda.io/js/script.js: TypeE ### Start a CDP server ```console -$ ./lightpanda-x86_64-linux --host 127.0.0.1 --port 9222 +$ ./lightpanda --host 127.0.0.1 --port 9222 info(websocket): starting blocking worker to listen on 127.0.0.1:9222 info(server): accepting new conn... ``` From f8d01e159604e98c1d03bc9938dcdfd17f0b0b1e Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 18 Feb 2025 11:15:06 +0100 Subject: [PATCH 06/21] readme: update exemple t odump links --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index ebd04875..406d55ae 100644 --- a/README.md +++ b/README.md @@ -83,10 +83,20 @@ const browser = await puppeteer.connect({ const context = await browser.createBrowserContext(); const page = await context.newPage(); +// Dump all the links from the page. await page.goto('https://wikipedia.com/'); +const links = await page.evaluate(() => { + return Array.from(document.querySelectorAll('a')).map(row => { + return row.getAttribute('href'); + }); +}); + +console.log(links); + await page.close(); await context.close(); +await browser.disconnect(); ``` ## Build from sources From 689dddd11a746d097aa79a8953a1d83a5f05e1b4 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 18 Feb 2025 11:19:02 +0100 Subject: [PATCH 07/21] readme: allow copy/paste install instruction --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 406d55ae..395af948 100644 --- a/README.md +++ b/README.md @@ -38,14 +38,14 @@ Linux x86_64 and MacOS aarch64. *For linux* ```console -$ wget -O lightpanda https://github.com/lightpanda-io/browser/releases/download/nightly/lightpanda-x86_64-linux -$ chmod a+x ./lightpanda +wget -O lightpanda https://github.com/lightpanda-io/browser/releases/download/nightly/lightpanda-x86_64-linux && \ +chmod a+x ./lightpanda ``` *For MacOS* ```console -$ wget -O lightpanda https://github.com/lightpanda-io/browser/releases/download/nightly/lightpanda-aarch64-macos -$ chmod a+x ./lightpanda +wget -O lightpanda https://github.com/lightpanda-io/browser/releases/download/nightly/lightpanda-aarch64-macos && \ +chmod a+x ./lightpanda ``` ### Dump an URL From 0a1e6623c8d9105f031d5c457a40a85740b2e6e9 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 18 Feb 2025 11:20:56 +0100 Subject: [PATCH 08/21] readme: allow examples copy/paste --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 395af948..7a91d8ea 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,9 @@ chmod a+x ./lightpanda ### Dump an URL ```console -$ ./lightpanda --dump https://lightpanda.io +./lightpanda --dump https://lightpanda.io +``` +```console info(browser): GET https://lightpanda.io/ http.Status.ok info(browser): fetch script https://api.website.lightpanda.io/js/script.js: http.Status.ok info(browser): eval remote https://api.website.lightpanda.io/js/script.js: TypeError: Cannot read properties of undefined (reading 'pushState') @@ -61,7 +63,9 @@ info(browser): eval remote https://api.website.lightpanda.io/js/script.js: TypeE ### Start a CDP server ```console -$ ./lightpanda --host 127.0.0.1 --port 9222 +./lightpanda --host 127.0.0.1 --port 9222 +``` +```console info(websocket): starting blocking worker to listen on 127.0.0.1:9222 info(server): accepting new conn... ``` From 27e907491be9f7931f147804d5bfe83b734b6de0 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 18 Feb 2025 11:25:04 +0100 Subject: [PATCH 09/21] readme: remove text duplication --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7a91d8ea..b6e3038a 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Lightpanda is the open-source browser made for headless usage: - Support of Web APIs (partial, WIP) - Compatible with Playwright, Puppeteer through CDP (WIP) -Fast web automation for AI agents, LLM training, scraping and testing with minimal memory footprint: +Fast web automation for AI agents, LLM training, scraping and testing: - Ultra-low memory footprint (9x less than Chrome) - Exceptionally fast execution (11x faster than Chrome) & instant startup From 1a8cc2d019d31e491463409608368d395ea31074 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 18 Feb 2025 11:31:52 +0100 Subject: [PATCH 10/21] readme: adjust text --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b6e3038a..b8b50b4b 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,8 @@ Lightpanda is the open-source browser made for headless usage: Fast web automation for AI agents, LLM training, scraping and testing: - Ultra-low memory footprint (9x less than Chrome) -- Exceptionally fast execution (11x faster than Chrome) & instant startup +- Exceptionally fast execution (11x faster than Chrome) +- Instant startup From cdcc5e106f384cadd5cb6972d7a814ebc75b7144 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 18 Feb 2025 11:32:02 +0100 Subject: [PATCH 11/21] readme: use curl to download binary --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b8b50b4b..f8664bd5 100644 --- a/README.md +++ b/README.md @@ -39,13 +39,13 @@ Linux x86_64 and MacOS aarch64. *For linux* ```console -wget -O lightpanda https://github.com/lightpanda-io/browser/releases/download/nightly/lightpanda-x86_64-linux && \ +curl -L -o lightpanda https://github.com/lightpanda-io/browser/releases/download/nightly/lightpanda-x86_64-linux && \ chmod a+x ./lightpanda ``` *For MacOS* ```console -wget -O lightpanda https://github.com/lightpanda-io/browser/releases/download/nightly/lightpanda-aarch64-macos && \ +curl -L -o lightpanda https://github.com/lightpanda-io/browser/releases/download/nightly/lightpanda-aarch64-macos && \ chmod a+x ./lightpanda ``` From d6575faa9f04112fdb183cbcdfb634b87bee28d7 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 18 Feb 2025 11:37:08 +0100 Subject: [PATCH 12/21] readme: fix badges --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index f8664bd5..cfa97657 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@

lightpanda.io

+ [![License](https://img.shields.io/github/license/lightpanda-io/browser)](https://github.com/lightpanda-io/browser/blob/main/LICENSE) [![Twitter Follow](https://img.shields.io/twitter/follow/lightpanda_io)](https://twitter.com/lightpanda_io) [![GitHub stars](https://img.shields.io/github/stars/lightpanda-io/browser)](https://github.com/lightpanda-io/browser) From 14eebfe39eb7367e410d3c3e4db5abebbd2767af Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 18 Feb 2025 11:46:48 +0100 Subject: [PATCH 13/21] readme: update benchmark image --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cfa97657..9efe6885 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Fast web automation for AI agents, LLM training, scraping and testing: - Exceptionally fast execution (11x faster than Chrome) - Instant startup - +[![](https://cdn.lightpanda.io/assets/images/benchmark_2025-02-19.png)](https://github.com/lightpanda-io/demo) See [benchmark details](https://github.com/lightpanda-io/demo). From 3e01cf19b093f61fd064afc04daf56e3720a62c3 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 18 Feb 2025 11:53:32 +0100 Subject: [PATCH 14/21] readme: add benchmark details --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 9efe6885..801eea27 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ Fast web automation for AI agents, LLM training, scraping and testing: [![](https://cdn.lightpanda.io/assets/images/benchmark_2025-02-19.png)](https://github.com/lightpanda-io/demo) +Puppeteer requesting 100 pages from a local website on a AWS EC2 m5.large instance. See [benchmark details](https://github.com/lightpanda-io/demo). ## Quick start From 7602f155448d71c9f850e6b12a13773fecb3ab60 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 18 Feb 2025 13:41:45 +0100 Subject: [PATCH 15/21] readme: move status up --- README.md | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 801eea27..bbc8d7fc 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,28 @@ await context.close(); await browser.disconnect(); ``` +## Status + +Lightpanda is still a work in progress and is currently at a Beta stage. + +:warning: You should expect most websites to fail or crash. + +Here are the key features we have implemented: + +- [x] HTTP loader +- [x] HTML parser and DOM tree (based on Netsurf libs) +- [x] Javascript support (v8) +- [x] Basic DOM APIs +- [x] Ajax + - [x] XHR API + - [x] Fetch API +- [x] DOM dump +- [x] Basic CDP/websockets server + +NOTE: There are hundreds of Web APIs. Developing a browser (even just for headless mode) is a huge task. Coverage will increase over time. + +You can also follow the progress of our Javascript support in our dedicated [zig-js-runtime](https://github.com/lightpanda-io/zig-js-runtime#development) project. + ## Build from sources ### Prerequisites @@ -262,25 +284,3 @@ If we want both Javascript and performance in a true headless browser, we need t - Not based on Chromium, Blink or WebKit - Low-level system programming language (Zig) with optimisations in mind - Opinionated: without graphical rendering - -## Status - -Lightpanda is still a work in progress and is currently at a Beta stage. - -:warning: You should expect most websites to fail or crash. - -Here are the key features we have implemented: - -- [x] HTTP loader -- [x] HTML parser and DOM tree (based on Netsurf libs) -- [x] Javascript support (v8) -- [x] Basic DOM APIs -- [x] Ajax - - [x] XHR API - - [x] Fetch API -- [x] DOM dump -- [x] Basic CDP/websockets server - -NOTE: There are hundreds of Web APIs. Developing a browser (even just for headless mode) is a huge task. Coverage will increase over time. - -You can also follow the progress of our Javascript support in our dedicated [zig-js-runtime](https://github.com/lightpanda-io/zig-js-runtime#development) project. From f986cfecff4e5043cdd993b862bf03e9c1ff7a72 Mon Sep 17 00:00:00 2001 From: Pierre Tachoire Date: Tue, 18 Feb 2025 13:50:16 +0100 Subject: [PATCH 16/21] readme: adjust image width --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bbc8d7fc..1922aa08 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,9 @@ Fast web automation for AI agents, LLM training, scraping and testing: - Exceptionally fast execution (11x faster than Chrome) - Instant startup -[![](https://cdn.lightpanda.io/assets/images/benchmark_2025-02-19.png)](https://github.com/lightpanda-io/demo) + + + Puppeteer requesting 100 pages from a local website on a AWS EC2 m5.large instance. See [benchmark details](https://github.com/lightpanda-io/demo). From 5037bd07d5c0435b1f2a3763d071b90dc20e9d04 Mon Sep 17 00:00:00 2001 From: Nicolas Rigaudiere Date: Tue, 18 Feb 2025 15:43:49 +0100 Subject: [PATCH 17/21] chore: update readme images --- README.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1922aa08..2dbd8b9d 100644 --- a/README.md +++ b/README.md @@ -25,13 +25,18 @@ Fast web automation for AI agents, LLM training, scraping and testing: - Ultra-low memory footprint (9x less than Chrome) - Exceptionally fast execution (11x faster than Chrome) - Instant startup +
+
- - - +[ +](https://github.com/lightpanda-io/demo) +  +[ +](https://github.com/lightpanda-io/demo) +
-Puppeteer requesting 100 pages from a local website on a AWS EC2 m5.large instance. -See [benchmark details](https://github.com/lightpanda-io/demo). +_Puppeteer requesting 100 pages from a local website on a AWS EC2 m5.large instance. +See [benchmark details](https://github.com/lightpanda-io/demo)._ ## Quick start From 39a9efb73bd21ec2451e0725b76a45f9ec828eea Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Wed, 19 Feb 2025 15:01:12 +0800 Subject: [PATCH 18/21] Fix server hang on client disconnect https://github.com/lightpanda-io/browser/issues/425 Add a few integration tests for the TCP server which are fast enough to be run as part of the unit tests (one of the new tests covers the above issue). --- src/browser/browser.zig | 7 ++- src/main_tests.zig | 6 -- src/server.zig | 118 +++++++++++++++++++++++++++++++++++++--- src/unit_tests.zig | 59 ++++++++++++++++---- 4 files changed, 163 insertions(+), 27 deletions(-) diff --git a/src/browser/browser.zig b/src/browser/browser.zig index a7b684b1..67f349c6 100644 --- a/src/browser/browser.zig +++ b/src/browser/browser.zig @@ -242,7 +242,12 @@ pub const Page = struct { // add global objects log.debug("setup global env", .{}); - try self.session.env.bindGlobal(&self.session.window); + + if (comptime builtin.is_test == false) { + // By not loading this during tests, we aren't required to load + // all of the interfaces into zig-js-runtime. + try self.session.env.bindGlobal(&self.session.window); + } // load polyfills try polyfill.load(self.arena.allocator(), self.session.env); diff --git a/src/main_tests.zig b/src/main_tests.zig index 8028827c..53967905 100644 --- a/src/main_tests.zig +++ b/src/main_tests.zig @@ -337,12 +337,6 @@ test { std.testing.refAllDecls(@import("generate.zig")); std.testing.refAllDecls(@import("cdp/msg.zig")); - - // Don't use refAllDecls, as this will pull in the entire project - // and break the test build. - // We should fix this. See this branch & the commit message for details: - // https://github.com/karlseguin/browser/commit/193ab5ceab3d3758ea06db04f7690460d79eb79e - _ = @import("server.zig"); } fn testJSRuntime(alloc: std.mem.Allocator) !void { diff --git a/src/server.zig b/src/server.zig index 95f51ec6..eeeb1934 100644 --- a/src/server.zig +++ b/src/server.zig @@ -211,6 +211,13 @@ const Server = struct { self.queueClose(client.socket); return; }; + if (size == 0) { + if (self.client != null) { + self.client = null; + } + self.queueAccept(); + return; + } const more = client.processData(size) catch |err| { log.err("Client Processing Error: {}\n", .{err}); @@ -1053,14 +1060,6 @@ pub fn run( timeout: u64, loop: *jsruntime.Loop, ) !void { - if (comptime builtin.is_test) { - // There's bunch of code that won't compiler in a test build (because - // it relies on a global root.Types). So we fight the compiler and make - // sure it doesn't include any of that code. Hopefully one day we can - // remove all this. - return; - } - // create socket const flags = posix.SOCK.STREAM | posix.SOCK.CLOEXEC | posix.SOCK.NONBLOCK; const listener = try posix.socket(address.any.family, flags, posix.IPPROTO.TCP); @@ -1631,6 +1630,49 @@ test "server: mask" { } } +test "server: 404" { + var c = try createTestClient(); + defer c.deinit(); + + const res = try c.httpRequest("GET /unknown HTTP/1.1\r\n\r\n"); + try testing.expectEqualStrings("HTTP/1.1 404 \r\n" ++ + "Connection: Close\r\n" ++ + "Content-Length: 9\r\n\r\n" ++ + "Not found", res); +} + +test "server: get /json/version" { + const expected_response = + "HTTP/1.1 200 OK\r\n" ++ + "Content-Length: 48\r\n" ++ + "Content-Type: application/json; charset=UTF-8\r\n\r\n" ++ + "{\"webSocketDebuggerUrl\": \"ws://127.0.0.1:9583/\"}"; + + { + // twice on the same connection + var c = try createTestClient(); + defer c.deinit(); + + const res1 = try c.httpRequest("GET /json/version HTTP/1.1\r\n\r\n"); + try testing.expectEqualStrings(expected_response, res1); + + const res2 = try c.httpRequest("GET /json/version HTTP/1.1\r\n\r\n"); + try testing.expectEqualStrings(expected_response, res2); + } + + { + // again on a new connection + var c = try createTestClient(); + defer c.deinit(); + + const res1 = try c.httpRequest("GET /json/version HTTP/1.1\r\n\r\n"); + try testing.expectEqualStrings(expected_response, res1); + + const res2 = try c.httpRequest("GET /json/version HTTP/1.1\r\n\r\n"); + try testing.expectEqualStrings(expected_response, res2); + } +} + fn assertHTTPError( expected_error: HTTPError, comptime expected_status: u16, @@ -1762,3 +1804,63 @@ const MockServer = struct { } } }; + +fn createTestClient() !TestClient { + const address = std.net.Address.initIp4([_]u8{ 127, 0, 0, 1 }, 9583); + const stream = try std.net.tcpConnectToAddress(address); + + const timeout = std.mem.toBytes(posix.timeval{ + .tv_sec = 2, + .tv_usec = 0, + }); + try posix.setsockopt(stream.handle, posix.SOL.SOCKET, posix.SO.RCVTIMEO, &timeout); + try posix.setsockopt(stream.handle, posix.SOL.SOCKET, posix.SO.SNDTIMEO, &timeout); + return .{ .stream = stream }; +} + +const TestClient = struct { + stream: std.net.Stream, + buf: [1024]u8 = undefined, + + fn deinit(self: *TestClient) void { + self.stream.close(); + } + + fn httpRequest(self: *TestClient, req: []const u8) ![]const u8 { + try self.stream.writeAll(req); + + var pos: usize = 0; + var total_length: ?usize = null; + while (true) { + pos += try self.stream.read(self.buf[pos..]); + const response = self.buf[0..pos]; + if (total_length == null) { + const header_end = std.mem.indexOf(u8, response, "\r\n\r\n") orelse continue; + const header = response[0 .. header_end + 4]; + + const cl_header = "Content-Length: "; + const start = (std.mem.indexOf(u8, header, cl_header) orelse { + return error.MissingContentLength; + }) + cl_header.len; + + const end = std.mem.indexOfScalarPos(u8, header, start, '\r') orelse { + return error.InvalidContentLength; + }; + const cl = std.fmt.parseInt(usize, header[start..end], 10) catch { + return error.InvalidContentLength; + }; + + total_length = cl + header.len; + } + + if (total_length) |tl| { + if (pos == tl) { + return response; + } + if (pos > tl) { + return error.DataExceedsContentLength; + } + } + } + } +}; diff --git a/src/unit_tests.zig b/src/unit_tests.zig index 7508821e..e4f1ac3e 100644 --- a/src/unit_tests.zig +++ b/src/unit_tests.zig @@ -18,10 +18,17 @@ const std = @import("std"); const builtin = @import("builtin"); +const parser = @import("netsurf"); const Allocator = std.mem.Allocator; +const jsruntime = @import("jsruntime"); +pub const Types = jsruntime.reflect(@import("generate.zig").Tuple(.{}){}); +pub const UserContext = @import("user_context.zig").UserContext; +// pub const IO = @import("asyncio").Wrapper(jsruntime.Loop); + pub const std_options = std.Options{ + .log_level = .err, .http_disable_tls = true, }; @@ -31,11 +38,16 @@ const BORDER = "=" ** 80; var current_test: ?[]const u8 = null; pub fn main() !void { + try parser.init(); + defer parser.deinit(); + var mem: [8192]u8 = undefined; var fba = std.heap.FixedBufferAllocator.init(&mem); - const allocator = fba.allocator(); + var loop = try jsruntime.Loop.init(allocator); + defer loop.deinit(); + const env = Env.init(allocator); defer env.deinit(allocator); @@ -47,12 +59,20 @@ pub fn main() !void { var skip: usize = 0; var leak: usize = 0; - const address = try std.net.Address.parseIp("127.0.0.1", 9582); - var listener = try address.listen(.{ .reuse_address = true }); - defer listener.deinit(); - const http_thread = try std.Thread.spawn(.{}, serverHTTP, .{&listener}); + const http_thread = blk: { + const address = try std.net.Address.parseIp("127.0.0.1", 9582); + const thread = try std.Thread.spawn(.{}, serveHTTP, .{address}); + break :blk thread; + }; defer http_thread.join(); + const cdp_thread = blk: { + const address = try std.net.Address.parseIp("127.0.0.1", 9583); + const thread = try std.Thread.spawn(.{}, serveCDP, .{ allocator, address, &loop }); + break :blk thread; + }; + defer cdp_thread.join(); + const printer = Printer.init(); printer.fmt("\r\x1b[0K", .{}); // beginning of line and clear to end of line @@ -98,7 +118,9 @@ pub fn main() !void { } if (result) |_| { - pass += 1; + if (is_unnamed_test == false) { + pass += 1; + } } else |err| switch (err) { error.SkipZigTest => { skip += 1; @@ -117,11 +139,13 @@ pub fn main() !void { }, } - if (env.verbose) { - const ms = @as(f64, @floatFromInt(ns_taken)) / 1_000_000.0; - printer.status(status, "{s} ({d:.2}ms)\n", .{ friendly_name, ms }); - } else { - printer.status(status, ".", .{}); + if (is_unnamed_test == false) { + if (env.verbose) { + const ms = @as(f64, @floatFromInt(ns_taken)) / 1_000_000.0; + printer.status(status, "{s} ({d:.2}ms)\n", .{ friendly_name, ms }); + } else { + printer.status(status, ".", .{}); + } } } @@ -294,7 +318,10 @@ fn isUnnamed(t: std.builtin.TestFn) bool { return true; } -fn serverHTTP(listener: *std.net.Server) !void { +fn serveHTTP(address: std.net.Address) !void { + var listener = try address.listen(.{ .reuse_address = true }); + defer listener.deinit(); + var read_buffer: [1024]u8 = undefined; ACCEPT: while (true) { var conn = try listener.accept(); @@ -320,6 +347,14 @@ fn serverHTTP(listener: *std.net.Server) !void { } } +fn serveCDP(allocator: Allocator, address: std.net.Address, loop: *jsruntime.Loop) !void { + const server = @import("server.zig"); + server.run(allocator, address, std.time.ns_per_s * 2, loop) catch |err| { + std.debug.print("CDP server error: {}", .{err}); + return err; + }; +} + const Response = struct { body: []const u8 = "", status: std.http.Status = .ok, From 4dbba103d40fe96a2f867c5a5a6f98b1ad0e67d4 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Mon, 17 Feb 2025 16:08:26 +0800 Subject: [PATCH 19/21] In release mode, switch from page_allocator to c_allocator --- src/main.zig | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/src/main.zig b/src/main.zig index c5c04996..31743c2a 100644 --- a/src/main.zig +++ b/src/main.zig @@ -202,23 +202,13 @@ pub fn main() !void { // allocator // - in Debug mode we use the General Purpose Allocator to detect memory leaks - // - in Release mode we use the page allocator - var alloc: std.mem.Allocator = undefined; - var _gpa: ?std.heap.GeneralPurposeAllocator(.{}) = null; - if (builtin.mode == .Debug) { - _gpa = std.heap.GeneralPurposeAllocator(.{}){}; - alloc = _gpa.?.allocator(); - } else { - alloc = std.heap.page_allocator; - } - defer { - if (_gpa) |*gpa| { - switch (gpa.deinit()) { - .ok => std.debug.print("No memory leaks\n", .{}), - .leak => @panic("Memory leak"), - } - } - } + // - in Release mode we use the c allocator + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + const alloc = if (builtin.mode == .Debug) gpa.allocator() else std.heap.c_allocator; + + defer if (builtin.mode == .Debug) { + _ = gpa.detectLeaks(); + }; // args var args: std.process.ArgIterator = undefined; From 6d6b840cf6673bcff752d2a13e5fbf2a8445d5da Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Thu, 20 Feb 2025 08:42:45 +0800 Subject: [PATCH 20/21] Use $(ZIG) variable when building netsurf --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4b824f11..996e5e13 100644 --- a/Makefile +++ b/Makefile @@ -144,7 +144,7 @@ _install-netsurf: clean-netsurf BUILDDIR=$(BC_NS)/build/libdom make install && \ printf "\e[33mRunning libdom example...\e[0m\n" && \ cd examples && \ - zig cc \ + $(ZIG) cc \ -I$(ICONV)/include \ -I$(BC_NS)/include \ -L$(ICONV)/lib \ From 61a7848fd9b8cfa544edc6961fe35eb6a25b1029 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Thu, 20 Feb 2025 14:06:38 +0800 Subject: [PATCH 21/21] Use an enum for XHR's state. --- src/xhr/xhr.zig | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/src/xhr/xhr.zig b/src/xhr/xhr.zig index 2ad2bc26..84ae6602 100644 --- a/src/xhr/xhr.zig +++ b/src/xhr/xhr.zig @@ -103,7 +103,7 @@ pub const XMLHttpRequest = struct { ctx: ?Client.Ctx = null, method: std.http.Method, - state: u16, + state: State, url: ?[]const u8, uri: std.Uri, // request headers @@ -150,11 +150,13 @@ pub const XMLHttpRequest = struct { pub const prototype = *XMLHttpRequestEventTarget; pub const mem_guarantied = true; - pub const UNSENT: u16 = 0; - pub const OPENED: u16 = 1; - pub const HEADERS_RECEIVED: u16 = 2; - pub const LOADING: u16 = 3; - pub const DONE: u16 = 4; + const State = enum(u16) { + unsent = 0, + opened = 1, + headers_received = 2, + loading = 3, + done = 4, + }; // https://xhr.spec.whatwg.org/#response-type const ResponseType = enum { @@ -297,7 +299,7 @@ pub const XMLHttpRequest = struct { .method = undefined, .url = null, .uri = undefined, - .state = UNSENT, + .state = .unsent, .cli = userctx.httpClient, }; } @@ -347,7 +349,7 @@ pub const XMLHttpRequest = struct { } pub fn get_readyState(self: *XMLHttpRequest) u16 { - return self.state; + return @intFromEnum(self.state); } pub fn get_timeout(_: *XMLHttpRequest) u32 { @@ -367,7 +369,7 @@ pub const XMLHttpRequest = struct { } pub fn set_withCredentials(self: *XMLHttpRequest, withCredentials: bool) !void { - if (self.state != OPENED and self.state != UNSENT) return DOMError.InvalidState; + if (self.state != .opened and self.state != .unsent) return DOMError.InvalidState; if (self.send_flag) return DOMError.InvalidState; self.withCredentials = withCredentials; @@ -401,7 +403,7 @@ pub const XMLHttpRequest = struct { log.debug("open url ({s})", .{self.url.?}); self.sync = if (asyn) |b| !b else false; - self.state = OPENED; + self.state = .opened; self.dispatchEvt("readystatechange"); } @@ -477,14 +479,14 @@ pub const XMLHttpRequest = struct { } pub fn _setRequestHeader(self: *XMLHttpRequest, name: []const u8, value: []const u8) !void { - if (self.state != OPENED) return DOMError.InvalidState; + if (self.state != .opened) return DOMError.InvalidState; if (self.send_flag) return DOMError.InvalidState; return try self.headers.append(name, value); } // TODO body can be either a XMLHttpRequestBodyInit or a document pub fn _send(self: *XMLHttpRequest, alloc: std.mem.Allocator, body: ?[]const u8) !void { - if (self.state != OPENED) return DOMError.InvalidState; + if (self.state != .opened) return DOMError.InvalidState; if (self.send_flag) return DOMError.InvalidState; // The body argument provides the request body, if any, and is ignored @@ -554,7 +556,7 @@ pub const XMLHttpRequest = struct { // TODO handle override mime type - self.state = HEADERS_RECEIVED; + self.state = .headers_received; self.dispatchEvt("readystatechange"); self.response_status = @intFromEnum(self.req.?.response.status); @@ -592,7 +594,7 @@ pub const XMLHttpRequest = struct { if (prev_dispatch != null and now.since(prev_dispatch.?) < min_delay) continue; defer prev_dispatch = now; - self.state = LOADING; + self.state = .loading; self.dispatchEvt("readystatechange"); // dispatch a progress event progress. @@ -604,7 +606,7 @@ pub const XMLHttpRequest = struct { self.response_bytes = buf.items; self.send_flag = false; - self.state = DONE; + self.state = .done; self.dispatchEvt("readystatechange"); // dispatch a progress event load. @@ -666,7 +668,7 @@ pub const XMLHttpRequest = struct { self.priv_state = .done; self.err = err; - self.state = DONE; + self.state = .done; self.send_flag = false; self.dispatchEvt("readystatechange"); self.dispatchProgressEvent("error", .{}); @@ -697,7 +699,7 @@ pub const XMLHttpRequest = struct { } pub fn set_responseType(self: *XMLHttpRequest, rtype: []const u8) !void { - if (self.state == LOADING or self.state == DONE) return DOMError.InvalidState; + if (self.state == .loading or self.state == .done) return DOMError.InvalidState; if (std.mem.eql(u8, rtype, "")) { self.response_type = .Empty; @@ -735,7 +737,7 @@ pub const XMLHttpRequest = struct { return DOMError.InvalidState; } - if (self.state != DONE) return null; + if (self.state != .done) return null; // fastpath if response is previously parsed. if (self.response_obj) |obj| { @@ -761,7 +763,7 @@ pub const XMLHttpRequest = struct { // https://xhr.spec.whatwg.org/#the-response-attribute pub fn get_response(self: *XMLHttpRequest, alloc: std.mem.Allocator) !?Response { if (self.response_type == .Empty or self.response_type == .Text) { - if (self.state == LOADING or self.state == DONE) { + if (self.state == .loading or self.state == .done) { return .{ .Text = try self.get_responseText() }; } return .{ .Text = "" };