diff --git a/.github/actions/install/action.yml b/.github/actions/install/action.yml index 88a672a0..6c98f2e5 100644 --- a/.github/actions/install/action.yml +++ b/.github/actions/install/action.yml @@ -13,7 +13,7 @@ inputs: zig-v8: description: 'zig v8 version to install' required: false - default: 'v0.2.9' + default: 'v0.3.1' v8: description: 'v8 version to install' required: false diff --git a/.github/workflows/e2e-integration-test.yml b/.github/workflows/e2e-integration-test.yml index 5bb24d1f..1a0217bb 100644 --- a/.github/workflows/e2e-integration-test.yml +++ b/.github/workflows/e2e-integration-test.yml @@ -63,6 +63,6 @@ jobs: - name: run end to end integration tests run: | - ./lightpanda serve & echo $! > LPD.pid + ./lightpanda serve --log_level error & echo $! > LPD.pid go run integration/main.go kill `cat LPD.pid` diff --git a/.github/workflows/wpt.yml b/.github/workflows/wpt.yml index ecea3cc4..38405591 100644 --- a/.github/workflows/wpt.yml +++ b/.github/workflows/wpt.yml @@ -15,11 +15,11 @@ on: workflow_dispatch: jobs: - wpt: - name: web platform tests json output + wpt-build-release: + name: zig build release runs-on: ubuntu-latest - timeout-minutes: 90 + timeout-minutes: 15 steps: - uses: actions/checkout@v6 @@ -30,11 +30,85 @@ jobs: - uses: ./.github/actions/install - - name: build wpt - run: zig build -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast -- version + - name: zig build release + run: zig build -Dprebuilt_v8_path=v8/libc_v8.a -Doptimize=ReleaseFast -Dcpu=x86_64 -Dgit_commit=$(git rev-parse --short ${{ github.sha }}) + + - name: upload artifact + uses: actions/upload-artifact@v4 + with: + name: lightpanda-build-release + path: | + zig-out/bin/lightpanda + retention-days: 1 + + wpt-build-runner: + name: build wpt runner + + runs-on: ubuntu-latest + timeout-minutes: 90 + + steps: + - uses: actions/checkout@v6 + with: + repository: 'lightpanda-io/demo' + fetch-depth: 0 + + - run: | + cd ./wptrunner + CGO_ENABLED=0 go build + + - name: 
upload artifact + uses: actions/upload-artifact@v4 + with: + name: wptrunner + path: | + wptrunner/wptrunner + retention-days: 1 + + run-wpt: + name: web platform tests json output + needs: + - wpt-build-release + - wpt-build-runner + + # use a self host runner. + runs-on: lpd-bench-hetzner + timeout-minutes: 90 + + steps: + - uses: actions/checkout@v6 + with: + ref: fork + repository: 'lightpanda-io/wpt' + fetch-depth: 0 + + # The hosts are configured manually on the self host runner. + # - name: create custom hosts + # run: ./wpt make-hosts-file | sudo tee -a /etc/hosts + + - name: generate manifest + run: ./wpt manifest + + - name: download lightpanda release + uses: actions/download-artifact@v4 + with: + name: lightpanda-build-release + + - run: chmod a+x ./lightpanda + + - name: download wptrunner + uses: actions/download-artifact@v4 + with: + name: wptrunner + + - run: chmod a+x ./wptrunner - name: run test with json output - run: zig-out/bin/lightpanda-wpt --json > wpt.json + run: | + ./wpt serve 2> /dev/null & echo $! 
> WPT.pid + sleep 10s + ./wptrunner -lpd-path ./lightpanda -json -concurrency 1 > wpt.json + kill `cat WPT.pid` - name: write commit run: | @@ -51,7 +125,7 @@ jobs: perf-fmt: name: perf-fmt - needs: wpt + needs: run-wpt runs-on: ubuntu-latest timeout-minutes: 15 diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 5462f8f0..00000000 --- a/.gitmodules +++ /dev/null @@ -1,15 +0,0 @@ -[submodule "tests/wpt"] - path = tests/wpt - url = https://github.com/lightpanda-io/wpt -[submodule "vendor/nghttp2"] - path = vendor/nghttp2 - url = https://github.com/nghttp2/nghttp2.git -[submodule "vendor/zlib"] - path = vendor/zlib - url = https://github.com/madler/zlib.git -[submodule "vendor/curl"] - path = vendor/curl - url = https://github.com/curl/curl.git -[submodule "vendor/brotli"] - path = vendor/brotli - url = https://github.com/google/brotli diff --git a/Dockerfile b/Dockerfile index 79ae2627..75be1c9e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM debian:stable-slim ARG MINISIG=0.12 ARG ZIG_MINISIG=RWSGOq2NVecA2UPNdBUZykf1CCb147pkmdtYxgb3Ti+JO/wCYvhbAb/U ARG V8=14.0.365.4 -ARG ZIG_V8=v0.2.9 +ARG ZIG_V8=v0.3.1 ARG TARGETPLATFORM RUN apt-get update -yq && \ diff --git a/Makefile b/Makefile index 0e34a5e7..a85d4e69 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,7 @@ help: # $(ZIG) commands # ------------ -.PHONY: build build-v8-snapshot build-dev run run-release shell test bench wpt data end2end +.PHONY: build build-v8-snapshot build-dev run run-release shell test bench data end2end ## Build v8 snapshot build-v8-snapshot: @@ -82,15 +82,6 @@ shell: @printf "\033[36mBuilding shell...\033[0m\n" @$(ZIG) build shell || (printf "\033[33mBuild ERROR\033[0m\n"; exit 1;) -## Run WPT tests -wpt: - @printf "\033[36mBuilding wpt...\033[0m\n" - @$(ZIG) build wpt -- $(filter-out $@,$(MAKECMDGOALS)) || (printf "\033[33mBuild ERROR\033[0m\n"; exit 1;) - -wpt-summary: - @printf "\033[36mBuilding wpt...\033[0m\n" - @$(ZIG) build wpt -- --summary 
$(filter-out $@,$(MAKECMDGOALS)) || (printf "\033[33mBuild ERROR\033[0m\n"; exit 1;) - ## Test - `grep` is used to filter out the huge compile command on build ifeq ($(OS), macos) test: @@ -111,13 +102,8 @@ end2end: # ------------ .PHONY: install -## Install and build dependencies for release -install: install-submodule +install: build data: cd src/data && go run public_suffix_list_gen.go > public_suffix_list.zig -## Init and update git submodule -install-submodule: - @git submodule init && \ - git submodule update diff --git a/README.md b/README.md index d7c78d16..1a860fc7 100644 --- a/README.md +++ b/README.md @@ -220,18 +220,6 @@ For **MacOS**, you need cmake and [Rust](https://rust-lang.org/tools/install/). brew install cmake ``` -### Install Git submodules - -The project uses git submodules for dependencies. - -To init or update the submodules in the `vendor/` directory: - -``` -make install-submodule -``` - -This is an alias for `git submodule init && git submodule update`. - ### Build and run You an build the entire browser with `make build` or `make build-dev` for debug @@ -281,35 +269,75 @@ make end2end Lightpanda is tested against the standardized [Web Platform Tests](https://web-platform-tests.org/). -The relevant tests cases are committed in a [dedicated repository](https://github.com/lightpanda-io/wpt) which is fetched by the `make install-submodule` command. - -All the tests cases executed are located in the `tests/wpt` sub-directory. +We use [a fork](https://github.com/lightpanda-io/wpt/tree/fork) including a custom +[`testharnessreport.js`](https://github.com/lightpanda-io/wpt/commit/01a3115c076a3ad0c84849dbbf77a6e3d199c56f). For reference, you can easily execute a WPT test case with your browser via [wpt.live](https://wpt.live). +#### Configure WPT HTTP server + +To run the test, you must clone the repository, configure the custom hosts and generate the +`MANIFEST.json` file. + +Clone the repository with the `fork` branch. 
+``` +git clone -b fork --depth=1 git@github.com:lightpanda-io/wpt.git +``` + +Enter into the `wpt/` dir. + +Install custom domains in your `/etc/hosts` +``` +./wpt make-hosts-file | sudo tee -a /etc/hosts +``` + +Generate `MANIFEST.json` +``` +./wpt manifest +``` +Use the [WPT's setup +guide](https://web-platform-tests.org/running-tests/from-local-system.html) for +details. + #### Run WPT test suite -To run all the tests: +An external [Go](https://go.dev) runner is provided by +[github.com/lightpanda-io/demo/](https://github.com/lightpanda-io/demo/) +repository, located in the `wptrunner/` dir. +You need to clone the project first. + +First start the WPT's HTTP server from your `wpt/` clone dir. +``` +./wpt serve +``` + +Run a Lightpanda browser ``` -make wpt +zig build run -- --insecure_disable_tls_host_verification +``` + +Then you can start the wptrunner from the Demo's clone dir: +``` +cd wptrunner && go run . ``` Or one specific test: ``` -make wpt Node-childNodes.html +cd wptrunner && go run . Node-childNodes.html ``` -#### Add a new WPT test case +`wptrunner` command accepts `--summary` and `--json` options modifying output. +Also `--concurrency` defines the concurrency limit. -We add new relevant tests cases files when we implemented changes in Lightpanda. +:warning: Running the whole test suite will take a long time. In this case, +it's useful to build in `ReleaseFast` mode to make tests faster. -To add a new test, copy the file you want from the [WPT -repo](https://github.com/web-platform-tests/wpt) into the `tests/wpt` directory. - -:warning: Please keep the original directory tree structure of `tests/wpt`. 
+``` +zig build -Doptimize=ReleaseFast run +``` ## Contributing diff --git a/build.zig b/build.zig index bcf34bee..7e3a2817 100644 --- a/build.zig +++ b/build.zig @@ -50,8 +50,11 @@ pub fn build(b: *Build) !void { .sanitize_thread = enable_tsan, }); mod.addImport("lightpanda", mod); // allow circular "lightpanda" import + mod.addImport("build_config", opts.createModule()); - try addDependencies(b, mod, opts, enable_asan, enable_tsan, prebuilt_v8_path); + try linkV8(b, mod, enable_asan, enable_tsan, prebuilt_v8_path); + try linkCurl(b, mod); + try linkHtml5Ever(b, mod); break :blk mod; }; @@ -143,589 +146,541 @@ pub fn build(b: *Build) !void { const run_step = b.step("legacy_test", "Run the app"); run_step.dependOn(&run_cmd.step); } - - { - // wpt - const exe = b.addExecutable(.{ - .name = "lightpanda-wpt", - .use_llvm = true, - .root_module = b.createModule(.{ - .root_source_file = b.path("src/main_wpt.zig"), - .target = target, - .optimize = optimize, - .sanitize_c = enable_csan, - .sanitize_thread = enable_tsan, - .imports = &.{ - .{ .name = "lightpanda", .module = lightpanda_module }, - }, - }), - }); - b.installArtifact(exe); - - const run_cmd = b.addRunArtifact(exe); - if (b.args) |args| { - run_cmd.addArgs(args); - } - const run_step = b.step("wpt", "Run WPT tests"); - run_step.dependOn(&run_cmd.step); - } } -fn addDependencies( +fn linkV8( b: *Build, mod: *Build.Module, - opts: *Build.Step.Options, is_asan: bool, is_tsan: bool, prebuilt_v8_path: ?[]const u8, ) !void { - mod.addImport("build_config", opts.createModule()); - const target = mod.resolved_target.?; - const dep_opts = .{ + + const dep = b.dependency("v8", .{ .target = target, .optimize = mod.optimize.?, - .cache_root = b.pathFromRoot(".lp-cache"), - .prebuilt_v8_path = prebuilt_v8_path, .is_asan = is_asan, .is_tsan = is_tsan, + .inspector_subtype = false, .v8_enable_sandbox = is_tsan, - }; + .cache_root = b.pathFromRoot(".lp-cache"), + .prebuilt_v8_path = prebuilt_v8_path, + }); + 
mod.addImport("v8", dep.module("v8")); +} - mod.addIncludePath(b.path("vendor/lightpanda")); +fn linkHtml5Ever(b: *Build, mod: *Build.Module) !void { + const is_debug = if (mod.optimize.? == .Debug) true else false; - { - // html5ever + const exec_cargo = b.addSystemCommand(&.{ + "cargo", "build", + "--profile", if (is_debug) "dev" else "release", + "--manifest-path", "src/html5ever/Cargo.toml", + }); - // Build step to install html5ever dependency. - const html5ever_argv = blk: { - const argv: []const []const u8 = &.{ - "cargo", - "build", - // Seems cargo can figure out required paths out of Cargo.toml. - "--manifest-path", - "src/html5ever/Cargo.toml", - // TODO: We can prefer `--artifact-dir` once it become stable. - "--target-dir", - b.getInstallPath(.prefix, "html5ever"), - // This must be the last argument. - "--release", - }; + // TODO: We can prefer `--artifact-dir` once it become stable. + const out_dir = exec_cargo.addPrefixedOutputDirectoryArg("--target-dir=", "html5ever"); - break :blk switch (mod.optimize.?) { - // Prefer dev build on debug option. - .Debug => argv[0 .. argv.len - 1], - else => argv, - }; - }; - const html5ever_exec_cargo = b.addSystemCommand(html5ever_argv); - const html5ever_step = b.step("html5ever", "Install html5ever dependency (requires cargo)"); - html5ever_step.dependOn(&html5ever_exec_cargo.step); - opts.step.dependOn(html5ever_step); + const html5ever_step = b.step("html5ever", "Install html5ever dependency (requires cargo)"); + html5ever_step.dependOn(&exec_cargo.step); - const html5ever_obj = switch (mod.optimize.?) { - .Debug => b.getInstallPath(.prefix, "html5ever/debug/liblitefetch_html5ever.a"), - // Release builds. 
- else => b.getInstallPath(.prefix, "html5ever/release/liblitefetch_html5ever.a"), - }; + const obj = out_dir.path(b, if (is_debug) "debug" else "release").path(b, "liblitefetch_html5ever.a"); + mod.addObjectFile(obj); +} - mod.addObjectFile(.{ .cwd_relative = html5ever_obj }); - } +fn linkCurl(b: *Build, mod: *Build.Module) !void { + const target = mod.resolved_target.?; - { - // v8 - const v8_opts = b.addOptions(); - v8_opts.addOption(bool, "inspector_subtype", false); + const curl = buildCurl(b, target, mod.optimize.?); + mod.linkLibrary(curl); - const v8_mod = b.dependency("v8", dep_opts).module("v8"); - v8_mod.addOptions("default_exports", v8_opts); - mod.addImport("v8", v8_mod); - } + const zlib = buildZlib(b, target, mod.optimize.?); + curl.root_module.linkLibrary(zlib); - { - //curl - { - const is_linux = target.result.os.tag == .linux; - if (is_linux) { - mod.addCMacro("HAVE_LINUX_TCP_H", "1"); - mod.addCMacro("HAVE_MSG_NOSIGNAL", "1"); - mod.addCMacro("HAVE_GETHOSTBYNAME_R", "1"); - } - mod.addCMacro("_FILE_OFFSET_BITS", "64"); - mod.addCMacro("BUILDING_LIBCURL", "1"); - mod.addCMacro("CURL_DISABLE_AWS", "1"); - mod.addCMacro("CURL_DISABLE_DICT", "1"); - mod.addCMacro("CURL_DISABLE_DOH", "1"); - mod.addCMacro("CURL_DISABLE_FILE", "1"); - mod.addCMacro("CURL_DISABLE_FTP", "1"); - mod.addCMacro("CURL_DISABLE_GOPHER", "1"); - mod.addCMacro("CURL_DISABLE_KERBEROS", "1"); - mod.addCMacro("CURL_DISABLE_IMAP", "1"); - mod.addCMacro("CURL_DISABLE_IPFS", "1"); - mod.addCMacro("CURL_DISABLE_LDAP", "1"); - mod.addCMacro("CURL_DISABLE_LDAPS", "1"); - mod.addCMacro("CURL_DISABLE_MQTT", "1"); - mod.addCMacro("CURL_DISABLE_NTLM", "1"); - mod.addCMacro("CURL_DISABLE_PROGRESS_METER", "1"); - mod.addCMacro("CURL_DISABLE_POP3", "1"); - mod.addCMacro("CURL_DISABLE_RTSP", "1"); - mod.addCMacro("CURL_DISABLE_SMB", "1"); - mod.addCMacro("CURL_DISABLE_SMTP", "1"); - mod.addCMacro("CURL_DISABLE_TELNET", "1"); - mod.addCMacro("CURL_DISABLE_TFTP", "1"); - 
mod.addCMacro("CURL_EXTERN_SYMBOL", "__attribute__ ((__visibility__ (\"default\"))"); - mod.addCMacro("CURL_OS", if (is_linux) "\"Linux\"" else "\"mac\""); - mod.addCMacro("CURL_STATICLIB", "1"); - mod.addCMacro("ENABLE_IPV6", "1"); - mod.addCMacro("HAVE_ALARM", "1"); - mod.addCMacro("HAVE_ALLOCA_H", "1"); - mod.addCMacro("HAVE_ARPA_INET_H", "1"); - mod.addCMacro("HAVE_ARPA_TFTP_H", "1"); - mod.addCMacro("HAVE_ASSERT_H", "1"); - mod.addCMacro("HAVE_BASENAME", "1"); - mod.addCMacro("HAVE_BOOL_T", "1"); - mod.addCMacro("HAVE_BROTLI", "1"); - mod.addCMacro("HAVE_BUILTIN_AVAILABLE", "1"); - mod.addCMacro("HAVE_CLOCK_GETTIME_MONOTONIC", "1"); - mod.addCMacro("HAVE_DLFCN_H", "1"); - mod.addCMacro("HAVE_ERRNO_H", "1"); - mod.addCMacro("HAVE_FCNTL", "1"); - mod.addCMacro("HAVE_FCNTL_H", "1"); - mod.addCMacro("HAVE_FCNTL_O_NONBLOCK", "1"); - mod.addCMacro("HAVE_FREEADDRINFO", "1"); - mod.addCMacro("HAVE_FSETXATTR", "1"); - mod.addCMacro("HAVE_FSETXATTR_5", "1"); - mod.addCMacro("HAVE_FTRUNCATE", "1"); - mod.addCMacro("HAVE_GETADDRINFO", "1"); - mod.addCMacro("HAVE_GETEUID", "1"); - mod.addCMacro("HAVE_GETHOSTBYNAME", "1"); - mod.addCMacro("HAVE_GETHOSTBYNAME_R_6", "1"); - mod.addCMacro("HAVE_GETHOSTNAME", "1"); - mod.addCMacro("HAVE_GETPEERNAME", "1"); - mod.addCMacro("HAVE_GETPPID", "1"); - mod.addCMacro("HAVE_GETPPID", "1"); - mod.addCMacro("HAVE_GETPROTOBYNAME", "1"); - mod.addCMacro("HAVE_GETPWUID", "1"); - mod.addCMacro("HAVE_GETPWUID_R", "1"); - mod.addCMacro("HAVE_GETRLIMIT", "1"); - mod.addCMacro("HAVE_GETSOCKNAME", "1"); - mod.addCMacro("HAVE_GETTIMEOFDAY", "1"); - mod.addCMacro("HAVE_GMTIME_R", "1"); - mod.addCMacro("HAVE_IDN2_H", "1"); - mod.addCMacro("HAVE_IF_NAMETOINDEX", "1"); - mod.addCMacro("HAVE_IFADDRS_H", "1"); - mod.addCMacro("HAVE_INET_ADDR", "1"); - mod.addCMacro("HAVE_INET_PTON", "1"); - mod.addCMacro("HAVE_INTTYPES_H", "1"); - mod.addCMacro("HAVE_IOCTL", "1"); - mod.addCMacro("HAVE_IOCTL_FIONBIO", "1"); - mod.addCMacro("HAVE_IOCTL_SIOCGIFADDR", "1"); 
- mod.addCMacro("HAVE_LDAP_URL_PARSE", "1"); - mod.addCMacro("HAVE_LIBGEN_H", "1"); - mod.addCMacro("HAVE_LIBZ", "1"); - mod.addCMacro("HAVE_LL", "1"); - mod.addCMacro("HAVE_LOCALE_H", "1"); - mod.addCMacro("HAVE_LOCALTIME_R", "1"); - mod.addCMacro("HAVE_LONGLONG", "1"); - mod.addCMacro("HAVE_MALLOC_H", "1"); - mod.addCMacro("HAVE_MEMORY_H", "1"); - mod.addCMacro("HAVE_NET_IF_H", "1"); - mod.addCMacro("HAVE_NETDB_H", "1"); - mod.addCMacro("HAVE_NETINET_IN_H", "1"); - mod.addCMacro("HAVE_NETINET_TCP_H", "1"); - mod.addCMacro("HAVE_PIPE", "1"); - mod.addCMacro("HAVE_POLL", "1"); - mod.addCMacro("HAVE_POLL_FINE", "1"); - mod.addCMacro("HAVE_POLL_H", "1"); - mod.addCMacro("HAVE_POSIX_STRERROR_R", "1"); - mod.addCMacro("HAVE_PTHREAD_H", "1"); - mod.addCMacro("HAVE_PWD_H", "1"); - mod.addCMacro("HAVE_RECV", "1"); - mod.addCMacro("HAVE_SA_FAMILY_T", "1"); - mod.addCMacro("HAVE_SELECT", "1"); - mod.addCMacro("HAVE_SEND", "1"); - mod.addCMacro("HAVE_SETJMP_H", "1"); - mod.addCMacro("HAVE_SETLOCALE", "1"); - mod.addCMacro("HAVE_SETRLIMIT", "1"); - mod.addCMacro("HAVE_SETSOCKOPT", "1"); - mod.addCMacro("HAVE_SIGACTION", "1"); - mod.addCMacro("HAVE_SIGINTERRUPT", "1"); - mod.addCMacro("HAVE_SIGNAL", "1"); - mod.addCMacro("HAVE_SIGNAL_H", "1"); - mod.addCMacro("HAVE_SIGSETJMP", "1"); - mod.addCMacro("HAVE_SOCKADDR_IN6_SIN6_SCOPE_ID", "1"); - mod.addCMacro("HAVE_SOCKET", "1"); - mod.addCMacro("HAVE_STDBOOL_H", "1"); - mod.addCMacro("HAVE_STDINT_H", "1"); - mod.addCMacro("HAVE_STDIO_H", "1"); - mod.addCMacro("HAVE_STDLIB_H", "1"); - mod.addCMacro("HAVE_STRCASECMP", "1"); - mod.addCMacro("HAVE_STRDUP", "1"); - mod.addCMacro("HAVE_STRERROR_R", "1"); - mod.addCMacro("HAVE_STRING_H", "1"); - mod.addCMacro("HAVE_STRINGS_H", "1"); - mod.addCMacro("HAVE_STRSTR", "1"); - mod.addCMacro("HAVE_STRTOK_R", "1"); - mod.addCMacro("HAVE_STRTOLL", "1"); - mod.addCMacro("HAVE_STRUCT_SOCKADDR_STORAGE", "1"); - mod.addCMacro("HAVE_STRUCT_TIMEVAL", "1"); - mod.addCMacro("HAVE_SYS_IOCTL_H", "1"); - 
mod.addCMacro("HAVE_SYS_PARAM_H", "1"); - mod.addCMacro("HAVE_SYS_POLL_H", "1"); - mod.addCMacro("HAVE_SYS_RESOURCE_H", "1"); - mod.addCMacro("HAVE_SYS_SELECT_H", "1"); - mod.addCMacro("HAVE_SYS_SOCKET_H", "1"); - mod.addCMacro("HAVE_SYS_STAT_H", "1"); - mod.addCMacro("HAVE_SYS_TIME_H", "1"); - mod.addCMacro("HAVE_SYS_TYPES_H", "1"); - mod.addCMacro("HAVE_SYS_UIO_H", "1"); - mod.addCMacro("HAVE_SYS_UN_H", "1"); - mod.addCMacro("HAVE_TERMIO_H", "1"); - mod.addCMacro("HAVE_TERMIOS_H", "1"); - mod.addCMacro("HAVE_TIME_H", "1"); - mod.addCMacro("HAVE_UNAME", "1"); - mod.addCMacro("HAVE_UNISTD_H", "1"); - mod.addCMacro("HAVE_UTIME", "1"); - mod.addCMacro("HAVE_UTIME_H", "1"); - mod.addCMacro("HAVE_UTIMES", "1"); - mod.addCMacro("HAVE_VARIADIC_MACROS_C99", "1"); - mod.addCMacro("HAVE_VARIADIC_MACROS_GCC", "1"); - mod.addCMacro("HAVE_ZLIB_H", "1"); - mod.addCMacro("RANDOM_FILE", "\"/dev/urandom\""); - mod.addCMacro("RECV_TYPE_ARG1", "int"); - mod.addCMacro("RECV_TYPE_ARG2", "void *"); - mod.addCMacro("RECV_TYPE_ARG3", "size_t"); - mod.addCMacro("RECV_TYPE_ARG4", "int"); - mod.addCMacro("RECV_TYPE_RETV", "ssize_t"); - mod.addCMacro("SEND_QUAL_ARG2", "const"); - mod.addCMacro("SEND_TYPE_ARG1", "int"); - mod.addCMacro("SEND_TYPE_ARG2", "void *"); - mod.addCMacro("SEND_TYPE_ARG3", "size_t"); - mod.addCMacro("SEND_TYPE_ARG4", "int"); - mod.addCMacro("SEND_TYPE_RETV", "ssize_t"); - mod.addCMacro("SIZEOF_CURL_OFF_T", "8"); - mod.addCMacro("SIZEOF_INT", "4"); - mod.addCMacro("SIZEOF_LONG", "8"); - mod.addCMacro("SIZEOF_OFF_T", "8"); - mod.addCMacro("SIZEOF_SHORT", "2"); - mod.addCMacro("SIZEOF_SIZE_T", "8"); - mod.addCMacro("SIZEOF_TIME_T", "8"); - mod.addCMacro("STDC_HEADERS", "1"); - mod.addCMacro("TIME_WITH_SYS_TIME", "1"); - mod.addCMacro("USE_NGHTTP2", "1"); - mod.addCMacro("USE_OPENSSL", "1"); - mod.addCMacro("OPENSSL_IS_BORINGSSL", "1"); - mod.addCMacro("USE_THREADS_POSIX", "1"); - mod.addCMacro("USE_UNIX_SOCKETS", "1"); - } + const brotli = buildBrotli(b, target, 
mod.optimize.?); + for (brotli) |lib| curl.root_module.linkLibrary(lib); - try buildZlib(b, mod); - try buildBrotli(b, mod); - const boringssl_dep = b.dependency("boringssl-zig", .{ - .target = target, - .optimize = mod.optimize.?, - .force_pic = true, - }); + const nghttp2 = buildNghttp2(b, target, mod.optimize.?); + curl.root_module.linkLibrary(nghttp2); - const ssl = boringssl_dep.artifact("ssl"); - ssl.bundle_ubsan_rt = false; - const crypto = boringssl_dep.artifact("crypto"); - crypto.bundle_ubsan_rt = false; + const boringssl = buildBoringSsl(b, target, mod.optimize.?); + for (boringssl) |lib| curl.root_module.linkLibrary(lib); - mod.linkLibrary(ssl); - mod.linkLibrary(crypto); - try buildNghttp2(b, mod); - try buildCurl(b, mod); - - switch (target.result.os.tag) { - .macos => { - // needed for proxying on mac - mod.addSystemFrameworkPath(.{ .cwd_relative = "/System/Library/Frameworks" }); - mod.linkFramework("CoreFoundation", .{}); - mod.linkFramework("SystemConfiguration", .{}); - }, - else => {}, - } + switch (target.result.os.tag) { + .macos => { + // needed for proxying on mac + mod.addSystemFrameworkPath(.{ .cwd_relative = "/System/Library/Frameworks" }); + mod.linkFramework("CoreFoundation", .{}); + mod.linkFramework("SystemConfiguration", .{}); + }, + else => {}, } } -fn buildZlib(b: *Build, m: *Build.Module) !void { - const zlib = b.addLibrary(.{ - .name = "zlib", - .root_module = m, +fn buildZlib(b: *Build, target: Build.ResolvedTarget, optimize: std.builtin.OptimizeMode) *Build.Step.Compile { + const dep = b.dependency("zlib", .{}); + + const mod = b.createModule(.{ + .target = target, + .optimize = optimize, + .link_libc = true, }); - const root = "vendor/zlib/"; - zlib.installHeader(b.path(root ++ "zlib.h"), "zlib.h"); - zlib.installHeader(b.path(root ++ "zconf.h"), "zconf.h"); - zlib.addCSourceFiles(.{ .flags = &.{ - "-DHAVE_SYS_TYPES_H", - "-DHAVE_STDINT_H", - "-DHAVE_STDDEF_H", - }, .files = &.{ - root ++ "adler32.c", - root ++ "compress.c", - 
root ++ "crc32.c", - root ++ "deflate.c", - root ++ "gzclose.c", - root ++ "gzlib.c", - root ++ "gzread.c", - root ++ "gzwrite.c", - root ++ "inflate.c", - root ++ "infback.c", - root ++ "inftrees.c", - root ++ "inffast.c", - root ++ "trees.c", - root ++ "uncompr.c", - root ++ "zutil.c", - } }); -} - -fn buildBrotli(b: *Build, m: *Build.Module) !void { - const brotli = b.addLibrary(.{ - .name = "brotli", - .root_module = m, - }); - - const root = "vendor/brotli/c/"; - brotli.addIncludePath(b.path(root ++ "include")); - brotli.addCSourceFiles(.{ .flags = &.{}, .files = &.{ - root ++ "common/constants.c", - root ++ "common/context.c", - root ++ "common/dictionary.c", - root ++ "common/platform.c", - root ++ "common/shared_dictionary.c", - root ++ "common/transform.c", - root ++ "dec/bit_reader.c", - root ++ "dec/decode.c", - root ++ "dec/huffman.c", - root ++ "dec/prefix.c", - root ++ "dec/state.c", - root ++ "dec/static_init.c", - } }); -} - -fn buildNghttp2(b: *Build, m: *Build.Module) !void { - const nghttp2 = b.addLibrary(.{ - .name = "nghttp2", - .root_module = m, - }); - - const root = "vendor/nghttp2/"; - nghttp2.addIncludePath(b.path(root ++ "lib")); - nghttp2.addIncludePath(b.path(root ++ "lib/includes")); - nghttp2.addCSourceFiles(.{ .flags = &.{ - "-DNGHTTP2_STATICLIB", - "-DHAVE_NETINET_IN", - "-DHAVE_TIME_H", - }, .files = &.{ - root ++ "lib/sfparse.c", - root ++ "lib/nghttp2_alpn.c", - root ++ "lib/nghttp2_buf.c", - root ++ "lib/nghttp2_callbacks.c", - root ++ "lib/nghttp2_debug.c", - root ++ "lib/nghttp2_extpri.c", - root ++ "lib/nghttp2_frame.c", - root ++ "lib/nghttp2_hd.c", - root ++ "lib/nghttp2_hd_huffman.c", - root ++ "lib/nghttp2_hd_huffman_data.c", - root ++ "lib/nghttp2_helper.c", - root ++ "lib/nghttp2_http.c", - root ++ "lib/nghttp2_map.c", - root ++ "lib/nghttp2_mem.c", - root ++ "lib/nghttp2_option.c", - root ++ "lib/nghttp2_outbound_item.c", - root ++ "lib/nghttp2_pq.c", - root ++ "lib/nghttp2_priority_spec.c", - root ++ 
"lib/nghttp2_queue.c", - root ++ "lib/nghttp2_rcbuf.c", - root ++ "lib/nghttp2_session.c", - root ++ "lib/nghttp2_stream.c", - root ++ "lib/nghttp2_submit.c", - root ++ "lib/nghttp2_version.c", - root ++ "lib/nghttp2_ratelim.c", - root ++ "lib/nghttp2_time.c", - } }); -} - -fn buildCurl(b: *Build, m: *Build.Module) !void { - const curl = b.addLibrary(.{ - .name = "curl", - .root_module = m, - }); - - const root = "vendor/curl/"; - - curl.addIncludePath(b.path(root ++ "lib")); - curl.addIncludePath(b.path(root ++ "include")); - curl.addIncludePath(b.path("vendor/zlib")); - - curl.addCSourceFiles(.{ - .flags = &.{}, + const lib = b.addLibrary(.{ .name = "z", .root_module = mod }); + lib.installHeadersDirectory(dep.path(""), "", .{}); + lib.addCSourceFiles(.{ + .root = dep.path(""), + .flags = &.{ + "-DHAVE_SYS_TYPES_H", + "-DHAVE_STDINT_H", + "-DHAVE_STDDEF_H", + "-DHAVE_UNISTD_H", + }, .files = &.{ - root ++ "lib/altsvc.c", - root ++ "lib/amigaos.c", - root ++ "lib/asyn-ares.c", - root ++ "lib/asyn-base.c", - root ++ "lib/asyn-thrdd.c", - root ++ "lib/bufq.c", - root ++ "lib/bufref.c", - root ++ "lib/cf-h1-proxy.c", - root ++ "lib/cf-h2-proxy.c", - root ++ "lib/cf-haproxy.c", - root ++ "lib/cf-https-connect.c", - root ++ "lib/cf-socket.c", - root ++ "lib/cfilters.c", - root ++ "lib/conncache.c", - root ++ "lib/connect.c", - root ++ "lib/content_encoding.c", - root ++ "lib/cookie.c", - root ++ "lib/cshutdn.c", - root ++ "lib/curl_addrinfo.c", - root ++ "lib/curl_des.c", - root ++ "lib/curl_endian.c", - root ++ "lib/curl_fnmatch.c", - root ++ "lib/curl_get_line.c", - root ++ "lib/curl_gethostname.c", - root ++ "lib/curl_gssapi.c", - root ++ "lib/curl_memrchr.c", - root ++ "lib/curl_ntlm_core.c", - root ++ "lib/curl_range.c", - root ++ "lib/curl_rtmp.c", - root ++ "lib/curl_sasl.c", - root ++ "lib/curl_sha512_256.c", - root ++ "lib/curl_sspi.c", - root ++ "lib/curl_threads.c", - root ++ "lib/curl_trc.c", - root ++ "lib/cw-out.c", - root ++ "lib/cw-pause.c", - root ++ 
"lib/dict.c", - root ++ "lib/doh.c", - root ++ "lib/dynhds.c", - root ++ "lib/easy.c", - root ++ "lib/easygetopt.c", - root ++ "lib/easyoptions.c", - root ++ "lib/escape.c", - root ++ "lib/fake_addrinfo.c", - root ++ "lib/file.c", - root ++ "lib/fileinfo.c", - root ++ "lib/fopen.c", - root ++ "lib/formdata.c", - root ++ "lib/ftp.c", - root ++ "lib/ftplistparser.c", - root ++ "lib/getenv.c", - root ++ "lib/getinfo.c", - root ++ "lib/gopher.c", - root ++ "lib/hash.c", - root ++ "lib/headers.c", - root ++ "lib/hmac.c", - root ++ "lib/hostip.c", - root ++ "lib/hostip4.c", - root ++ "lib/hostip6.c", - root ++ "lib/hsts.c", - root ++ "lib/http.c", - root ++ "lib/http1.c", - root ++ "lib/http2.c", - root ++ "lib/http_aws_sigv4.c", - root ++ "lib/http_chunks.c", - root ++ "lib/http_digest.c", - root ++ "lib/http_negotiate.c", - root ++ "lib/http_ntlm.c", - root ++ "lib/http_proxy.c", - root ++ "lib/httpsrr.c", - root ++ "lib/idn.c", - root ++ "lib/if2ip.c", - root ++ "lib/imap.c", - root ++ "lib/krb5.c", - root ++ "lib/ldap.c", - root ++ "lib/llist.c", - root ++ "lib/macos.c", - root ++ "lib/md4.c", - root ++ "lib/md5.c", - root ++ "lib/memdebug.c", - root ++ "lib/mime.c", - root ++ "lib/mprintf.c", - root ++ "lib/mqtt.c", - root ++ "lib/multi.c", - root ++ "lib/multi_ev.c", - root ++ "lib/netrc.c", - root ++ "lib/noproxy.c", - root ++ "lib/openldap.c", - root ++ "lib/parsedate.c", - root ++ "lib/pingpong.c", - root ++ "lib/pop3.c", - root ++ "lib/progress.c", - root ++ "lib/psl.c", - root ++ "lib/rand.c", - root ++ "lib/rename.c", - root ++ "lib/request.c", - root ++ "lib/rtsp.c", - root ++ "lib/select.c", - root ++ "lib/sendf.c", - root ++ "lib/setopt.c", - root ++ "lib/sha256.c", - root ++ "lib/share.c", - root ++ "lib/slist.c", - root ++ "lib/smb.c", - root ++ "lib/smtp.c", - root ++ "lib/socketpair.c", - root ++ "lib/socks.c", - root ++ "lib/socks_gssapi.c", - root ++ "lib/socks_sspi.c", - root ++ "lib/speedcheck.c", - root ++ "lib/splay.c", - root ++ "lib/strcase.c", 
- root ++ "lib/strdup.c", - root ++ "lib/strequal.c", - root ++ "lib/strerror.c", - root ++ "lib/system_win32.c", - root ++ "lib/telnet.c", - root ++ "lib/tftp.c", - root ++ "lib/transfer.c", - root ++ "lib/uint-bset.c", - root ++ "lib/uint-hash.c", - root ++ "lib/uint-spbset.c", - root ++ "lib/uint-table.c", - root ++ "lib/url.c", - root ++ "lib/urlapi.c", - root ++ "lib/version.c", - root ++ "lib/ws.c", - root ++ "lib/curlx/base64.c", - root ++ "lib/curlx/dynbuf.c", - root ++ "lib/curlx/inet_ntop.c", - root ++ "lib/curlx/nonblock.c", - root ++ "lib/curlx/strparse.c", - root ++ "lib/curlx/timediff.c", - root ++ "lib/curlx/timeval.c", - root ++ "lib/curlx/wait.c", - root ++ "lib/curlx/warnless.c", - root ++ "lib/vquic/curl_ngtcp2.c", - root ++ "lib/vquic/curl_osslq.c", - root ++ "lib/vquic/curl_quiche.c", - root ++ "lib/vquic/vquic.c", - root ++ "lib/vquic/vquic-tls.c", - root ++ "lib/vauth/cleartext.c", - root ++ "lib/vauth/cram.c", - root ++ "lib/vauth/digest.c", - root ++ "lib/vauth/digest_sspi.c", - root ++ "lib/vauth/gsasl.c", - root ++ "lib/vauth/krb5_gssapi.c", - root ++ "lib/vauth/krb5_sspi.c", - root ++ "lib/vauth/ntlm.c", - root ++ "lib/vauth/ntlm_sspi.c", - root ++ "lib/vauth/oauth2.c", - root ++ "lib/vauth/spnego_gssapi.c", - root ++ "lib/vauth/spnego_sspi.c", - root ++ "lib/vauth/vauth.c", - root ++ "lib/vtls/cipher_suite.c", - root ++ "lib/vtls/openssl.c", - root ++ "lib/vtls/hostcheck.c", - root ++ "lib/vtls/keylog.c", - root ++ "lib/vtls/vtls.c", - root ++ "lib/vtls/vtls_scache.c", - root ++ "lib/vtls/x509asn1.c", + "adler32.c", "compress.c", "crc32.c", + "deflate.c", "gzclose.c", "gzlib.c", + "gzread.c", "gzwrite.c", "infback.c", + "inffast.c", "inflate.c", "inftrees.c", + "trees.c", "uncompr.c", "zutil.c", }, }); + + return lib; +} + +fn buildBrotli(b: *Build, target: Build.ResolvedTarget, optimize: std.builtin.OptimizeMode) [3]*Build.Step.Compile { + const dep = b.dependency("brotli", .{}); + + const mod = b.createModule(.{ + .target = target, + 
.optimize = optimize, + .link_libc = true, + }); + mod.addIncludePath(dep.path("c/include")); + + const brotlicmn = b.addLibrary(.{ .name = "brotlicommon", .root_module = mod }); + const brotlidec = b.addLibrary(.{ .name = "brotlidec", .root_module = mod }); + const brotlienc = b.addLibrary(.{ .name = "brotlienc", .root_module = mod }); + + brotlicmn.installHeadersDirectory(dep.path("c/include/brotli"), "brotli", .{}); + brotlicmn.addCSourceFiles(.{ + .root = dep.path("c/common"), + .files = &.{ + "transform.c", "shared_dictionary.c", "platform.c", + "dictionary.c", "context.c", "constants.c", + }, + }); + brotlidec.addCSourceFiles(.{ + .root = dep.path("c/dec"), + .files = &.{ + "bit_reader.c", "decode.c", "huffman.c", + "prefix.c", "state.c", "static_init.c", + }, + }); + brotlienc.addCSourceFiles(.{ + .root = dep.path("c/enc"), + .files = &.{ + "backward_references.c", "backward_references_hq.c", "bit_cost.c", + "block_splitter.c", "brotli_bit_stream.c", "cluster.c", + "command.c", "compound_dictionary.c", "compress_fragment.c", + "compress_fragment_two_pass.c", "dictionary_hash.c", "encode.c", + "encoder_dict.c", "entropy_encode.c", "fast_log.c", + "histogram.c", "literal_cost.c", "memory.c", + "metablock.c", "static_dict.c", "static_dict_lut.c", + "static_init.c", "utf8_util.c", + }, + }); + + return .{ brotlicmn, brotlidec, brotlienc }; +} + +fn buildBoringSsl(b: *Build, target: Build.ResolvedTarget, optimize: std.builtin.OptimizeMode) [2]*Build.Step.Compile { + const dep = b.dependency("boringssl-zig", .{ + .target = target, + .optimize = optimize, + .force_pic = true, + }); + + const ssl = dep.artifact("ssl"); + ssl.bundle_ubsan_rt = false; + + const crypto = dep.artifact("crypto"); + crypto.bundle_ubsan_rt = false; + + return .{ ssl, crypto }; +} + +fn buildNghttp2(b: *Build, target: Build.ResolvedTarget, optimize: std.builtin.OptimizeMode) *Build.Step.Compile { + const dep = b.dependency("nghttp2", .{}); + + const mod = b.createModule(.{ + .target = 
target, + .optimize = optimize, + .link_libc = true, + }); + mod.addIncludePath(dep.path("lib/includes")); + + const config = b.addConfigHeader(.{ + .include_path = "nghttp2ver.h", + .style = .{ .cmake = dep.path("lib/includes/nghttp2/nghttp2ver.h.in") }, + }, .{ + .PACKAGE_VERSION = "1.68.90", + .PACKAGE_VERSION_NUM = 0x016890, + }); + mod.addConfigHeader(config); + + const lib = b.addLibrary(.{ .name = "nghttp2", .root_module = mod }); + + lib.installConfigHeader(config); + lib.installHeadersDirectory(dep.path("lib/includes/nghttp2"), "nghttp2", .{}); + lib.addCSourceFiles(.{ + .root = dep.path("lib"), + .flags = &.{ + "-DNGHTTP2_STATICLIB", + "-DHAVE_TIME_H", + "-DHAVE_ARPA_INET_H", + "-DHAVE_NETINET_IN_H", + }, + .files = &.{ + "sfparse.c", "nghttp2_alpn.c", "nghttp2_buf.c", + "nghttp2_callbacks.c", "nghttp2_debug.c", "nghttp2_extpri.c", + "nghttp2_frame.c", "nghttp2_hd.c", "nghttp2_hd_huffman.c", + "nghttp2_hd_huffman_data.c", "nghttp2_helper.c", "nghttp2_http.c", + "nghttp2_map.c", "nghttp2_mem.c", "nghttp2_option.c", + "nghttp2_outbound_item.c", "nghttp2_pq.c", "nghttp2_priority_spec.c", + "nghttp2_queue.c", "nghttp2_rcbuf.c", "nghttp2_session.c", + "nghttp2_stream.c", "nghttp2_submit.c", "nghttp2_version.c", + "nghttp2_ratelim.c", "nghttp2_time.c", + }, + }); + + return lib; +} + +fn buildCurl( + b: *Build, + target: Build.ResolvedTarget, + optimize: std.builtin.OptimizeMode, +) *Build.Step.Compile { + const dep = b.dependency("curl", .{}); + + const mod = b.createModule(.{ + .target = target, + .optimize = optimize, + .link_libc = true, + }); + mod.addIncludePath(dep.path("lib")); + mod.addIncludePath(dep.path("include")); + + const os = target.result.os.tag; + const abi = target.result.abi; + + const is_gnu = abi.isGnu(); + const is_ios = os == .ios; + const is_android = abi.isAndroid(); + const is_linux = os == .linux; + const is_darwin = os.isDarwin(); + const is_windows = os == .windows; + const is_netbsd = os == .netbsd; + const is_openbsd = os == 
.openbsd; + const is_freebsd = os == .freebsd; + + const byte_size = struct { + fn it(b2: *std.Build, target2: Build.ResolvedTarget, name: []const u8, comptime ctype: std.Target.CType) []const u8 { + const size = target2.result.cTypeByteSize(ctype); + return std.fmt.allocPrint(b2.allocator, "#define SIZEOF_{s} {d}", .{ name, size }) catch @panic("OOM"); + } + }.it; + + const config = .{ + .HAVE_LIBZ = true, + .HAVE_BROTLI = true, + .USE_NGHTTP2 = true, + + .USE_OPENSSL = true, + .OPENSSL_IS_BORINGSSL = true, + .CURL_CA_PATH = null, + .CURL_CA_BUNDLE = null, + .CURL_CA_FALLBACK = false, + .CURL_CA_SEARCH_SAFE = false, + .CURL_DEFAULT_SSL_BACKEND = "openssl", + + .CURL_DISABLE_AWS = true, + .CURL_DISABLE_DICT = true, + .CURL_DISABLE_DOH = true, + .CURL_DISABLE_FILE = true, + .CURL_DISABLE_FTP = true, + .CURL_DISABLE_GOPHER = true, + .CURL_DISABLE_KERBEROS_AUTH = true, + .CURL_DISABLE_IMAP = true, + .CURL_DISABLE_IPFS = true, + .CURL_DISABLE_LDAP = true, + .CURL_DISABLE_LDAPS = true, + .CURL_DISABLE_MQTT = true, + .CURL_DISABLE_NTLM = true, + .CURL_DISABLE_PROGRESS_METER = true, + .CURL_DISABLE_POP3 = true, + .CURL_DISABLE_RTSP = true, + .CURL_DISABLE_SMB = true, + .CURL_DISABLE_SMTP = true, + .CURL_DISABLE_TELNET = true, + .CURL_DISABLE_TFTP = true, + + .ssize_t = null, + ._FILE_OFFSET_BITS = 64, + + .USE_IPV6 = true, + .CURL_OS = switch (os) { + .linux => if (is_android) "\"android\"" else "\"linux\"", + else => std.fmt.allocPrint(b.allocator, "\"{s}\"", .{@tagName(os)}) catch @panic("OOM"), + }, + + // Adjusts the sizes of variables + .SIZEOF_INT_CODE = byte_size(b, target, "INT", .int), + .SIZEOF_LONG_CODE = byte_size(b, target, "LONG", .long), + .SIZEOF_LONG_LONG_CODE = byte_size(b, target, "LONG_LONG", .longlong), + + .SIZEOF_OFF_T_CODE = byte_size(b, target, "OFF_T", .longlong), + .SIZEOF_CURL_OFF_T_CODE = byte_size(b, target, "CURL_OFF_T", .longlong), + .SIZEOF_CURL_SOCKET_T_CODE = byte_size(b, target, "CURL_SOCKET_T", .int), + + .SIZEOF_SIZE_T_CODE = 
byte_size(b, target, "SIZE_T", .longlong), + .SIZEOF_TIME_T_CODE = byte_size(b, target, "TIME_T", .longlong), + + // headers availability + .HAVE_ARPA_INET_H = !is_windows, + .HAVE_DIRENT_H = true, + .HAVE_FCNTL_H = true, + .HAVE_IFADDRS_H = !is_windows, + .HAVE_IO_H = is_windows, + .HAVE_LIBGEN_H = true, + .HAVE_LINUX_TCP_H = is_linux and is_gnu, + .HAVE_LOCALE_H = true, + .HAVE_NETDB_H = !is_windows, + .HAVE_NETINET_IN6_H = is_android, + .HAVE_NETINET_IN_H = !is_windows, + .HAVE_NETINET_TCP_H = !is_windows, + .HAVE_NETINET_UDP_H = !is_windows, + .HAVE_NET_IF_H = !is_windows, + .HAVE_POLL_H = !is_windows, + .HAVE_PWD_H = !is_windows, + .HAVE_STDATOMIC_H = true, + .HAVE_STDBOOL_H = true, + .HAVE_STDDEF_H = true, + .HAVE_STDINT_H = true, + .HAVE_STRINGS_H = true, + .HAVE_STROPTS_H = false, + .HAVE_SYS_EVENTFD_H = is_linux or is_freebsd or is_netbsd, + .HAVE_SYS_FILIO_H = !is_linux and !is_windows, + .HAVE_SYS_IOCTL_H = !is_windows, + .HAVE_SYS_PARAM_H = true, + .HAVE_SYS_POLL_H = !is_windows, + .HAVE_SYS_RESOURCE_H = !is_windows, + .HAVE_SYS_SELECT_H = !is_windows, + .HAVE_SYS_SOCKIO_H = !is_linux and !is_windows, + .HAVE_SYS_TYPES_H = true, + .HAVE_SYS_UN_H = !is_windows, + .HAVE_SYS_UTIME_H = is_windows, + .HAVE_TERMIOS_H = !is_windows, + .HAVE_TERMIO_H = is_linux, + .HAVE_UNISTD_H = true, + .HAVE_UTIME_H = true, + .STDC_HEADERS = true, + + // general environment + .CURL_KRB5_VERSION = null, + .HAVE_ALARM = !is_windows, + .HAVE_ARC4RANDOM = is_android, + .HAVE_ATOMIC = true, + .HAVE_BOOL_T = true, + .HAVE_BUILTIN_AVAILABLE = true, + .HAVE_CLOCK_GETTIME_MONOTONIC = !is_darwin and !is_windows, + .HAVE_CLOCK_GETTIME_MONOTONIC_RAW = is_linux, + .HAVE_FILE_OFFSET_BITS = true, + .HAVE_GETEUID = !is_windows, + .HAVE_GETPPID = !is_windows, + .HAVE_GETTIMEOFDAY = true, + .HAVE_GLIBC_STRERROR_R = is_gnu, + .HAVE_GMTIME_R = !is_windows, + .HAVE_LOCALTIME_R = !is_windows, + .HAVE_LONGLONG = !is_windows, + .HAVE_MACH_ABSOLUTE_TIME = is_darwin, + .HAVE_MEMRCHR = !is_darwin and 
!is_windows, + .HAVE_POSIX_STRERROR_R = !is_gnu and !is_windows, + .HAVE_PTHREAD_H = !is_windows, + .HAVE_SETLOCALE = true, + .HAVE_SETRLIMIT = !is_windows, + .HAVE_SIGACTION = !is_windows, + .HAVE_SIGINTERRUPT = !is_windows, + .HAVE_SIGNAL = true, + .HAVE_SIGSETJMP = !is_windows, + .HAVE_SIZEOF_SA_FAMILY_T = false, + .HAVE_SIZEOF_SUSECONDS_T = false, + .HAVE_SNPRINTF = true, + .HAVE_STRCASECMP = !is_windows, + .HAVE_STRCMPI = false, + .HAVE_STRDUP = true, + .HAVE_STRERROR_R = !is_windows, + .HAVE_STRICMP = false, + .HAVE_STRUCT_TIMEVAL = true, + .HAVE_TIME_T_UNSIGNED = false, + .HAVE_UTIME = true, + .HAVE_UTIMES = !is_windows, + .HAVE_WRITABLE_ARGV = !is_windows, + .HAVE__SETMODE = is_windows, + .USE_THREADS_POSIX = !is_windows, + + // filesystem, network + .HAVE_ACCEPT4 = is_linux or is_freebsd or is_netbsd or is_openbsd, + .HAVE_BASENAME = true, + .HAVE_CLOSESOCKET = is_windows, + .HAVE_DECL_FSEEKO = !is_windows, + .HAVE_EVENTFD = is_linux or is_freebsd or is_netbsd, + .HAVE_FCNTL = !is_windows, + .HAVE_FCNTL_O_NONBLOCK = !is_windows, + .HAVE_FNMATCH = !is_windows, + .HAVE_FREEADDRINFO = true, + .HAVE_FSEEKO = !is_windows, + .HAVE_FSETXATTR = is_darwin or is_linux or is_netbsd, + .HAVE_FSETXATTR_5 = is_linux or is_netbsd, + .HAVE_FSETXATTR_6 = is_darwin, + .HAVE_FTRUNCATE = true, + .HAVE_GETADDRINFO = true, + .HAVE_GETADDRINFO_THREADSAFE = is_linux or is_freebsd or is_netbsd, + .HAVE_GETHOSTBYNAME_R = is_linux or is_freebsd, + .HAVE_GETHOSTBYNAME_R_3 = false, + .HAVE_GETHOSTBYNAME_R_3_REENTRANT = false, + .HAVE_GETHOSTBYNAME_R_5 = false, + .HAVE_GETHOSTBYNAME_R_5_REENTRANT = false, + .HAVE_GETHOSTBYNAME_R_6 = is_linux, + .HAVE_GETHOSTBYNAME_R_6_REENTRANT = is_linux, + .HAVE_GETHOSTNAME = true, + .HAVE_GETIFADDRS = if (is_windows) false else !is_android or target.result.os.versionRange().linux.android >= 24, + .HAVE_GETPASS_R = is_netbsd, + .HAVE_GETPEERNAME = true, + .HAVE_GETPWUID = !is_windows, + .HAVE_GETPWUID_R = !is_windows, + .HAVE_GETRLIMIT = !is_windows, 
+ .HAVE_GETSOCKNAME = true, + .HAVE_IF_NAMETOINDEX = !is_windows, + .HAVE_INET_NTOP = !is_windows, + .HAVE_INET_PTON = !is_windows, + .HAVE_IOCTLSOCKET = is_windows, + .HAVE_IOCTLSOCKET_CAMEL = false, + .HAVE_IOCTLSOCKET_CAMEL_FIONBIO = false, + .HAVE_IOCTLSOCKET_FIONBIO = is_windows, + .HAVE_IOCTL_FIONBIO = !is_windows, + .HAVE_IOCTL_SIOCGIFADDR = !is_windows, + .HAVE_MSG_NOSIGNAL = !is_windows, + .HAVE_OPENDIR = true, + .HAVE_PIPE = !is_windows, + .HAVE_PIPE2 = is_linux or is_freebsd or is_netbsd or is_openbsd, + .HAVE_POLL = !is_windows, + .HAVE_REALPATH = !is_windows, + .HAVE_RECV = true, + .HAVE_SA_FAMILY_T = !is_windows, + .HAVE_SCHED_YIELD = !is_windows, + .HAVE_SELECT = true, + .HAVE_SEND = true, + .HAVE_SENDMMSG = !is_darwin and !is_windows, + .HAVE_SENDMSG = !is_windows, + .HAVE_SETMODE = !is_linux, + .HAVE_SETSOCKOPT_SO_NONBLOCK = false, + .HAVE_SOCKADDR_IN6_SIN6_ADDR = !is_windows, + .HAVE_SOCKADDR_IN6_SIN6_SCOPE_ID = true, + .HAVE_SOCKET = true, + .HAVE_SOCKETPAIR = !is_windows, + .HAVE_STRUCT_SOCKADDR_STORAGE = true, + .HAVE_SUSECONDS_T = is_android or is_ios, + .USE_UNIX_SOCKETS = !is_windows, + }; + + const curl_config = b.addConfigHeader(.{ + .include_path = "curl_config.h", + .style = .{ .cmake = dep.path("lib/curl_config-cmake.h.in") }, + }, .{ + .CURL_EXTERN_SYMBOL = "__attribute__ ((__visibility__ (\"default\"))", + }); + curl_config.addValues(config); + + const lib = b.addLibrary(.{ .name = "curl", .root_module = mod }); + lib.addConfigHeader(curl_config); + lib.installHeadersDirectory(dep.path("include/curl"), "curl", .{}); + lib.addCSourceFiles(.{ + .root = dep.path("lib"), + .flags = &.{ + "-D_GNU_SOURCE", + "-DHAVE_CONFIG_H", + "-DCURL_STATICLIB", + "-DBUILDING_LIBCURL", + }, + .files = &.{ + // You can include all files from lib, libcurl uses #ifdef-guards to exclude code for disabled functions + "altsvc.c", "amigaos.c", "asyn-ares.c", + "asyn-base.c", "asyn-thrdd.c", "bufq.c", + "bufref.c", "cf-h1-proxy.c", "cf-h2-proxy.c", + 
"cf-haproxy.c", "cf-https-connect.c", "cf-ip-happy.c", + "cf-socket.c", "cfilters.c", "conncache.c", + "connect.c", "content_encoding.c", "cookie.c", + "cshutdn.c", "curl_addrinfo.c", "curl_endian.c", + "curl_fnmatch.c", "curl_fopen.c", "curl_get_line.c", + "curl_gethostname.c", "curl_gssapi.c", "curl_memrchr.c", + "curl_ntlm_core.c", "curl_range.c", "curl_rtmp.c", + "curl_sasl.c", "curl_sha512_256.c", "curl_share.c", + "curl_sspi.c", "curl_threads.c", "curl_trc.c", + "curlx/base64.c", "curlx/dynbuf.c", "curlx/fopen.c", + "curlx/inet_ntop.c", "curlx/inet_pton.c", "curlx/multibyte.c", + "curlx/nonblock.c", "curlx/strcopy.c", "curlx/strerr.c", + "curlx/strparse.c", "curlx/timediff.c", "curlx/timeval.c", + "curlx/version_win32.c", "curlx/wait.c", "curlx/warnless.c", + "curlx/winapi.c", "cw-out.c", "cw-pause.c", + "dict.c", "dllmain.c", "doh.c", + "dynhds.c", "easy.c", "easygetopt.c", + "easyoptions.c", "escape.c", "fake_addrinfo.c", + "file.c", "fileinfo.c", "formdata.c", + "ftp.c", "ftplistparser.c", "getenv.c", + "getinfo.c", "gopher.c", "hash.c", + "headers.c", "hmac.c", "hostip.c", + "hostip4.c", "hostip6.c", "hsts.c", + "http.c", "http1.c", "http2.c", + "http_aws_sigv4.c", "http_chunks.c", "http_digest.c", + "http_negotiate.c", "http_ntlm.c", "http_proxy.c", + "httpsrr.c", "idn.c", "if2ip.c", + "imap.c", "ldap.c", "llist.c", + "macos.c", "md4.c", "md5.c", + "memdebug.c", "mime.c", "mprintf.c", + "mqtt.c", "multi.c", "multi_ev.c", + "multi_ntfy.c", "netrc.c", "noproxy.c", + "openldap.c", "parsedate.c", "pingpong.c", + "pop3.c", "progress.c", "psl.c", + "rand.c", "ratelimit.c", "request.c", + "rtsp.c", "select.c", "sendf.c", + "setopt.c", "sha256.c", "slist.c", + "smb.c", "smtp.c", "socketpair.c", + "socks.c", "socks_gssapi.c", "socks_sspi.c", + "splay.c", "strcase.c", "strdup.c", + "strequal.c", "strerror.c", "system_win32.c", + "telnet.c", "tftp.c", "transfer.c", + "uint-bset.c", "uint-hash.c", "uint-spbset.c", + "uint-table.c", "url.c", "urlapi.c", + 
"vauth/cleartext.c", "vauth/cram.c", "vauth/digest.c", + "vauth/digest_sspi.c", "vauth/gsasl.c", "vauth/krb5_gssapi.c", + "vauth/krb5_sspi.c", "vauth/ntlm.c", "vauth/ntlm_sspi.c", + "vauth/oauth2.c", "vauth/spnego_gssapi.c", "vauth/spnego_sspi.c", + "vauth/vauth.c", "version.c", "vquic/curl_ngtcp2.c", + "vquic/curl_osslq.c", "vquic/curl_quiche.c", "vquic/vquic-tls.c", + "vquic/vquic.c", "vssh/libssh.c", "vssh/libssh2.c", + "vssh/vssh.c", "vtls/apple.c", "vtls/cipher_suite.c", + "vtls/gtls.c", "vtls/hostcheck.c", "vtls/keylog.c", + "vtls/mbedtls.c", "vtls/openssl.c", "vtls/rustls.c", + "vtls/schannel.c", "vtls/schannel_verify.c", "vtls/vtls.c", + "vtls/vtls_scache.c", "vtls/vtls_spack.c", "vtls/wolfssl.c", + "vtls/x509asn1.c", "ws.c", + }, + }); + + return lib; } const Manifest = struct { diff --git a/build.zig.zon b/build.zig.zon index 946210d1..b7525c77 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -1,18 +1,36 @@ .{ .name = .browser, - .paths = .{""}, .version = "0.0.0", .fingerprint = 0xda130f3af836cea0, // Changing this has security and trust implications. 
.minimum_zig_version = "0.15.2", .dependencies = .{ .v8 = .{ - .url = "https://github.com/lightpanda-io/zig-v8-fork/archive/refs/tags/v0.2.9.tar.gz", - .hash = "v8-0.0.0-xddH689vBACgpqFVEhT2wxRin-qQQSOcKJoM37MVo0rU", + .url = "https://github.com/lightpanda-io/zig-v8-fork/archive/refs/tags/v0.3.1.tar.gz", + .hash = "v8-0.0.0-xddH64J7BAC81mkf6G9RbEJxS-W3TIRl5iFnShwbqCqy", + }, //.v8 = .{ .path = "../zig-v8-fork" }, + .brotli = .{ + // v1.2.0 + .url = "https://github.com/google/brotli/archive/028fb5a23661f123017c060daa546b55cf4bde29.tar.gz", + .hash = "N-V-__8AAJudKgCQCuIiH6MJjAiIJHfg_tT_Ew-0vZwVkCo_", + }, + .zlib = .{ + .url = "https://github.com/madler/zlib/releases/download/v1.3.2/zlib-1.3.2.tar.gz", + .hash = "N-V-__8AAJ2cNgAgfBtAw33Bxfu1IWISDeKKSr3DAqoAysIJ", + }, + .nghttp2 = .{ + .url = "https://github.com/nghttp2/nghttp2/releases/download/v1.68.0/nghttp2-1.68.0.tar.gz", + .hash = "N-V-__8AAL15vQCI63ZL6Zaz5hJg6JTEgYXGbLnMFSnf7FT3", + }, .@"boringssl-zig" = .{ .url = "git+https://github.com/Syndica/boringssl-zig.git#c53df00d06b02b755ad88bbf4d1202ed9687b096", .hash = "boringssl-0.1.0-VtJeWehMAAA4RNnwRnzEvKcS9rjsR1QVRw1uJrwXxmVK", }, + .curl = .{ + .url = "https://github.com/curl/curl/releases/download/curl-8_18_0/curl-8.18.0.tar.gz", + .hash = "N-V-__8AALp9QAGn6CCHZ6fK_FfMyGtG824LSHYHHasM3w-y", + }, }, + .paths = .{""}, } diff --git a/src/Config.zig b/src/Config.zig index da5aa0c8..5a4cc58e 100644 --- a/src/Config.zig +++ b/src/Config.zig @@ -191,13 +191,15 @@ pub const Mcp = struct { pub const DumpFormat = enum { html, markdown, + wpt, }; pub const Fetch = struct { url: [:0]const u8, dump_mode: ?DumpFormat = null, common: Common = .{}, - withbase: bool = false, + with_base: bool = false, + with_frames: bool = false, strip: dump.Opts.Strip = .{}, }; @@ -348,6 +350,8 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { \\ \\--with_base Add a tag in dump. Defaults to false. \\ + \\--with_frames Includes the contents of iframes. Defaults to false. 
+ \\ ++ common_options ++ \\ \\serve command @@ -454,6 +458,10 @@ fn inferMode(opt: []const u8) ?RunMode { return .fetch; } + if (std.mem.eql(u8, opt, "--with_frames")) { + return .fetch; + } + if (std.mem.eql(u8, opt, "--host")) { return .serve; } @@ -571,7 +579,8 @@ fn parseFetchArgs( args: *std.process.ArgIterator, ) !Fetch { var dump_mode: ?DumpFormat = null; - var withbase: bool = false; + var with_base: bool = false; + var with_frames: bool = false; var url: ?[:0]const u8 = null; var common: Common = .{}; var strip: dump.Opts.Strip = .{}; @@ -602,7 +611,12 @@ fn parseFetchArgs( } if (std.mem.eql(u8, "--with_base", opt)) { - withbase = true; + with_base = true; + continue; + } + + if (std.mem.eql(u8, "--with_frames", opt)) { + with_frames = true; continue; } @@ -658,7 +672,8 @@ fn parseFetchArgs( .dump_mode = dump_mode, .strip = strip, .common = common, - .withbase = withbase, + .with_base = with_base, + .with_frames = with_frames, }; } diff --git a/src/Net.zig b/src/Net.zig new file mode 100644 index 00000000..2c27dabd --- /dev/null +++ b/src/Net.zig @@ -0,0 +1,1375 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +const std = @import("std"); +const builtin = @import("builtin"); +const posix = std.posix; +const Allocator = std.mem.Allocator; +const ArenaAllocator = std.heap.ArenaAllocator; +const libcurl = @import("sys/libcurl.zig"); + +const log = @import("log.zig"); +const Config = @import("Config.zig"); +const assert = @import("lightpanda").assert; + +pub const ENABLE_DEBUG = false; +const IS_DEBUG = builtin.mode == .Debug; + +pub const Blob = libcurl.CurlBlob; +pub const WaitFd = libcurl.CurlWaitFd; +pub const writefunc_error = libcurl.curl_writefunc_error; + +const Error = libcurl.Error; +const ErrorMulti = libcurl.ErrorMulti; +const errorFromCode = libcurl.errorFromCode; +const errorMFromCode = libcurl.errorMFromCode; +const errorCheck = libcurl.errorCheck; +const errorMCheck = libcurl.errorMCheck; + +pub fn curl_version() [*c]const u8 { + return libcurl.curl_version(); +} + +pub const Method = enum(u8) { + GET = 0, + PUT = 1, + POST = 2, + DELETE = 3, + HEAD = 4, + OPTIONS = 5, + PATCH = 6, + PROPFIND = 7, +}; + +pub const Header = struct { + name: []const u8, + value: []const u8, +}; + +pub const Headers = struct { + headers: ?*libcurl.CurlSList, + cookies: ?[*c]const u8, + + pub fn init(user_agent: [:0]const u8) !Headers { + const header_list = libcurl.curl_slist_append(null, user_agent); + if (header_list == null) { + return error.OutOfMemory; + } + return .{ .headers = header_list, .cookies = null }; + } + + pub fn deinit(self: *const Headers) void { + if (self.headers) |hdr| { + libcurl.curl_slist_free_all(hdr); + } + } + + pub fn add(self: *Headers, header: [*c]const u8) !void { + // Copies the value + const updated_headers = libcurl.curl_slist_append(self.headers, header); + if (updated_headers == null) { + return error.OutOfMemory; + } + + self.headers = updated_headers; + } + + fn parseHeader(header_str: []const u8) ?Header { + const colon_pos = std.mem.indexOfScalar(u8, header_str, ':') orelse return null; + + const name = std.mem.trim(u8, 
header_str[0..colon_pos], " \t"); + const value = std.mem.trim(u8, header_str[colon_pos + 1 ..], " \t"); + + return .{ .name = name, .value = value }; + } + + pub fn iterator(self: *Headers) Iterator { + return .{ + .header = self.headers, + .cookies = self.cookies, + }; + } + + const Iterator = struct { + header: [*c]libcurl.CurlSList, + cookies: ?[*c]const u8, + + pub fn next(self: *Iterator) ?Header { + const h = self.header orelse { + const cookies = self.cookies orelse return null; + self.cookies = null; + return .{ .name = "Cookie", .value = std.mem.span(@as([*:0]const u8, cookies)) }; + }; + + self.header = h.*.next; + return parseHeader(std.mem.span(@as([*:0]const u8, @ptrCast(h.*.data)))); + } + }; +}; + +// In normal cases, the header iterator comes from the curl linked list. +// But it's also possible to inject a response, via `transfer.fulfill`. In that +// case, the response headers are a list, []const Http.Header. +// This union is an iterator that exposes the same API for either case. 
+pub const HeaderIterator = union(enum) { + curl: CurlHeaderIterator, + list: ListHeaderIterator, + + pub fn next(self: *HeaderIterator) ?Header { + switch (self.*) { + inline else => |*it| return it.next(), + } + } + + const CurlHeaderIterator = struct { + conn: *const Connection, + prev: ?*libcurl.CurlHeader = null, + + pub fn next(self: *CurlHeaderIterator) ?Header { + const h = libcurl.curl_easy_nextheader(self.conn.easy, .header, -1, self.prev) orelse return null; + self.prev = h; + + const header = h.*; + return .{ + .name = std.mem.span(header.name), + .value = std.mem.span(header.value), + }; + } + }; + + const ListHeaderIterator = struct { + index: usize = 0, + list: []const Header, + + pub fn next(self: *ListHeaderIterator) ?Header { + const idx = self.index; + if (idx == self.list.len) { + return null; + } + self.index = idx + 1; + return self.list[idx]; + } + }; +}; + +const HeaderValue = struct { + value: []const u8, + amount: usize, +}; + +pub const AuthChallenge = struct { + status: u16, + source: enum { server, proxy }, + scheme: enum { basic, digest }, + realm: []const u8, + + pub fn parse(status: u16, header: []const u8) !AuthChallenge { + var ac: AuthChallenge = .{ + .status = status, + .source = undefined, + .realm = "TODO", // TODO parser and set realm + .scheme = undefined, + }; + + const sep = std.mem.indexOfPos(u8, header, 0, ": ") orelse return error.InvalidHeader; + const hname = header[0..sep]; + const hvalue = header[sep + 2 ..]; + + if (std.ascii.eqlIgnoreCase("WWW-Authenticate", hname)) { + ac.source = .server; + } else if (std.ascii.eqlIgnoreCase("Proxy-Authenticate", hname)) { + ac.source = .proxy; + } else { + return error.InvalidAuthChallenge; + } + + const pos = std.mem.indexOfPos(u8, std.mem.trim(u8, hvalue, std.ascii.whitespace[0..]), 0, " ") orelse hvalue.len; + const _scheme = hvalue[0..pos]; + if (std.ascii.eqlIgnoreCase(_scheme, "basic")) { + ac.scheme = .basic; + } else if (std.ascii.eqlIgnoreCase(_scheme, "digest")) { + 
ac.scheme = .digest; + } else { + return error.UnknownAuthChallengeScheme; + } + + return ac; + } +}; + +pub const ResponseHead = struct { + pub const MAX_CONTENT_TYPE_LEN = 64; + + status: u16, + url: [*c]const u8, + redirect_count: u32, + _content_type_len: usize = 0, + _content_type: [MAX_CONTENT_TYPE_LEN]u8 = undefined, + // this is normally an empty list, but if the response is being injected + // than it'll be populated. It isn't meant to be used directly, but should + // be used through the transfer.responseHeaderIterator() which abstracts + // whether the headers are from a live curl easy handle, or injected. + _injected_headers: []const Header = &.{}, + + pub fn contentType(self: *ResponseHead) ?[]u8 { + if (self._content_type_len == 0) { + return null; + } + return self._content_type[0..self._content_type_len]; + } +}; + +pub fn globalInit() Error!void { + try libcurl.curl_global_init(.{ .ssl = true }); +} + +pub fn globalDeinit() void { + libcurl.curl_global_cleanup(); +} + +pub const Connection = struct { + easy: *libcurl.Curl, + node: Handles.HandleList.Node = .{}, + + pub fn init( + ca_blob_: ?libcurl.CurlBlob, + config: *const Config, + ) !Connection { + const easy = libcurl.curl_easy_init() orelse return error.FailedToInitializeEasy; + errdefer libcurl.curl_easy_cleanup(easy); + + // timeouts + try libcurl.curl_easy_setopt(easy, .timeout_ms, config.httpTimeout()); + try libcurl.curl_easy_setopt(easy, .connect_timeout_ms, config.httpConnectTimeout()); + + // redirect behavior + try libcurl.curl_easy_setopt(easy, .max_redirs, config.httpMaxRedirects()); + try libcurl.curl_easy_setopt(easy, .follow_location, 2); + try libcurl.curl_easy_setopt(easy, .redir_protocols_str, "HTTP,HTTPS"); // remove FTP and FTPS from the default + + // proxy + const http_proxy = config.httpProxy(); + if (http_proxy) |proxy| { + try libcurl.curl_easy_setopt(easy, .proxy, proxy.ptr); + } + + // tls + if (ca_blob_) |ca_blob| { + try libcurl.curl_easy_setopt(easy, 
.ca_info_blob, ca_blob); + if (http_proxy != null) { + try libcurl.curl_easy_setopt(easy, .proxy_ca_info_blob, ca_blob); + } + } else { + assert(config.tlsVerifyHost() == false, "Http.init tls_verify_host", .{}); + + try libcurl.curl_easy_setopt(easy, .ssl_verify_host, false); + try libcurl.curl_easy_setopt(easy, .ssl_verify_peer, false); + + if (http_proxy != null) { + try libcurl.curl_easy_setopt(easy, .proxy_ssl_verify_host, false); + try libcurl.curl_easy_setopt(easy, .proxy_ssl_verify_peer, false); + } + } + + // compression, don't remove this. CloudFront will send gzip content + // even if we don't support it, and then it won't be decompressed. + // empty string means: use whatever's available + try libcurl.curl_easy_setopt(easy, .accept_encoding, ""); + + // debug + if (comptime ENABLE_DEBUG) { + try libcurl.curl_easy_setopt(easy, .verbose, true); + + // Sometimes the default debug output hides some useful data. You can + // uncomment the following line (BUT KEEP THE LIVE ABOVE AS-IS), to + // get more control over the data (specifically, the `CURLINFO_TEXT` + // can include useful data). + + // try libcurl.curl_easy_setopt(easy, .debug_function, debugCallback); + } + + return .{ + .easy = easy, + }; + } + + pub fn deinit(self: *const Connection) void { + libcurl.curl_easy_cleanup(self.easy); + } + + pub fn setURL(self: *const Connection, url: [:0]const u8) !void { + try libcurl.curl_easy_setopt(self.easy, .url, url.ptr); + } + + // a libcurl request has 2 methods. The first is the method that + // controls how libcurl behaves. This specifically influences how redirects + // are handled. For example, if you do a POST and get a 301, libcurl will + // change that to a GET. But if you do a POST and get a 308, libcurl will + // keep the POST (and re-send the body). + // The second method is the actual string that's included in the request + // headers. 
+ // These two methods can be different - you can tell curl to behave as though + // you made a GET, but include "POST" in the request header. + // + // Here, we're only concerned about the 2nd method. If we want, we'll set + // the first one based on whether or not we have a body. + // + // It's important that, for each use of this connection, we set the 2nd + // method. Else, if we make a HEAD request and re-use the connection, but + // DON'T reset this, it'll keep making HEAD requests. + // (I don't know if it's as important to reset the 1st method, or if libcurl + // can infer that based on the presence of the body, but we also reset it + // to be safe); + pub fn setMethod(self: *const Connection, method: Method) !void { + const easy = self.easy; + const m: [:0]const u8 = switch (method) { + .GET => "GET", + .POST => "POST", + .PUT => "PUT", + .DELETE => "DELETE", + .HEAD => "HEAD", + .OPTIONS => "OPTIONS", + .PATCH => "PATCH", + .PROPFIND => "PROPFIND", + }; + try libcurl.curl_easy_setopt(easy, .custom_request, m.ptr); + } + + pub fn setBody(self: *const Connection, body: []const u8) !void { + const easy = self.easy; + try libcurl.curl_easy_setopt(easy, .post, true); + try libcurl.curl_easy_setopt(easy, .post_field_size, body.len); + try libcurl.curl_easy_setopt(easy, .copy_post_fields, body.ptr); + } + + pub fn setGetMode(self: *const Connection) !void { + try libcurl.curl_easy_setopt(self.easy, .http_get, true); + } + + pub fn setHeaders(self: *const Connection, headers: *Headers) !void { + try libcurl.curl_easy_setopt(self.easy, .http_header, headers.headers); + } + + pub fn setCookies(self: *const Connection, cookies: [*c]const u8) !void { + try libcurl.curl_easy_setopt(self.easy, .cookie, cookies); + } + + pub fn setPrivate(self: *const Connection, ptr: *anyopaque) !void { + try libcurl.curl_easy_setopt(self.easy, .private, ptr); + } + + pub fn setProxyCredentials(self: *const Connection, creds: [:0]const u8) !void { + try 
libcurl.curl_easy_setopt(self.easy, .proxy_user_pwd, creds.ptr); + } + + pub fn setCallbacks( + self: *const Connection, + comptime header_cb: libcurl.CurlHeaderFunction, + comptime data_cb: libcurl.CurlWriteFunction, + ) !void { + try libcurl.curl_easy_setopt(self.easy, .header_data, self.easy); + try libcurl.curl_easy_setopt(self.easy, .header_function, header_cb); + try libcurl.curl_easy_setopt(self.easy, .write_data, self.easy); + try libcurl.curl_easy_setopt(self.easy, .write_function, data_cb); + } + + pub fn setProxy(self: *const Connection, proxy: ?[*:0]const u8) !void { + try libcurl.curl_easy_setopt(self.easy, .proxy, proxy); + } + + pub fn setTlsVerify(self: *const Connection, verify: bool, use_proxy: bool) !void { + try libcurl.curl_easy_setopt(self.easy, .ssl_verify_host, verify); + try libcurl.curl_easy_setopt(self.easy, .ssl_verify_peer, verify); + if (use_proxy) { + try libcurl.curl_easy_setopt(self.easy, .proxy_ssl_verify_host, verify); + try libcurl.curl_easy_setopt(self.easy, .proxy_ssl_verify_peer, verify); + } + } + + pub fn getEffectiveUrl(self: *const Connection) ![*c]const u8 { + var url: [*c]u8 = undefined; + try libcurl.curl_easy_getinfo(self.easy, .effective_url, &url); + return url; + } + + pub fn getResponseCode(self: *const Connection) !u16 { + var status: c_long = undefined; + try libcurl.curl_easy_getinfo(self.easy, .response_code, &status); + if (status < 0 or status > std.math.maxInt(u16)) { + return 0; + } + return @intCast(status); + } + + pub fn getRedirectCount(self: *const Connection) !u32 { + var count: c_long = undefined; + try libcurl.curl_easy_getinfo(self.easy, .redirect_count, &count); + return @intCast(count); + } + + pub fn getResponseHeader(self: *const Connection, name: [:0]const u8, index: usize) ?HeaderValue { + var hdr: ?*libcurl.CurlHeader = null; + libcurl.curl_easy_header(self.easy, name, index, .header, -1, &hdr) catch |err| { + // ErrorHeader includes OutOfMemory — rare but real errors from curl internals. 
+ // Logged and returned as null since callers don't expect errors. + log.err(.http, "get response header", .{ + .name = name, + .err = err, + }); + return null; + }; + const h = hdr orelse return null; + return .{ + .amount = h.amount, + .value = std.mem.span(h.value), + }; + } + + pub fn getPrivate(self: *const Connection) !*anyopaque { + var private: *anyopaque = undefined; + try libcurl.curl_easy_getinfo(self.easy, .private, &private); + return private; + } + + // These are headers that must not be sent to the users for interception. + pub fn secretHeaders(_: *const Connection, headers: *Headers, http_headers: *const Config.HttpHeaders) !void { + if (http_headers.proxy_bearer_header) |hdr| { + try headers.add(hdr); + } + } + + pub fn request(self: *const Connection, http_headers: *const Config.HttpHeaders) !u16 { + var header_list = try Headers.init(http_headers.user_agent_header); + defer header_list.deinit(); + try self.secretHeaders(&header_list, http_headers); + try self.setHeaders(&header_list); + + // Add cookies. 
+ if (header_list.cookies) |cookies| { + try self.setCookies(cookies); + } + + try libcurl.curl_easy_perform(self.easy); + return self.getResponseCode(); + } +}; + +pub const Handles = struct { + connections: []Connection, + in_use: HandleList, + available: HandleList, + multi: *libcurl.CurlM, + performing: bool = false, + + pub const HandleList = std.DoublyLinkedList; + + pub fn init( + allocator: Allocator, + ca_blob: ?libcurl.CurlBlob, + config: *const Config, + ) !Handles { + const count: usize = config.httpMaxConcurrent(); + if (count == 0) return error.InvalidMaxConcurrent; + + const multi = libcurl.curl_multi_init() orelse return error.FailedToInitializeMulti; + errdefer libcurl.curl_multi_cleanup(multi) catch {}; + + try libcurl.curl_multi_setopt(multi, .max_host_connections, config.httpMaxHostOpen()); + + const connections = try allocator.alloc(Connection, count); + errdefer allocator.free(connections); + + var available: HandleList = .{}; + for (0..count) |i| { + connections[i] = try Connection.init(ca_blob, config); + available.append(&connections[i].node); + } + + return .{ + .in_use = .{}, + .connections = connections, + .available = available, + .multi = multi, + }; + } + + pub fn deinit(self: *Handles, allocator: Allocator) void { + for (self.connections) |*conn| { + conn.deinit(); + } + allocator.free(self.connections); + libcurl.curl_multi_cleanup(self.multi) catch {}; + } + + pub fn hasAvailable(self: *const Handles) bool { + return self.available.first != null; + } + + pub fn get(self: *Handles) ?*Connection { + if (self.available.popFirst()) |node| { + node.prev = null; + node.next = null; + self.in_use.append(node); + return @as(*Connection, @fieldParentPtr("node", node)); + } + return null; + } + + pub fn add(self: *Handles, conn: *const Connection) !void { + try libcurl.curl_multi_add_handle(self.multi, conn.easy); + } + + pub fn remove(self: *Handles, conn: *Connection) void { + libcurl.curl_multi_remove_handle(self.multi, conn.easy) catch 
|err| { + log.fatal(.http, "multi remove handle", .{ .err = err }); + }; + var node = &conn.node; + self.in_use.remove(node); + node.prev = null; + node.next = null; + self.available.append(node); + } + + pub fn perform(self: *Handles) !c_int { + var running: c_int = undefined; + self.performing = true; + defer self.performing = false; + try libcurl.curl_multi_perform(self.multi, &running); + return running; + } + + pub fn poll(self: *Handles, extra_fds: []libcurl.CurlWaitFd, timeout_ms: c_int) !void { + try libcurl.curl_multi_poll(self.multi, extra_fds, timeout_ms, null); + } + + pub const MultiMessage = struct { + conn: Connection, + err: ?Error, + }; + + pub fn readMessage(self: *Handles) ?MultiMessage { + var messages_count: c_int = 0; + const msg = libcurl.curl_multi_info_read(self.multi, &messages_count) orelse return null; + return switch (msg.data) { + .done => |err| .{ + .conn = .{ .easy = msg.easy_handle }, + .err = err, + }, + else => unreachable, + }; + } +}; + +// TODO: on BSD / Linux, we could just read the PEM file directly. +// This whole rescan + decode is really just needed for MacOS. On Linux +// bundle.rescan does find the .pem file(s) which could be in a few different +// places, so it's still useful, just not efficient. 
// Builds a single PEM-format CA bundle from the system certificate store and
// returns it as a libcurl blob (for CURLOPT_CAINFO_BLOB). Ownership of the
// returned bytes passes to the caller.
//
// Each DER certificate found by bundle.rescan is wrapped in the standard
// "-----BEGIN/END CERTIFICATE-----" markers with its payload base64-encoded
// and wrapped at 64 columns (see LineWriter below).
//
// Returns an empty blob (len == 0) if no system certificates were found.
pub fn loadCerts(allocator: Allocator) !libcurl.CurlBlob {
    var bundle: std.crypto.Certificate.Bundle = .{};
    try bundle.rescan(allocator);
    defer bundle.deinit(allocator);

    const bytes = bundle.bytes.items;
    if (bytes.len == 0) {
        log.warn(.app, "No system certificates", .{});
        // len == 0, so curl will never dereference .data
        return .{
            .len = 0,
            .flags = 0,
            .data = bytes.ptr,
        };
    }

    const encoder = std.base64.standard.Encoder;
    var arr: std.ArrayList(u8) = .empty;

    const encoded_size = encoder.calcSize(bytes.len);
    const buffer_size = encoded_size +
        (bundle.map.count() * 75) + // start / end per certificate + extra, just in case
        (encoded_size / 64) // newline per 64 characters
    ;
    try arr.ensureTotalCapacity(allocator, buffer_size);
    errdefer arr.deinit(allocator);
    var writer = arr.writer(allocator);

    var it = bundle.map.valueIterator();
    while (it.next()) |index| {
        // parse just enough of the DER to know where this certificate ends
        const cert = try std.crypto.Certificate.der.Element.parse(bytes, index.*);

        try writer.writeAll("-----BEGIN CERTIFICATE-----\n");
        var line_writer = LineWriter{ .inner = writer };
        try encoder.encodeWriter(&line_writer, bytes[index.*..cert.slice.end]);
        try writer.writeAll("\n-----END CERTIFICATE-----\n");
    }

    // Final encoding should not be larger than our initial size estimate
    assert(buffer_size > arr.items.len, "Http loadCerts", .{ .estimate = buffer_size, .len = arr.items.len });

    // Hand the caller an exactly-sized allocation. toOwnedSlice shrinks the
    // buffer in place when the allocator supports it, which avoids the full
    // copy that a dupe + free of the oversized buffer would incur.
    const result = try arr.toOwnedSlice(allocator);

    return .{
        .len = result.len,
        .data = result.ptr,
        .flags = 0,
    };
}

// Wraps lines @ 64 columns.
// A PEM is basically a base64 encoded DER (which is
// what Zig has), with lines wrapped at 64 characters and with a basic header
// and footer
const LineWriter = struct {
    // current column on the output line, always in [0, 64]
    col: usize = 0,
    inner: std.ArrayList(u8).Writer,

    // Writes `data` to the inner writer, emitting a newline every time the
    // output column reaches 64. State (col) carries across calls so that
    // encodeWriter can feed us arbitrarily-sized chunks.
    pub fn writeAll(self: *LineWriter, data: []const u8) !void {
        var writer = self.inner;

        var col = self.col;
        // room remaining on the current (possibly partial) line
        const len = 64 - col;

        var remain = data;
        if (remain.len > len) {
            // finish off the current line first
            col = 0;
            try writer.writeAll(remain[0..len]);
            try writer.writeByte('\n');
            remain = remain[len..];
        }

        while (remain.len > 64) {
            try writer.writeAll(remain[0..64]);
            try writer.writeByte('\n');
            // BUGFIX: advance past the 64 bytes just written. The previous
            // code re-sliced from the start (`remain = data[len..]`), which
            // never advanced and looped forever (re-writing the same 64
            // bytes) whenever a chunk was longer than len + 64 bytes.
            remain = remain[64..];
        }
        try writer.writeAll(remain);
        self.col = col + remain.len;
    }
};

// libcurl verbose/debug hook (CURLOPT_DEBUGFUNCTION shape): dumps protocol
// traffic to stderr. Body data is intentionally commented out — it's noisy.
fn debugCallback(_: *libcurl.Curl, msg_type: libcurl.CurlInfoType, raw: [*c]u8, len: usize, _: *anyopaque) c_int {
    const data = raw[0..len];
    switch (msg_type) {
        .text => std.debug.print("libcurl [text]: {s}\n", .{data}),
        .header_out => std.debug.print("libcurl [req-h]: {s}\n", .{data}),
        .header_in => std.debug.print("libcurl [res-h]: {s}\n", .{data}),
        // .data_in => std.debug.print("libcurl [res-b]: {s}\n", .{data}),
        else => std.debug.print("libcurl ?? {d}\n", .{msg_type}),
    }
    return 0;
}

// Zig is in a weird backend transition right now. Need to determine if
// SIMD is even available.
const backend_supports_vectors = switch (builtin.zig_backend) {
    .stage2_llvm, .stage2_c => true,
    else => false,
};

// Websocket messages from client->server are masked using a 4 byte XOR mask
fn mask(m: []const u8, payload: []u8) void {
    var data = payload;

    if (!comptime backend_supports_vectors) return simpleMask(m, data);

    const vector_size = std.simd.suggestVectorLength(u8) orelse @sizeOf(usize);
    if (data.len >= vector_size) {
        // repeat the 4-byte mask across a full vector so we can XOR
        // vector_size bytes at a time
        const mask_vector = std.simd.repeat(vector_size, @as(@Vector(4, u8), m[0..4].*));
        while (data.len >= vector_size) {
            const slice = data[0..vector_size];
            const masked_data_slice: @Vector(vector_size, u8) = slice.*;
            slice.* = masked_data_slice ^ mask_vector;
            data = data[vector_size..];
        }
    }
    simpleMask(m, data);
}

// Used when SIMD isn't available, or for any remaining part of the message
// which is too small to effectively use SIMD.
fn simpleMask(m: []const u8, payload: []u8) void {
    for (payload, 0..) |b, i| {
        payload[i] = b ^ m[i & 3];
    }
}

// Accumulator for a fragmented websocket message (first frame's type plus
// the concatenated payloads of every fragment seen so far).
const Fragments = struct {
    type: Message.Type,
    message: std.ArrayList(u8),
};

pub const Message = struct {
    type: Type,
    data: []const u8,
    // true when `data` is owned by Reader.fragments and the caller must
    // invoke reader.cleanup() once done with it
    cleanup_fragment: bool,

    pub const Type = enum {
        text,
        binary,
        close,
        ping,
        pong,
    };
};

// These are the only websocket types that we're currently sending.
// Note: each value already includes the FIN bit (128); websocketHeader
// ORs 128 in again, which is redundant but harmless.
const OpCode = enum(u8) {
    text = 128 | 1,
    close = 128 | 8,
    pong = 128 | 10,
};

fn fillWebsocketHeader(buf: std.ArrayList(u8)) []const u8 {
    // can't use buf[0..10] here, because the header length
    // is variable. If it's just 2 bytes, for example, we need the
    // framed message to be:
    //    h1, h2, data
    // If we use buf[0..10], we'd get:
    //    h1, h2, 0, 0, 0, 0, 0, 0, 0, 0, data

    var header_buf: [10]u8 = undefined;

    // -10 because we reserved 10 bytes for the header above
    const header = websocketHeader(&header_buf, .text, buf.items.len - 10);
    const start = 10 - header.len;

    const message = buf.items;
    @memcpy(message[start..10], header);
    return message[start..];
}

// Writes a server->client frame header (fin + opcode + payload length, no
// mask) into buf and returns the used prefix: 2, 4 or 10 bytes depending on
// payload_len, per RFC 6455 §5.2.
// Makes the assumption that our caller reserved the first 10 bytes for the
// header.
fn websocketHeader(buf: []u8, op_code: OpCode, payload_len: usize) []const u8 {
    assert(buf.len == 10, "Websocket.Header", .{ .len = buf.len });

    const len = payload_len;
    buf[0] = 128 | @intFromEnum(op_code); // fin | opcode

    if (len <= 125) {
        buf[1] = @intCast(len);
        return buf[0..2];
    }

    if (len < 65536) {
        buf[1] = 126;
        buf[2] = @intCast((len >> 8) & 0xFF);
        buf[3] = @intCast(len & 0xFF);
        return buf[0..4];
    }

    buf[1] = 127;
    // BUGFIX: encode the full 64-bit big-endian length. The previous code
    // zeroed bytes 2-5 and only wrote 32 bits into bytes 6-9, silently
    // corrupting the header for payloads >= 4GiB. For smaller payloads the
    // output is byte-identical to before.
    std.mem.writeInt(u64, buf[2..10], @as(u64, @intCast(len)), .big);
    return buf[0..10];
}

// Grows `buf` to at least required_capacity using the ArrayList growth
// policy (x1.5 + 8). Tries an in-place resize first; otherwise allocates,
// copies the existing bytes, and frees the old buffer.
fn growBuffer(allocator: Allocator, buf: []u8, required_capacity: usize) ![]u8 {
    // from std.ArrayList
    var new_capacity = buf.len;
    while (true) {
        new_capacity +|= new_capacity / 2 + 8;
        if (new_capacity >= required_capacity) break;
    }

    log.debug(.app, "CDP buffer growth", .{ .from = buf.len, .to = new_capacity });

    if (allocator.resize(buf, new_capacity)) {
        return buf.ptr[0..new_capacity];
    }
    const new_buffer = try allocator.alloc(u8, new_capacity);
    @memcpy(new_buffer[0..buf.len], buf);
    allocator.free(buf);
    return new_buffer;
}

// WebSocket message reader. Given websocket message, acts as an iterator that
When next returns null, any incomplete +// message will remain in reader.data +pub fn Reader(comptime EXPECT_MASK: bool) type { + return struct { + allocator: Allocator, + + // position in buf of the start of the next message + pos: usize = 0, + + // position in buf up until where we have valid data + // (any new reads must be placed after this) + len: usize = 0, + + // we add 140 to allow 1 control message (ping/pong/close) to be + // fragmented into a normal message. + buf: []u8, + + fragments: ?Fragments = null, + + const Self = @This(); + + pub fn init(allocator: Allocator) !Self { + const buf = try allocator.alloc(u8, 16 * 1024); + return .{ + .buf = buf, + .allocator = allocator, + }; + } + + pub fn deinit(self: *Self) void { + self.cleanup(); + self.allocator.free(self.buf); + } + + pub fn cleanup(self: *Self) void { + if (self.fragments) |*f| { + f.message.deinit(self.allocator); + self.fragments = null; + } + } + + pub fn readBuf(self: *Self) []u8 { + // We might have read a partial http or websocket message. + // Subsequent reads must read from where we left off. 
+ return self.buf[self.len..]; + } + + pub fn next(self: *Self) !?Message { + LOOP: while (true) { + var buf = self.buf[self.pos..self.len]; + + const length_of_len, const message_len = extractLengths(buf) orelse { + // we don't have enough bytes + return null; + }; + + const byte1 = buf[0]; + + if (byte1 & 112 != 0) { + return error.ReservedFlags; + } + + if (comptime EXPECT_MASK) { + if (buf[1] & 128 != 128) { + // client -> server messages _must_ be masked + return error.NotMasked; + } + } else if (buf[1] & 128 != 0) { + // server -> client are never masked + return error.Masked; + } + + var is_control = false; + var is_continuation = false; + var message_type: Message.Type = undefined; + switch (byte1 & 15) { + 0 => is_continuation = true, + 1 => message_type = .text, + 2 => message_type = .binary, + 8 => { + is_control = true; + message_type = .close; + }, + 9 => { + is_control = true; + message_type = .ping; + }, + 10 => { + is_control = true; + message_type = .pong; + }, + else => return error.InvalidMessageType, + } + + if (is_control) { + if (message_len > 125) { + return error.ControlTooLarge; + } + } else if (message_len > Config.CDP_MAX_MESSAGE_SIZE) { + return error.TooLarge; + } else if (message_len > self.buf.len) { + const len = self.buf.len; + self.buf = try growBuffer(self.allocator, self.buf, message_len); + buf = self.buf[0..len]; + // we need more data + return null; + } else if (buf.len < message_len) { + // we need more data + return null; + } + + // prefix + length_of_len + mask + const header_len = 2 + length_of_len + if (comptime EXPECT_MASK) 4 else 0; + + const payload = buf[header_len..message_len]; + if (comptime EXPECT_MASK) { + mask(buf[header_len - 4 .. 
header_len], payload); + } + + // whatever happens after this, we know where the next message starts + self.pos += message_len; + + const fin = byte1 & 128 == 128; + + if (is_continuation) { + const fragments = &(self.fragments orelse return error.InvalidContinuation); + if (fragments.message.items.len + message_len > Config.CDP_MAX_MESSAGE_SIZE) { + return error.TooLarge; + } + + try fragments.message.appendSlice(self.allocator, payload); + + if (fin == false) { + // maybe we have more parts of the message waiting + continue :LOOP; + } + + // this continuation is done! + return .{ + .type = fragments.type, + .data = fragments.message.items, + .cleanup_fragment = true, + }; + } + + const can_be_fragmented = message_type == .text or message_type == .binary; + if (self.fragments != null and can_be_fragmented) { + // if this isn't a continuation, then we can't have fragments + return error.NestedFragementation; + } + + if (fin == false) { + if (can_be_fragmented == false) { + return error.InvalidContinuation; + } + + // not continuation, and not fin. It has to be the first message + // in a fragmented message. 
+ var fragments = Fragments{ .message = .{}, .type = message_type }; + try fragments.message.appendSlice(self.allocator, payload); + self.fragments = fragments; + continue :LOOP; + } + + return .{ + .data = payload, + .type = message_type, + .cleanup_fragment = false, + }; + } + } + + fn extractLengths(buf: []const u8) ?struct { usize, usize } { + if (buf.len < 2) { + return null; + } + + const length_of_len: usize = switch (buf[1] & 127) { + 126 => 2, + 127 => 8, + else => 0, + }; + + if (buf.len < length_of_len + 2) { + // we definitely don't have enough buf yet + return null; + } + + const message_len = switch (length_of_len) { + 2 => @as(u16, @intCast(buf[3])) | @as(u16, @intCast(buf[2])) << 8, + 8 => @as(u64, @intCast(buf[9])) | @as(u64, @intCast(buf[8])) << 8 | @as(u64, @intCast(buf[7])) << 16 | @as(u64, @intCast(buf[6])) << 24 | @as(u64, @intCast(buf[5])) << 32 | @as(u64, @intCast(buf[4])) << 40 | @as(u64, @intCast(buf[3])) << 48 | @as(u64, @intCast(buf[2])) << 56, + else => buf[1] & 127, + } + length_of_len + 2 + if (comptime EXPECT_MASK) 4 else 0; // +2 for header prefix, +4 for mask; + + return .{ length_of_len, message_len }; + } + + // This is called after we've processed complete websocket messages (this + // only applies to websocket messages). + // There are three cases: + // 1 - We don't have any incomplete data (for a subsequent message) in buf. + // This is the easier to handle, we can set pos & len to 0. + // 2 - We have part of the next message, but we know it'll fit in the + // remaining buf. We don't need to do anything + // 3 - We have part of the next message, but either it won't fight into the + // remaining buffer, or we don't know (because we don't have enough + // of the header to tell the length). 
We need to "compact" the buffer + fn compact(self: *Self) void { + const pos = self.pos; + const len = self.len; + + assert(pos <= len, "Client.Reader.compact precondition", .{ .pos = pos, .len = len }); + + // how many (if any) partial bytes do we have + const partial_bytes = len - pos; + + if (partial_bytes == 0) { + // We have no partial bytes. Setting these to 0 ensures that we + // get the best utilization of our buffer + self.pos = 0; + self.len = 0; + return; + } + + const partial = self.buf[pos..len]; + + // If we have enough bytes of the next message to tell its length + // we'll be able to figure out whether we need to do anything or not. + if (extractLengths(partial)) |length_meta| { + const next_message_len = length_meta.@"1"; + // if this isn't true, then we have a full message and it + // should have been processed. + assert(pos <= len, "Client.Reader.compact postcondition", .{ .next_len = next_message_len, .partial = partial_bytes }); + + const missing_bytes = next_message_len - partial_bytes; + + const free_space = self.buf.len - len; + if (missing_bytes < free_space) { + // we have enough space in our buffer, as is, + return; + } + } + + // We're here because we either don't have enough bytes of the next + // message, or we know that it won't fit in our buffer as-is. + std.mem.copyForwards(u8, self.buf, partial); + self.pos = 0; + self.len = partial_bytes; + } + }; +} + +// In-place string lowercase +fn toLower(str: []u8) []u8 { + for (str, 0..) 
|ch, i| { + str[i] = std.ascii.toLower(ch); + } + return str; +} + +pub const WsConnection = struct { + // CLOSE, 2 length, code + const CLOSE_NORMAL = [_]u8{ 136, 2, 3, 232 }; // code: 1000 + const CLOSE_TOO_BIG = [_]u8{ 136, 2, 3, 241 }; // 1009 + const CLOSE_PROTOCOL_ERROR = [_]u8{ 136, 2, 3, 234 }; //code: 1002 + // "private-use" close codes must be from 4000-49999 + const CLOSE_TIMEOUT = [_]u8{ 136, 2, 15, 160 }; // code: 4000 + + socket: posix.socket_t, + socket_flags: usize, + reader: Reader(true), + send_arena: ArenaAllocator, + json_version_response: []const u8, + timeout_ms: u32, + + pub fn init(socket: posix.socket_t, allocator: Allocator, json_version_response: []const u8, timeout_ms: u32) !WsConnection { + const socket_flags = try posix.fcntl(socket, posix.F.GETFL, 0); + const nonblocking = @as(u32, @bitCast(posix.O{ .NONBLOCK = true })); + assert(socket_flags & nonblocking == nonblocking, "WsConnection.init blocking", .{}); + + var reader = try Reader(true).init(allocator); + errdefer reader.deinit(); + + return .{ + .socket = socket, + .socket_flags = socket_flags, + .reader = reader, + .send_arena = ArenaAllocator.init(allocator), + .json_version_response = json_version_response, + .timeout_ms = timeout_ms, + }; + } + + pub fn deinit(self: *WsConnection) void { + self.reader.deinit(); + self.send_arena.deinit(); + } + + pub fn send(self: *WsConnection, data: []const u8) !void { + var pos: usize = 0; + var changed_to_blocking: bool = false; + defer _ = self.send_arena.reset(.{ .retain_with_limit = 1024 * 32 }); + + defer if (changed_to_blocking) { + // We had to change our socket to blocking me to get our write out + // We need to change it back to non-blocking. 
+ _ = posix.fcntl(self.socket, posix.F.SETFL, self.socket_flags) catch |err| { + log.err(.app, "ws restore nonblocking", .{ .err = err }); + }; + }; + + LOOP: while (pos < data.len) { + const written = posix.write(self.socket, data[pos..]) catch |err| switch (err) { + error.WouldBlock => { + // self.socket is nonblocking, because we don't want to block + // reads. But our life is a lot easier if we block writes, + // largely, because we don't have to maintain a queue of pending + // writes (which would each need their own allocations). So + // if we get a WouldBlock error, we'll switch the socket to + // blocking and switch it back to non-blocking after the write + // is complete. Doesn't seem particularly efficiently, but + // this should virtually never happen. + assert(changed_to_blocking == false, "WsConnection.double block", .{}); + changed_to_blocking = true; + _ = try posix.fcntl(self.socket, posix.F.SETFL, self.socket_flags & ~@as(u32, @bitCast(posix.O{ .NONBLOCK = true }))); + continue :LOOP; + }, + else => return err, + }; + + if (written == 0) { + return error.Closed; + } + pos += written; + } + } + + const EMPTY_PONG = [_]u8{ 138, 0 }; + + fn sendPong(self: *WsConnection, data: []const u8) !void { + if (data.len == 0) { + return self.send(&EMPTY_PONG); + } + var header_buf: [10]u8 = undefined; + const header = websocketHeader(&header_buf, .pong, data.len); + + const allocator = self.send_arena.allocator(); + const framed = try allocator.alloc(u8, header.len + data.len); + @memcpy(framed[0..header.len], header); + @memcpy(framed[header.len..], data); + return self.send(framed); + } + + // called by CDP + // Websocket frames have a variable length header. For server-client, + // it could be anywhere from 2 to 10 bytes. Our IO.Loop doesn't have + // writev, so we need to get creative. We'll JSON serialize to a + // buffer, where the first 10 bytes are reserved. We can then backfill + // the header and send the slice. 
+ pub fn sendJSON(self: *WsConnection, message: anytype, opts: std.json.Stringify.Options) !void { + const allocator = self.send_arena.allocator(); + + var aw = try std.Io.Writer.Allocating.initCapacity(allocator, 512); + + // reserve space for the maximum possible header + try aw.writer.writeAll(&.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }); + try std.json.Stringify.value(message, opts, &aw.writer); + const framed = fillWebsocketHeader(aw.toArrayList()); + return self.send(framed); + } + + pub fn sendJSONRaw( + self: *WsConnection, + buf: std.ArrayList(u8), + ) !void { + // Dangerous API!. We assume the caller has reserved the first 10 + // bytes in `buf`. + const framed = fillWebsocketHeader(buf); + return self.send(framed); + } + + pub fn read(self: *WsConnection) !usize { + const n = try posix.read(self.socket, self.reader.readBuf()); + self.reader.len += n; + return n; + } + + pub fn processMessages(self: *WsConnection, handler: anytype) !bool { + var reader = &self.reader; + while (true) { + const msg = reader.next() catch |err| { + switch (err) { + error.TooLarge => self.send(&CLOSE_TOO_BIG) catch {}, + error.NotMasked => self.send(&CLOSE_PROTOCOL_ERROR) catch {}, + error.ReservedFlags => self.send(&CLOSE_PROTOCOL_ERROR) catch {}, + error.InvalidMessageType => self.send(&CLOSE_PROTOCOL_ERROR) catch {}, + error.ControlTooLarge => self.send(&CLOSE_PROTOCOL_ERROR) catch {}, + error.InvalidContinuation => self.send(&CLOSE_PROTOCOL_ERROR) catch {}, + error.NestedFragementation => self.send(&CLOSE_PROTOCOL_ERROR) catch {}, + error.OutOfMemory => {}, // don't borther trying to send an error in this case + } + return err; + } orelse break; + + switch (msg.type) { + .pong => {}, + .ping => try self.sendPong(msg.data), + .close => { + self.send(&CLOSE_NORMAL) catch {}; + return false; + }, + .text, .binary => if (handler.handleMessage(msg.data) == false) { + return false; + }, + } + if (msg.cleanup_fragment) { + reader.cleanup(); + } + } + + // We might have read part of the 
next message. Our reader potentially + // has to move data around in its buffer to make space. + reader.compact(); + return true; + } + + pub fn upgrade(self: *WsConnection, request: []u8) !void { + // our caller already confirmed that we have a trailing \r\n\r\n + const request_line_end = std.mem.indexOfScalar(u8, request, '\r') orelse unreachable; + const request_line = request[0..request_line_end]; + + if (!std.ascii.endsWithIgnoreCase(request_line, "http/1.1")) { + return error.InvalidProtocol; + } + + // we need to extract the sec-websocket-key value + var key: []const u8 = ""; + + // we need to make sure that we got all the necessary headers + values + var required_headers: u8 = 0; + + // can't std.mem.split because it forces the iterated value to be const + // (we could @constCast...) + + var buf = request[request_line_end + 2 ..]; + + while (buf.len > 4) { + const index = std.mem.indexOfScalar(u8, buf, '\r') orelse unreachable; + const separator = std.mem.indexOfScalar(u8, buf[0..index], ':') orelse return error.InvalidRequest; + + const name = std.mem.trim(u8, toLower(buf[0..separator]), &std.ascii.whitespace); + const value = std.mem.trim(u8, buf[(separator + 1)..index], &std.ascii.whitespace); + + if (std.mem.eql(u8, name, "upgrade")) { + if (!std.ascii.eqlIgnoreCase("websocket", value)) { + return error.InvalidUpgradeHeader; + } + required_headers |= 1; + } else if (std.mem.eql(u8, name, "sec-websocket-version")) { + if (value.len != 2 or value[0] != '1' or value[1] != '3') { + return error.InvalidVersionHeader; + } + required_headers |= 2; + } else if (std.mem.eql(u8, name, "connection")) { + // find if connection header has upgrade in it, example header: + // Connection: keep-alive, Upgrade + if (std.ascii.indexOfIgnoreCase(value, "upgrade") == null) { + return error.InvalidConnectionHeader; + } + required_headers |= 4; + } else if (std.mem.eql(u8, name, "sec-websocket-key")) { + key = value; + required_headers |= 8; + } + + const next = index + 2; + 
buf = buf[next..]; + } + + if (required_headers != 15) { + return error.MissingHeaders; + } + + // our caller has already made sure this request ended in \r\n\r\n + // so it isn't something we need to check again + + const alloc = self.send_arena.allocator(); + + const response = blk: { + // Response to an ugprade request is always this, with + // the Sec-Websocket-Accept value a spacial sha1 hash of the + // request "sec-websocket-version" and a magic value. + + const template = + "HTTP/1.1 101 Switching Protocols\r\n" ++ + "Upgrade: websocket\r\n" ++ + "Connection: upgrade\r\n" ++ + "Sec-Websocket-Accept: 0000000000000000000000000000\r\n\r\n"; + + // The response will be sent via the IO Loop and thus has to have its + // own lifetime. + const res = try alloc.dupe(u8, template); + + // magic response + const key_pos = res.len - 32; + var h: [20]u8 = undefined; + var hasher = std.crypto.hash.Sha1.init(.{}); + hasher.update(key); + // websocket spec always used this value + hasher.update("258EAFA5-E914-47DA-95CA-C5AB0DC85B11"); + hasher.final(&h); + + _ = std.base64.standard.Encoder.encode(res[key_pos .. 
key_pos + 28], h[0..]); + + break :blk res; + }; + + return self.send(response); + } + + pub fn sendHttpError(self: *WsConnection, comptime status: u16, comptime body: []const u8) void { + const response = std.fmt.comptimePrint( + "HTTP/1.1 {d} \r\nConnection: Close\r\nContent-Length: {d}\r\n\r\n{s}", + .{ status, body.len, body }, + ); + + // we're going to close this connection anyways, swallowing any + // error seems safe + self.send(response) catch {}; + } + + pub fn getAddress(self: *WsConnection) !std.net.Address { + var address: std.net.Address = undefined; + var socklen: posix.socklen_t = @sizeOf(std.net.Address); + try posix.getpeername(self.socket, &address.any, &socklen); + return address; + } + + pub fn shutdown(self: *WsConnection) void { + posix.shutdown(self.socket, .recv) catch {}; + } + + pub fn setBlocking(self: *WsConnection, blocking: bool) !void { + if (blocking) { + _ = try posix.fcntl(self.socket, posix.F.SETFL, self.socket_flags & ~@as(u32, @bitCast(posix.O{ .NONBLOCK = true }))); + } else { + _ = try posix.fcntl(self.socket, posix.F.SETFL, self.socket_flags); + } + } +}; + +const testing = std.testing; + +test "mask" { + var buf: [4000]u8 = undefined; + const messages = [_][]const u8{ "1234", "1234" ** 99, "1234" ** 999 }; + for (messages) |message| { + // we need the message to be mutable since mask operates in-place + const payload = buf[0..message.len]; + @memcpy(payload, message); + + mask(&.{ 1, 2, 200, 240 }, payload); + try testing.expectEqual(false, std.mem.eql(u8, payload, message)); + + mask(&.{ 1, 2, 200, 240 }, payload); + try testing.expectEqual(true, std.mem.eql(u8, payload, message)); + } +} diff --git a/src/Server.zig b/src/Server.zig index 9f060a1c..1b0c5099 100644 --- a/src/Server.zig +++ b/src/Server.zig @@ -30,6 +30,7 @@ const log = @import("log.zig"); const App = @import("App.zig"); const Config = @import("Config.zig"); const CDP = @import("cdp/cdp.zig").CDP; +const Net = @import("Net.zig"); const Http = 
@import("http/Http.zig"); const HttpClient = @import("http/Client.zig"); @@ -265,21 +266,7 @@ pub const Client = struct { allocator: Allocator, app: *App, http: *HttpClient, - json_version_response: []const u8, - reader: Reader(true), - socket: posix.socket_t, - socket_flags: usize, - send_arena: ArenaAllocator, - timeout_ms: u32, - - const EMPTY_PONG = [_]u8{ 138, 0 }; - - // CLOSE, 2 length, code - const CLOSE_NORMAL = [_]u8{ 136, 2, 3, 232 }; // code: 1000 - const CLOSE_TOO_BIG = [_]u8{ 136, 2, 3, 241 }; // 1009 - const CLOSE_PROTOCOL_ERROR = [_]u8{ 136, 2, 3, 234 }; //code: 1002 - // "private-use" close codes must be from 4000-49999 - const CLOSE_TIMEOUT = [_]u8{ 136, 2, 15, 160 }; // code: 4000 + ws: Net.WsConnection, fn init( socket: posix.socket_t, @@ -288,40 +275,28 @@ pub const Client = struct { json_version_response: []const u8, timeout_ms: u32, ) !Client { + var ws = try Net.WsConnection.init(socket, allocator, json_version_response, timeout_ms); + errdefer ws.deinit(); + if (log.enabled(.app, .info)) { - var client_address: std.net.Address = undefined; - var socklen: posix.socklen_t = @sizeOf(net.Address); - try std.posix.getsockname(socket, &client_address.any, &socklen); + const client_address = ws.getAddress() catch null; log.info(.app, "client connected", .{ .ip = client_address }); } - const socket_flags = try posix.fcntl(socket, posix.F.GETFL, 0); - const nonblocking = @as(u32, @bitCast(posix.O{ .NONBLOCK = true })); - // we expect the socket to come to us as nonblocking - lp.assert(socket_flags & nonblocking == nonblocking, "Client.init blocking", .{}); - - var reader = try Reader(true).init(allocator); - errdefer reader.deinit(); - const http = try app.http.createClient(allocator); errdefer http.deinit(); return .{ - .socket = socket, .allocator = allocator, .app = app, .http = http, - .json_version_response = json_version_response, - .reader = reader, + .ws = ws, .mode = .{ .http = {} }, - .socket_flags = socket_flags, - .send_arena = 
ArenaAllocator.init(allocator), - .timeout_ms = timeout_ms, }; } fn stop(self: *Client) void { - posix.shutdown(self.socket, .recv) catch {}; + self.ws.shutdown(); } fn deinit(self: *Client) void { @@ -329,15 +304,14 @@ pub const Client = struct { .cdp => |*cdp| cdp.deinit(), .http => {}, } - self.reader.deinit(); - self.send_arena.deinit(); + self.ws.deinit(); self.http.deinit(); } fn start(self: *Client) void { const http = self.http; http.cdp_client = .{ - .socket = self.socket, + .socket = self.ws.socket, .ctx = self, .blocking_read_start = Client.blockingReadStart, .blocking_read = Client.blockingRead, @@ -352,8 +326,9 @@ pub const Client = struct { fn httpLoop(self: *Client, http: *HttpClient) !void { lp.assert(self.mode == .http, "Client.httpLoop invalid mode", .{}); + while (true) { - const status = http.tick(self.timeout_ms) catch |err| { + const status = http.tick(self.ws.timeout_ms) catch |err| { log.err(.app, "http tick", .{ .err = err }); return; }; @@ -371,13 +346,9 @@ pub const Client = struct { } } - return self.cdpLoop(http); - } - - fn cdpLoop(self: *Client, http: *HttpClient) !void { var cdp = &self.mode.cdp; var last_message = timestamp(.monotonic); - var ms_remaining = self.timeout_ms; + var ms_remaining = self.ws.timeout_ms; while (true) { switch (cdp.pageWait(ms_remaining)) { @@ -386,7 +357,7 @@ pub const Client = struct { return; } last_message = timestamp(.monotonic); - ms_remaining = self.timeout_ms; + ms_remaining = self.ws.timeout_ms; }, .no_page => { const status = http.tick(ms_remaining) catch |err| { @@ -401,7 +372,7 @@ pub const Client = struct { return; } last_message = timestamp(.monotonic); - ms_remaining = self.timeout_ms; + ms_remaining = self.ws.timeout_ms; }, .done => { const elapsed = timestamp(.monotonic) - last_message; @@ -417,7 +388,7 @@ pub const Client = struct { fn blockingReadStart(ctx: *anyopaque) bool { const self: *Client = @ptrCast(@alignCast(ctx)); - _ = posix.fcntl(self.socket, posix.F.SETFL, self.socket_flags & 
~@as(u32, @bitCast(posix.O{ .NONBLOCK = true }))) catch |err| { + self.ws.setBlocking(true) catch |err| { log.warn(.app, "CDP blockingReadStart", .{ .err = err }); return false; }; @@ -431,7 +402,7 @@ pub const Client = struct { fn blockingReadStop(ctx: *anyopaque) bool { const self: *Client = @ptrCast(@alignCast(ctx)); - _ = posix.fcntl(self.socket, posix.F.SETFL, self.socket_flags) catch |err| { + self.ws.setBlocking(false) catch |err| { log.warn(.app, "CDP blockingReadStop", .{ .err = err }); return false; }; @@ -439,7 +410,7 @@ pub const Client = struct { } fn readSocket(self: *Client) bool { - const n = posix.read(self.socket, self.readBuf()) catch |err| { + const n = self.ws.read() catch |err| { log.warn(.app, "CDP read", .{ .err = err }); return false; }; @@ -449,16 +420,10 @@ pub const Client = struct { return false; } - return self.processData(n) catch false; + return self.processData() catch false; } - fn readBuf(self: *Client) []u8 { - return self.reader.readBuf(); - } - - fn processData(self: *Client, len: usize) !bool { - self.reader.len += len; - + fn processData(self: *Client) !bool { switch (self.mode) { .cdp => |*cdp| return self.processWebsocketMessage(cdp), .http => return self.processHTTPRequest(), @@ -466,8 +431,8 @@ pub const Client = struct { } fn processHTTPRequest(self: *Client) !bool { - lp.assert(self.reader.pos == 0, "Client.HTTP pos", .{ .pos = self.reader.pos }); - const request = self.reader.buf[0..self.reader.len]; + lp.assert(self.ws.reader.pos == 0, "Client.HTTP pos", .{ .pos = self.ws.reader.pos }); + const request = self.ws.reader.buf[0..self.ws.reader.len]; if (request.len > Config.CDP_MAX_HTTP_REQUEST_SIZE) { self.writeHTTPErrorResponse(413, "Request too large"); @@ -481,7 +446,7 @@ pub const Client = struct { } // the next incoming data can go to the front of our buffer - defer self.reader.len = 0; + defer self.ws.reader.len = 0; return self.handleHTTPRequest(request) catch |err| { switch (err) { error.NotFound => 
self.writeHTTPErrorResponse(404, "Not found"), @@ -521,15 +486,15 @@ pub const Client = struct { return true; } - if (std.mem.eql(u8, url, "/json/version")) { - try self.send(self.json_version_response); + if (std.mem.eql(u8, url, "/json/version") or std.mem.eql(u8, url, "/json/version/")) { + try self.ws.send(self.ws.json_version_response); // Chromedp (a Go driver) does an http request to /json/version // then to / (websocket upgrade) using a different connection. // Since we only allow 1 connection at a time, the 2nd one (the // websocket upgrade) blocks until the first one times out. // We can avoid that by closing the connection. json_version_response // has a Connection: Close header too. - try posix.shutdown(self.socket, .recv); + self.ws.shutdown(); return false; } @@ -537,581 +502,31 @@ pub const Client = struct { } fn upgradeConnection(self: *Client, request: []u8) !void { - // our caller already confirmed that we have a trailing \r\n\r\n - const request_line_end = std.mem.indexOfScalar(u8, request, '\r') orelse unreachable; - const request_line = request[0..request_line_end]; - - if (!std.ascii.endsWithIgnoreCase(request_line, "http/1.1")) { - return error.InvalidProtocol; - } - - // we need to extract the sec-websocket-key value - var key: []const u8 = ""; - - // we need to make sure that we got all the necessary headers + values - var required_headers: u8 = 0; - - // can't std.mem.split because it forces the iterated value to be const - // (we could @constCast...) 
- - var buf = request[request_line_end + 2 ..]; - - while (buf.len > 4) { - const index = std.mem.indexOfScalar(u8, buf, '\r') orelse unreachable; - const separator = std.mem.indexOfScalar(u8, buf[0..index], ':') orelse return error.InvalidRequest; - - const name = std.mem.trim(u8, toLower(buf[0..separator]), &std.ascii.whitespace); - const value = std.mem.trim(u8, buf[(separator + 1)..index], &std.ascii.whitespace); - - if (std.mem.eql(u8, name, "upgrade")) { - if (!std.ascii.eqlIgnoreCase("websocket", value)) { - return error.InvalidUpgradeHeader; - } - required_headers |= 1; - } else if (std.mem.eql(u8, name, "sec-websocket-version")) { - if (value.len != 2 or value[0] != '1' or value[1] != '3') { - return error.InvalidVersionHeader; - } - required_headers |= 2; - } else if (std.mem.eql(u8, name, "connection")) { - // find if connection header has upgrade in it, example header: - // Connection: keep-alive, Upgrade - if (std.ascii.indexOfIgnoreCase(value, "upgrade") == null) { - return error.InvalidConnectionHeader; - } - required_headers |= 4; - } else if (std.mem.eql(u8, name, "sec-websocket-key")) { - key = value; - required_headers |= 8; - } - - const next = index + 2; - buf = buf[next..]; - } - - if (required_headers != 15) { - return error.MissingHeaders; - } - - // our caller has already made sure this request ended in \r\n\r\n - // so it isn't something we need to check again - - const allocator = self.send_arena.allocator(); - - const response = blk: { - // Response to an ugprade request is always this, with - // the Sec-Websocket-Accept value a spacial sha1 hash of the - // request "sec-websocket-version" and a magic value. - - const template = - "HTTP/1.1 101 Switching Protocols\r\n" ++ - "Upgrade: websocket\r\n" ++ - "Connection: upgrade\r\n" ++ - "Sec-Websocket-Accept: 0000000000000000000000000000\r\n\r\n"; - - // The response will be sent via the IO Loop and thus has to have its - // own lifetime. 
- const res = try allocator.dupe(u8, template); - - // magic response - const key_pos = res.len - 32; - var h: [20]u8 = undefined; - var hasher = std.crypto.hash.Sha1.init(.{}); - hasher.update(key); - // websocket spec always used this value - hasher.update("258EAFA5-E914-47DA-95CA-C5AB0DC85B11"); - hasher.final(&h); - - _ = std.base64.standard.Encoder.encode(res[key_pos .. key_pos + 28], h[0..]); - - break :blk res; - }; - + try self.ws.upgrade(request); self.mode = .{ .cdp = try CDP.init(self.app, self.http, self) }; - return self.send(response); } fn writeHTTPErrorResponse(self: *Client, comptime status: u16, comptime body: []const u8) void { - const response = std.fmt.comptimePrint( - "HTTP/1.1 {d} \r\nConnection: Close\r\nContent-Length: {d}\r\n\r\n{s}", - .{ status, body.len, body }, - ); - - // we're going to close this connection anyways, swallowing any - // error seems safe - self.send(response) catch {}; + self.ws.sendHttpError(status, body); } fn processWebsocketMessage(self: *Client, cdp: *CDP) !bool { - var reader = &self.reader; - while (true) { - const msg = reader.next() catch |err| { - switch (err) { - error.TooLarge => self.send(&CLOSE_TOO_BIG) catch {}, - error.NotMasked => self.send(&CLOSE_PROTOCOL_ERROR) catch {}, - error.ReservedFlags => self.send(&CLOSE_PROTOCOL_ERROR) catch {}, - error.InvalidMessageType => self.send(&CLOSE_PROTOCOL_ERROR) catch {}, - error.ControlTooLarge => self.send(&CLOSE_PROTOCOL_ERROR) catch {}, - error.InvalidContinuation => self.send(&CLOSE_PROTOCOL_ERROR) catch {}, - error.NestedFragementation => self.send(&CLOSE_PROTOCOL_ERROR) catch {}, - error.OutOfMemory => {}, // don't borther trying to send an error in this case - } - return err; - } orelse break; - - switch (msg.type) { - .pong => {}, - .ping => try self.sendPong(msg.data), - .close => { - self.send(&CLOSE_NORMAL) catch {}; - return false; - }, - .text, .binary => if (cdp.handleMessage(msg.data) == false) { - return false; - }, - } - if 
(msg.cleanup_fragment) { - reader.cleanup(); - } - } - - // We might have read part of the next message. Our reader potentially - // has to move data around in its buffer to make space. - reader.compact(); - return true; + return self.ws.processMessages(cdp); } - fn sendPong(self: *Client, data: []const u8) !void { - if (data.len == 0) { - return self.send(&EMPTY_PONG); - } - var header_buf: [10]u8 = undefined; - const header = websocketHeader(&header_buf, .pong, data.len); - - const allocator = self.send_arena.allocator(); - var framed = try allocator.alloc(u8, header.len + data.len); - @memcpy(framed[0..header.len], header); - @memcpy(framed[header.len..], data); - return self.send(framed); + pub fn sendAllocator(self: *Client) Allocator { + return self.ws.send_arena.allocator(); } - // called by CDP - // Websocket frames have a variable length header. For server-client, - // it could be anywhere from 2 to 10 bytes. Our IO.Loop doesn't have - // writev, so we need to get creative. We'll JSON serialize to a - // buffer, where the first 10 bytes are reserved. We can then backfill - // the header and send the slice. pub fn sendJSON(self: *Client, message: anytype, opts: std.json.Stringify.Options) !void { - const allocator = self.send_arena.allocator(); - - var aw = try std.Io.Writer.Allocating.initCapacity(allocator, 512); - - // reserve space for the maximum possible header - try aw.writer.writeAll(&.{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }); - try std.json.Stringify.value(message, opts, &aw.writer); - const framed = fillWebsocketHeader(aw.toArrayList()); - return self.send(framed); + return self.ws.sendJSON(message, opts); } - pub fn sendJSONRaw( - self: *Client, - buf: std.ArrayList(u8), - ) !void { - // Dangerous API!. We assume the caller has reserved the first 10 - // bytes in `buf`. 
- const framed = fillWebsocketHeader(buf); - return self.send(framed); - } - - fn send(self: *Client, data: []const u8) !void { - var pos: usize = 0; - var changed_to_blocking: bool = false; - defer _ = self.send_arena.reset(.{ .retain_with_limit = 1024 * 32 }); - - defer if (changed_to_blocking) { - // We had to change our socket to blocking me to get our write out - // We need to change it back to non-blocking. - _ = posix.fcntl(self.socket, posix.F.SETFL, self.socket_flags) catch |err| { - log.err(.app, "CDP restore nonblocking", .{ .err = err }); - }; - }; - - LOOP: while (pos < data.len) { - const written = posix.write(self.socket, data[pos..]) catch |err| switch (err) { - error.WouldBlock => { - // self.socket is nonblocking, because we don't want to block - // reads. But our life is a lot easier if we block writes, - // largely, because we don't have to maintain a queue of pending - // writes (which would each need their own allocations). So - // if we get a WouldBlock error, we'll switch the socket to - // blocking and switch it back to non-blocking after the write - // is complete. Doesn't seem particularly efficiently, but - // this should virtually never happen. - lp.assert(changed_to_blocking == false, "Client.double block", .{}); - changed_to_blocking = true; - _ = try posix.fcntl(self.socket, posix.F.SETFL, self.socket_flags & ~@as(u32, @bitCast(posix.O{ .NONBLOCK = true }))); - continue :LOOP; - }, - else => return err, - }; - - if (written == 0) { - return error.Closed; - } - pos += written; - } + pub fn sendJSONRaw(self: *Client, buf: std.ArrayList(u8)) !void { + return self.ws.sendJSONRaw(buf); } }; -// WebSocket message reader. Given websocket message, acts as an iterator that -// can return zero or more Messages. 
When next returns null, any incomplete -// message will remain in reader.data -fn Reader(comptime EXPECT_MASK: bool) type { - return struct { - allocator: Allocator, - - // position in buf of the start of the next message - pos: usize = 0, - - // position in buf up until where we have valid data - // (any new reads must be placed after this) - len: usize = 0, - - // we add 140 to allow 1 control message (ping/pong/close) to be - // fragmented into a normal message. - buf: []u8, - - fragments: ?Fragments = null, - - const Self = @This(); - - fn init(allocator: Allocator) !Self { - const buf = try allocator.alloc(u8, 16 * 1024); - return .{ - .buf = buf, - .allocator = allocator, - }; - } - - fn deinit(self: *Self) void { - self.cleanup(); - self.allocator.free(self.buf); - } - - fn cleanup(self: *Self) void { - if (self.fragments) |*f| { - f.message.deinit(self.allocator); - self.fragments = null; - } - } - - fn readBuf(self: *Self) []u8 { - // We might have read a partial http or websocket message. - // Subsequent reads must read from where we left off. 
- return self.buf[self.len..]; - } - - fn next(self: *Self) !?Message { - LOOP: while (true) { - var buf = self.buf[self.pos..self.len]; - - const length_of_len, const message_len = extractLengths(buf) orelse { - // we don't have enough bytes - return null; - }; - - const byte1 = buf[0]; - - if (byte1 & 112 != 0) { - return error.ReservedFlags; - } - - if (comptime EXPECT_MASK) { - if (buf[1] & 128 != 128) { - // client -> server messages _must_ be masked - return error.NotMasked; - } - } else if (buf[1] & 128 != 0) { - // server -> client are never masked - return error.Masked; - } - - var is_control = false; - var is_continuation = false; - var message_type: Message.Type = undefined; - switch (byte1 & 15) { - 0 => is_continuation = true, - 1 => message_type = .text, - 2 => message_type = .binary, - 8 => { - is_control = true; - message_type = .close; - }, - 9 => { - is_control = true; - message_type = .ping; - }, - 10 => { - is_control = true; - message_type = .pong; - }, - else => return error.InvalidMessageType, - } - - if (is_control) { - if (message_len > 125) { - return error.ControlTooLarge; - } - } else if (message_len > Config.CDP_MAX_MESSAGE_SIZE) { - return error.TooLarge; - } else if (message_len > self.buf.len) { - const len = self.buf.len; - self.buf = try growBuffer(self.allocator, self.buf, message_len); - buf = self.buf[0..len]; - // we need more data - return null; - } else if (buf.len < message_len) { - // we need more data - return null; - } - - // prefix + length_of_len + mask - const header_len = 2 + length_of_len + if (comptime EXPECT_MASK) 4 else 0; - - const payload = buf[header_len..message_len]; - if (comptime EXPECT_MASK) { - mask(buf[header_len - 4 .. 
header_len], payload); - } - - // whatever happens after this, we know where the next message starts - self.pos += message_len; - - const fin = byte1 & 128 == 128; - - if (is_continuation) { - const fragments = &(self.fragments orelse return error.InvalidContinuation); - if (fragments.message.items.len + message_len > Config.CDP_MAX_MESSAGE_SIZE) { - return error.TooLarge; - } - - try fragments.message.appendSlice(self.allocator, payload); - - if (fin == false) { - // maybe we have more parts of the message waiting - continue :LOOP; - } - - // this continuation is done! - return .{ - .type = fragments.type, - .data = fragments.message.items, - .cleanup_fragment = true, - }; - } - - const can_be_fragmented = message_type == .text or message_type == .binary; - if (self.fragments != null and can_be_fragmented) { - // if this isn't a continuation, then we can't have fragments - return error.NestedFragementation; - } - - if (fin == false) { - if (can_be_fragmented == false) { - return error.InvalidContinuation; - } - - // not continuation, and not fin. It has to be the first message - // in a fragmented message. 
- var fragments = Fragments{ .message = .{}, .type = message_type }; - try fragments.message.appendSlice(self.allocator, payload); - self.fragments = fragments; - continue :LOOP; - } - - return .{ - .data = payload, - .type = message_type, - .cleanup_fragment = false, - }; - } - } - - fn extractLengths(buf: []const u8) ?struct { usize, usize } { - if (buf.len < 2) { - return null; - } - - const length_of_len: usize = switch (buf[1] & 127) { - 126 => 2, - 127 => 8, - else => 0, - }; - - if (buf.len < length_of_len + 2) { - // we definitely don't have enough buf yet - return null; - } - - const message_len = switch (length_of_len) { - 2 => @as(u16, @intCast(buf[3])) | @as(u16, @intCast(buf[2])) << 8, - 8 => @as(u64, @intCast(buf[9])) | @as(u64, @intCast(buf[8])) << 8 | @as(u64, @intCast(buf[7])) << 16 | @as(u64, @intCast(buf[6])) << 24 | @as(u64, @intCast(buf[5])) << 32 | @as(u64, @intCast(buf[4])) << 40 | @as(u64, @intCast(buf[3])) << 48 | @as(u64, @intCast(buf[2])) << 56, - else => buf[1] & 127, - } + length_of_len + 2 + if (comptime EXPECT_MASK) 4 else 0; // +2 for header prefix, +4 for mask; - - return .{ length_of_len, message_len }; - } - - // This is called after we've processed complete websocket messages (this - // only applies to websocket messages). - // There are three cases: - // 1 - We don't have any incomplete data (for a subsequent message) in buf. - // This is the easier to handle, we can set pos & len to 0. - // 2 - We have part of the next message, but we know it'll fit in the - // remaining buf. We don't need to do anything - // 3 - We have part of the next message, but either it won't fight into the - // remaining buffer, or we don't know (because we don't have enough - // of the header to tell the length). 
We need to "compact" the buffer - fn compact(self: *Self) void { - const pos = self.pos; - const len = self.len; - - lp.assert(pos <= len, "Client.Reader.compact precondition", .{ .pos = pos, .len = len }); - - // how many (if any) partial bytes do we have - const partial_bytes = len - pos; - - if (partial_bytes == 0) { - // We have no partial bytes. Setting these to 0 ensures that we - // get the best utilization of our buffer - self.pos = 0; - self.len = 0; - return; - } - - const partial = self.buf[pos..len]; - - // If we have enough bytes of the next message to tell its length - // we'll be able to figure out whether we need to do anything or not. - if (extractLengths(partial)) |length_meta| { - const next_message_len = length_meta.@"1"; - // if this isn't true, then we have a full message and it - // should have been processed. - lp.assert(pos <= len, "Client.Reader.compact postcondition", .{ .next_len = next_message_len, .partial = partial_bytes }); - - const missing_bytes = next_message_len - partial_bytes; - - const free_space = self.buf.len - len; - if (missing_bytes < free_space) { - // we have enough space in our buffer, as is, - return; - } - } - - // We're here because we either don't have enough bytes of the next - // message, or we know that it won't fit in our buffer as-is. 
- std.mem.copyForwards(u8, self.buf, partial); - self.pos = 0; - self.len = partial_bytes; - } - }; -} - -fn growBuffer(allocator: Allocator, buf: []u8, required_capacity: usize) ![]u8 { - // from std.ArrayList - var new_capacity = buf.len; - while (true) { - new_capacity +|= new_capacity / 2 + 8; - if (new_capacity >= required_capacity) break; - } - - log.debug(.app, "CDP buffer growth", .{ .from = buf.len, .to = new_capacity }); - - if (allocator.resize(buf, new_capacity)) { - return buf.ptr[0..new_capacity]; - } - const new_buffer = try allocator.alloc(u8, new_capacity); - @memcpy(new_buffer[0..buf.len], buf); - allocator.free(buf); - return new_buffer; -} - -const Fragments = struct { - type: Message.Type, - message: std.ArrayList(u8), -}; - -const Message = struct { - type: Type, - data: []const u8, - cleanup_fragment: bool, - - const Type = enum { - text, - binary, - close, - ping, - pong, - }; -}; - -// These are the only websocket types that we're currently sending -const OpCode = enum(u8) { - text = 128 | 1, - close = 128 | 8, - pong = 128 | 10, -}; - -fn fillWebsocketHeader(buf: std.ArrayList(u8)) []const u8 { - // can't use buf[0..10] here, because the header length - // is variable. 
If it's just 2 bytes, for example, we need the - // framed message to be: - // h1, h2, data - // If we use buf[0..10], we'd get: - // h1, h2, 0, 0, 0, 0, 0, 0, 0, 0, data - - var header_buf: [10]u8 = undefined; - - // -10 because we reserved 10 bytes for the header above - const header = websocketHeader(&header_buf, .text, buf.items.len - 10); - const start = 10 - header.len; - - const message = buf.items; - @memcpy(message[start..10], header); - return message[start..]; -} - -// makes the assumption that our caller reserved the first -// 10 bytes for the header -fn websocketHeader(buf: []u8, op_code: OpCode, payload_len: usize) []const u8 { - lp.assert(buf.len == 10, "Websocket.Header", .{ .len = buf.len }); - - const len = payload_len; - buf[0] = 128 | @intFromEnum(op_code); // fin | opcode - - if (len <= 125) { - buf[1] = @intCast(len); - return buf[0..2]; - } - - if (len < 65536) { - buf[1] = 126; - buf[2] = @intCast((len >> 8) & 0xFF); - buf[3] = @intCast(len & 0xFF); - return buf[0..4]; - } - - buf[1] = 127; - buf[2] = 0; - buf[3] = 0; - buf[4] = 0; - buf[5] = 0; - buf[6] = @intCast((len >> 24) & 0xFF); - buf[7] = @intCast((len >> 16) & 0xFF); - buf[8] = @intCast((len >> 8) & 0xFF); - buf[9] = @intCast(len & 0xFF); - return buf[0..10]; -} - // Utils // -------- @@ -1139,48 +554,6 @@ fn buildJSONVersionResponse( pub const timestamp = @import("datetime.zig").timestamp; -// In-place string lowercase -fn toLower(str: []u8) []u8 { - for (str, 0..) |c, i| { - str[i] = std.ascii.toLower(c); - } - return str; -} - -// Zig is in a weird backend transition right now. Need to determine if -// SIMD is even available. 
-const backend_supports_vectors = switch (builtin.zig_backend) { - .stage2_llvm, .stage2_c => true, - else => false, -}; - -// Websocket messages from client->server are masked using a 4 byte XOR mask -fn mask(m: []const u8, payload: []u8) void { - var data = payload; - - if (!comptime backend_supports_vectors) return simpleMask(m, data); - - const vector_size = std.simd.suggestVectorLength(u8) orelse @sizeOf(usize); - if (data.len >= vector_size) { - const mask_vector = std.simd.repeat(vector_size, @as(@Vector(4, u8), m[0..4].*)); - while (data.len >= vector_size) { - const slice = data[0..vector_size]; - const masked_data_slice: @Vector(vector_size, u8) = slice.*; - slice.* = masked_data_slice ^ mask_vector; - data = data[vector_size..]; - } - } - simpleMask(m, data); -} - -// Used when SIMD isn't available, or for any remaining part of the message -// which is too small to effectively use SIMD. -fn simpleMask(m: []const u8, payload: []u8) void { - for (payload, 0..) |b, i| { - payload[i] = b ^ m[i & 3]; - } -} - const testing = std.testing; test "server: buildJSONVersionResponse" { const address = try net.Address.parseIp4("127.0.0.1", 9001); @@ -1391,22 +764,6 @@ test "Client: close message" { ); } -test "server: mask" { - var buf: [4000]u8 = undefined; - const messages = [_][]const u8{ "1234", "1234" ** 99, "1234" ** 999 }; - for (messages) |message| { - // we need the message to be mutable since mask operates in-place - const payload = buf[0..message.len]; - @memcpy(payload, message); - - mask(&.{ 1, 2, 200, 240 }, payload); - try testing.expectEqual(false, std.mem.eql(u8, payload, message)); - - mask(&.{ 1, 2, 200, 240 }, payload); - try testing.expectEqual(true, std.mem.eql(u8, payload, message)); - } -} - test "server: 404" { var c = try createTestClient(); defer c.deinit(); @@ -1542,7 +899,7 @@ fn createTestClient() !TestClient { const TestClient = struct { stream: std.net.Stream, buf: [1024]u8 = undefined, - reader: Reader(false), + reader: 
Net.Reader(false), fn deinit(self: *TestClient) void { self.stream.close(); @@ -1609,7 +966,7 @@ const TestClient = struct { "Sec-Websocket-Accept: flzHu2DevQ2dSCSVqKSii5e9C2o=\r\n\r\n", res); } - fn readWebsocketMessage(self: *TestClient) !?Message { + fn readWebsocketMessage(self: *TestClient) !?Net.Message { while (true) { const n = try self.stream.read(self.reader.readBuf()); if (n == 0) { diff --git a/src/browser/Browser.zig b/src/browser/Browser.zig index 09a78cab..503306d3 100644 --- a/src/browser/Browser.zig +++ b/src/browser/Browser.zig @@ -87,19 +87,29 @@ pub fn closeSession(self: *Browser) void { } } -pub fn runMicrotasks(self: *const Browser) void { +pub fn runMicrotasks(self: *Browser) void { self.env.runMicrotasks(); } pub fn runMacrotasks(self: *Browser) !?u64 { - return try self.env.runMacrotasks(); + const env = &self.env; + + const time_to_next = try self.env.runMacrotasks(); + env.pumpMessageLoop(); + + // either of the above could have queued more microtasks + env.runMicrotasks(); + + return time_to_next; } -pub fn runMessageLoop(self: *const Browser) void { - while (self.env.pumpMessageLoop()) { - if (comptime IS_DEBUG) { - log.debug(.browser, "pumpMessageLoop", .{}); - } - } +pub fn hasBackgroundTasks(self: *Browser) bool { + return self.env.hasBackgroundTasks(); +} +pub fn waitForBackgroundTasks(self: *Browser) void { + self.env.waitForBackgroundTasks(); +} + +pub fn runIdleTasks(self: *const Browser) void { self.env.runIdleTasks(); } diff --git a/src/browser/EventManager.zig b/src/browser/EventManager.zig index cfe5966e..1cf82bfc 100644 --- a/src/browser/EventManager.zig +++ b/src/browser/EventManager.zig @@ -56,7 +56,12 @@ pub const EventManager = @This(); page: *Page, arena: Allocator, +// Used as an optimization in Page._documentIsComplete. If we know there are no +// 'load' listeners in the document, we can skip dispatching the per-resource +// 'load' event (e.g. 
amazon product page has no listener and ~350 resources) +has_dom_load_listener: bool, listener_pool: std.heap.MemoryPool(Listener), +ignore_list: std.ArrayList(*Listener), list_pool: std.heap.MemoryPool(std.DoublyLinkedList), lookup: std.HashMapUnmanaged( EventKey, @@ -72,10 +77,12 @@ pub fn init(arena: Allocator, page: *Page) EventManager { .page = page, .lookup = .{}, .arena = arena, + .ignore_list = .{}, .list_pool = .init(arena), .listener_pool = .init(arena), .dispatch_depth = 0, .deferred_removals = .{}, + .has_dom_load_listener = false, }; } @@ -106,6 +113,10 @@ pub fn register(self: *EventManager, target: *EventTarget, typ: []const u8, call // Allocate the type string we'll use in both listener and key const type_string = try String.init(self.arena, typ, .{}); + if (type_string.eql(comptime .wrap("load")) and target._type == .node) { + self.has_dom_load_listener = true; + } + const gop = try self.lookup.getOrPut(self.arena, .{ .type_string = type_string, .event_target = @intFromPtr(target), @@ -146,6 +157,11 @@ pub fn register(self: *EventManager, target: *EventTarget, typ: []const u8, call }; // append the listener to the list of listeners for this target gop.value_ptr.*.append(&listener.node); + + // Track load listeners for script execution ignore list + if (type_string.eql(comptime .wrap("load"))) { + try self.ignore_list.append(self.arena, listener); + } } pub fn remove(self: *EventManager, target: *EventTarget, typ: []const u8, callback: Callback, use_capture: bool) void { @@ -158,6 +174,10 @@ pub fn remove(self: *EventManager, target: *EventTarget, typ: []const u8, callba } } +pub fn clearIgnoreList(self: *EventManager) void { + self.ignore_list.clearRetainingCapacity(); +} + // Dispatching can be recursive from the compiler's point of view, so we need to // give it an explicit error set so that other parts of the code can use and // inferred error. 
@@ -169,7 +189,24 @@ const DispatchError = error{ ExecutionError, JsException, }; + +pub const DispatchOpts = struct { + // A "load" event triggered by a script (in ScriptManager) should not trigger + // a "load" listener added within that script. Therefore, any "load" listener + // that we add go into an ignore list until after the script finishes executing. + // The ignore list is only checked when apply_ignore == true, which is only + // set by the ScriptManager when raising the script's "load" event. + apply_ignore: bool = false, +}; + pub fn dispatch(self: *EventManager, target: *EventTarget, event: *Event) DispatchError!void { + return self.dispatchOpts(target, event, .{}); +} + +pub fn dispatchOpts(self: *EventManager, target: *EventTarget, event: *Event, comptime opts: DispatchOpts) DispatchError!void { + event.acquireRef(); + defer event.deinit(false, self.page); + if (comptime IS_DEBUG) { log.debug(.event, "eventManager.dispatch", .{ .type = event._type_string.str(), .bubbles = event._bubbles }); } @@ -186,7 +223,7 @@ pub fn dispatch(self: *EventManager, target: *EventTarget, event: *Event) Dispat }; switch (target._type) { - .node => |node| try self.dispatchNode(node, event, &was_handled), + .node => |node| try self.dispatchNode(node, event, &was_handled, opts), .xhr, .window, .abort_signal, @@ -197,13 +234,14 @@ pub fn dispatch(self: *EventManager, target: *EventTarget, event: *Event) Dispat .screen, .screen_orientation, .visual_viewport, + .file_reader, .generic, => { const list = self.lookup.get(.{ .event_target = @intFromPtr(target), .type_string = event._type_string, }) orelse return; - try self.dispatchAll(list, target, event, &was_handled); + try self.dispatchAll(list, target, event, &was_handled, opts); }, } } @@ -218,6 +256,9 @@ const DispatchWithFunctionOptions = struct { inject_target: bool = true, }; pub fn dispatchWithFunction(self: *EventManager, target: *EventTarget, event: *Event, function_: ?js.Function, comptime opts: 
DispatchWithFunctionOptions) !void { + event.acquireRef(); + defer event.deinit(false, self.page); + if (comptime IS_DEBUG) { log.debug(.event, "dispatchWithFunction", .{ .type = event._type_string.str(), .context = opts.context, .has_function = function_ != null }); } @@ -249,10 +290,10 @@ pub fn dispatchWithFunction(self: *EventManager, target: *EventTarget, event: *E .event_target = @intFromPtr(target), .type_string = event._type_string, }) orelse return; - try self.dispatchAll(list, target, event, &was_dispatched); + try self.dispatchAll(list, target, event, &was_dispatched, .{}); } -fn dispatchNode(self: *EventManager, target: *Node, event: *Event, was_handled: *bool) !void { +fn dispatchNode(self: *EventManager, target: *Node, event: *Event, was_handled: *bool, comptime opts: DispatchOpts) !void { const ShadowRoot = @import("webapi/ShadowRoot.zig"); const page = self.page; @@ -309,11 +350,14 @@ fn dispatchNode(self: *EventManager, target: *Node, event: *Event, was_handled: node = n._parent; } - // Even though the window isn't part of the DOM, events always propagate + // Even though the window isn't part of the DOM, most events propagate // through it in the capture phase (unless we stopped at a shadow boundary) - if (path_len < path_buffer.len) { - path_buffer[path_len] = page.window.asEventTarget(); - path_len += 1; + // The only explicit exception is "load" + if (event._type_string.eql(comptime .wrap("load")) == false) { + if (path_len < path_buffer.len) { + path_buffer[path_len] = page.window.asEventTarget(); + path_len += 1; + } } const path = path_buffer[0..path_len]; @@ -330,7 +374,7 @@ fn dispatchNode(self: *EventManager, target: *Node, event: *Event, was_handled: .event_target = @intFromPtr(current_target), .type_string = event._type_string, })) |list| { - try self.dispatchPhase(list, current_target, event, was_handled, true); + try self.dispatchPhase(list, current_target, event, was_handled, comptime .init(true, opts)); } } @@ -364,7 +408,7 @@ fn 
dispatchNode(self: *EventManager, target: *Node, event: *Event, was_handled: .type_string = event._type_string, .event_target = @intFromPtr(target_et), })) |list| { - try self.dispatchPhase(list, target_et, event, was_handled, null); + try self.dispatchPhase(list, target_et, event, was_handled, comptime .init(null, opts)); if (event._stop_propagation) { return; } @@ -381,13 +425,25 @@ fn dispatchNode(self: *EventManager, target: *Node, event: *Event, was_handled: .type_string = event._type_string, .event_target = @intFromPtr(current_target), })) |list| { - try self.dispatchPhase(list, current_target, event, was_handled, false); + try self.dispatchPhase(list, current_target, event, was_handled, comptime .init(false, opts)); } } } } -fn dispatchPhase(self: *EventManager, list: *std.DoublyLinkedList, current_target: *EventTarget, event: *Event, was_handled: *bool, comptime capture_only: ?bool) !void { +const DispatchPhaseOpts = struct { + capture_only: ?bool = null, + apply_ignore: bool = false, + + fn init(capture_only: ?bool, opts: DispatchOpts) DispatchPhaseOpts { + return .{ + .capture_only = capture_only, + .apply_ignore = opts.apply_ignore, + }; + } +}; + +fn dispatchPhase(self: *EventManager, list: *std.DoublyLinkedList, current_target: *EventTarget, event: *Event, was_handled: *bool, comptime opts: DispatchPhaseOpts) !void { const page = self.page; // Track dispatch depth for deferred removal @@ -413,7 +469,7 @@ fn dispatchPhase(self: *EventManager, list: *std.DoublyLinkedList, current_targe // Iterate through the list, stopping after we've encountered the last_listener var node = list.first; var is_done = false; - while (node) |n| { + node_loop: while (node) |n| { if (is_done) { break; } @@ -423,7 +479,7 @@ fn dispatchPhase(self: *EventManager, list: *std.DoublyLinkedList, current_targe node = n.next; // Skip non-matching listeners - if (comptime capture_only) |capture| { + if (comptime opts.capture_only) |capture| { if (listener.capture != capture) { 
continue; } @@ -442,6 +498,14 @@ fn dispatchPhase(self: *EventManager, list: *std.DoublyLinkedList, current_targe } } + if (comptime opts.apply_ignore) { + for (self.ignore_list.items) |ignored| { + if (ignored == listener) { + continue :node_loop; + } + } + } + // Remove "once" listeners BEFORE calling them so nested dispatches don't see them if (listener.once) { self.removeListener(list, listener); @@ -486,8 +550,8 @@ fn dispatchPhase(self: *EventManager, list: *std.DoublyLinkedList, current_targe } // Non-Node dispatching (XHR, Window without propagation) -fn dispatchAll(self: *EventManager, list: *std.DoublyLinkedList, current_target: *EventTarget, event: *Event, was_handled: *bool) !void { - return self.dispatchPhase(list, current_target, event, was_handled, null); +fn dispatchAll(self: *EventManager, list: *std.DoublyLinkedList, current_target: *EventTarget, event: *Event, was_handled: *bool, comptime opts: DispatchOpts) !void { + return self.dispatchPhase(list, current_target, event, was_handled, comptime .init(null, opts)); } fn getInlineHandler(self: *EventManager, target: *EventTarget, event: *Event) ?js.Function.Global { @@ -757,7 +821,6 @@ const ActivationState = struct { .bubbles = true, .cancelable = false, }, page); - defer if (!event._v8_handoff) event.deinit(false); const target = input.asElement().asEventTarget(); try page._event_manager.dispatch(target, event); diff --git a/src/browser/Factory.zig b/src/browser/Factory.zig index a46c3dc2..cbc2170d 100644 --- a/src/browser/Factory.zig +++ b/src/browser/Factory.zig @@ -42,12 +42,96 @@ const Allocator = std.mem.Allocator; const IS_DEBUG = builtin.mode == .Debug; const assert = std.debug.assert; +// Shared across all frames of a Page. 
const Factory = @This(); -_page: *Page, _arena: Allocator, _slab: SlabAllocator, +pub fn init(arena: Allocator) !*Factory { + const self = try arena.create(Factory); + self.* = .{ + ._arena = arena, + ._slab = SlabAllocator.init(arena, 128), + }; + return self; +} + +// this is a root object +pub fn eventTarget(self: *Factory, child: anytype) !*@TypeOf(child) { + return self.eventTargetWithAllocator(self._slab.allocator(), child); +} + +pub fn eventTargetWithAllocator(_: *const Factory, allocator: Allocator, child: anytype) !*@TypeOf(child) { + const chain = try PrototypeChain( + &.{ EventTarget, @TypeOf(child) }, + ).allocate(allocator); + + const event_ptr = chain.get(0); + event_ptr.* = .{ + ._type = unionInit(EventTarget.Type, chain.get(1)), + }; + chain.setLeaf(1, child); + + return chain.get(1); +} + +pub fn standaloneEventTarget(self: *Factory, child: anytype) !*EventTarget { + const allocator = self._slab.allocator(); + const et = try allocator.create(EventTarget); + et.* = .{ ._type = unionInit(EventTarget.Type, child) }; + return et; +} + +// this is a root object +pub fn event(_: *const Factory, arena: Allocator, typ: String, child: anytype) !*@TypeOf(child) { + const chain = try PrototypeChain( + &.{ Event, @TypeOf(child) }, + ).allocate(arena); + + // Special case: Event has a _type_string field, so we need manual setup + const event_ptr = chain.get(0); + event_ptr.* = try eventInit(arena, typ, chain.get(1)); + chain.setLeaf(1, child); + + return chain.get(1); +} + +pub fn uiEvent(_: *const Factory, arena: Allocator, typ: String, child: anytype) !*@TypeOf(child) { + const chain = try PrototypeChain( + &.{ Event, UIEvent, @TypeOf(child) }, + ).allocate(arena); + + // Special case: Event has a _type_string field, so we need manual setup + const event_ptr = chain.get(0); + event_ptr.* = try eventInit(arena, typ, chain.get(1)); + chain.setMiddle(1, UIEvent.Type); + chain.setLeaf(2, child); + + return chain.get(2); +} + +pub fn mouseEvent(_: *const Factory, 
arena: Allocator, typ: String, mouse: MouseEvent, child: anytype) !*@TypeOf(child) { + const chain = try PrototypeChain( + &.{ Event, UIEvent, MouseEvent, @TypeOf(child) }, + ).allocate(arena); + + // Special case: Event has a _type_string field, so we need manual setup + const event_ptr = chain.get(0); + event_ptr.* = try eventInit(arena, typ, chain.get(1)); + chain.setMiddle(1, UIEvent.Type); + + // Set MouseEvent with all its fields + const mouse_ptr = chain.get(2); + mouse_ptr.* = mouse; + mouse_ptr._proto = chain.get(1); + mouse_ptr._type = unionInit(MouseEvent.Type, chain.get(3)); + + chain.setLeaf(3, child); + + return chain.get(3); +} + fn PrototypeChain(comptime types: []const type) type { return struct { const Self = @This(); @@ -151,94 +235,14 @@ fn AutoPrototypeChain(comptime types: []const type) type { }; } -pub fn init(arena: Allocator, page: *Page) Factory { - return .{ - ._page = page, - ._arena = arena, - ._slab = SlabAllocator.init(arena, 128), - }; -} - -// this is a root object -pub fn eventTarget(self: *Factory, child: anytype) !*@TypeOf(child) { - const allocator = self._slab.allocator(); - const chain = try PrototypeChain( - &.{ EventTarget, @TypeOf(child) }, - ).allocate(allocator); - - const event_ptr = chain.get(0); - event_ptr.* = .{ - ._type = unionInit(EventTarget.Type, chain.get(1)), - }; - chain.setLeaf(1, child); - - return chain.get(1); -} - -pub fn standaloneEventTarget(self: *Factory, child: anytype) !*EventTarget { - const allocator = self._slab.allocator(); - const et = try allocator.create(EventTarget); - et.* = .{ ._type = unionInit(EventTarget.Type, child) }; - return et; -} - -// this is a root object -pub fn event(self: *Factory, arena: Allocator, typ: String, child: anytype) !*@TypeOf(child) { - const chain = try PrototypeChain( - &.{ Event, @TypeOf(child) }, - ).allocate(arena); - - // Special case: Event has a _type_string field, so we need manual setup - const event_ptr = chain.get(0); - event_ptr.* = try 
self.eventInit(arena, typ, chain.get(1)); - chain.setLeaf(1, child); - - return chain.get(1); -} - -pub fn uiEvent(self: *Factory, arena: Allocator, typ: String, child: anytype) !*@TypeOf(child) { - const chain = try PrototypeChain( - &.{ Event, UIEvent, @TypeOf(child) }, - ).allocate(arena); - - // Special case: Event has a _type_string field, so we need manual setup - const event_ptr = chain.get(0); - event_ptr.* = try self.eventInit(arena, typ, chain.get(1)); - chain.setMiddle(1, UIEvent.Type); - chain.setLeaf(2, child); - - return chain.get(2); -} - -pub fn mouseEvent(self: *Factory, arena: Allocator, typ: String, mouse: MouseEvent, child: anytype) !*@TypeOf(child) { - const chain = try PrototypeChain( - &.{ Event, UIEvent, MouseEvent, @TypeOf(child) }, - ).allocate(arena); - - // Special case: Event has a _type_string field, so we need manual setup - const event_ptr = chain.get(0); - event_ptr.* = try self.eventInit(arena, typ, chain.get(1)); - chain.setMiddle(1, UIEvent.Type); - - // Set MouseEvent with all its fields - const mouse_ptr = chain.get(2); - mouse_ptr.* = mouse; - mouse_ptr._proto = chain.get(1); - mouse_ptr._type = unionInit(MouseEvent.Type, chain.get(3)); - - chain.setLeaf(3, child); - - return chain.get(3); -} - -fn eventInit(self: *const Factory, arena: Allocator, typ: String, value: anytype) !Event { +fn eventInit(arena: Allocator, typ: String, value: anytype) !Event { // Round to 2ms for privacy (browsers do this) const raw_timestamp = @import("../datetime.zig").milliTimestamp(.monotonic); const time_stamp = (raw_timestamp / 2) * 2; return .{ + ._rc = 0, ._arena = arena, - ._page = self._page, ._type = unionInit(Event.Type, value), ._type_string = typ, ._time_stamp = time_stamp, @@ -384,7 +388,7 @@ pub fn destroy(self: *Factory, value: anytype) void { } if (comptime @hasField(S, "_proto")) { - self.destroyChain(value, true, 0, std.mem.Alignment.@"1"); + self.destroyChain(value, 0, std.mem.Alignment.@"1"); } else { 
self.destroyStandalone(value); } @@ -398,7 +402,6 @@ pub fn destroyStandalone(self: *Factory, value: anytype) void { fn destroyChain( self: *Factory, value: anytype, - comptime first: bool, old_size: usize, old_align: std.mem.Alignment, ) void { @@ -410,23 +413,8 @@ fn destroyChain( const new_size = current_size + @sizeOf(S); const new_align = std.mem.Alignment.max(old_align, std.mem.Alignment.of(S)); - // This is initially called from a deinit. We don't want to call that - // same deinit. So when this is the first time destroyChain is called - // we don't call deinit (because we're in that deinit) - if (!comptime first) { - // But if it isn't the first time - if (@hasDecl(S, "deinit")) { - // And it has a deinit, we'll call it - switch (@typeInfo(@TypeOf(S.deinit)).@"fn".params.len) { - 1 => value.deinit(), - 2 => value.deinit(self._page), - else => @compileLog(@typeName(S) ++ " has an invalid deinit function"), - } - } - } - if (@hasField(S, "_proto")) { - self.destroyChain(value._proto, false, new_size, new_align); + self.destroyChain(value._proto, new_size, new_align); } else { // no proto so this is the head of the chain. // we use this as the ptr to the start of the chain. diff --git a/src/browser/Mime.zig b/src/browser/Mime.zig index 02ebb9f9..43ca3632 100644 --- a/src/browser/Mime.zig +++ b/src/browser/Mime.zig @@ -24,10 +24,11 @@ params: []const u8 = "", // IANA defines max. charset value length as 40. // We keep 41 for null-termination since HTML parser expects in this format. charset: [41]u8 = default_charset, -charset_len: usize = 5, +charset_len: usize = default_charset_len, /// String "UTF-8" continued by null characters. -pub const default_charset = .{ 'U', 'T', 'F', '-', '8' } ++ .{0} ** 36; +const default_charset = .{ 'U', 'T', 'F', '-', '8' } ++ .{0} ** 36; +const default_charset_len = 5; /// Mime with unknown Content-Type, empty params and empty charset. 
pub const unknown = Mime{ .content_type = .{ .unknown = {} } }; @@ -127,17 +128,17 @@ pub fn parse(input: []u8) !Mime { const params = trimLeft(normalized[type_len..]); - var charset: [41]u8 = undefined; - var charset_len: usize = undefined; + var charset: [41]u8 = default_charset; + var charset_len: usize = default_charset_len; var it = std.mem.splitScalar(u8, params, ';'); while (it.next()) |attr| { - const i = std.mem.indexOfScalarPos(u8, attr, 0, '=') orelse return error.Invalid; + const i = std.mem.indexOfScalarPos(u8, attr, 0, '=') orelse continue; const name = trimLeft(attr[0..i]); const value = trimRight(attr[i + 1 ..]); if (value.len == 0) { - return error.Invalid; + continue; } const attribute_name = std.meta.stringToEnum(enum { @@ -150,7 +151,7 @@ pub fn parse(input: []u8) !Mime { break; } - const attribute_value = try parseCharset(value); + const attribute_value = parseCharset(value) catch continue; @memcpy(charset[0..attribute_value.len], attribute_value); // Null-terminate right after attribute value. 
charset[attribute_value.len] = 0; @@ -334,6 +335,19 @@ test "Mime: invalid" { "text/ html", "text / html", "text/html other", + }; + + for (invalids) |invalid| { + const mutable_input = try testing.arena_allocator.dupe(u8, invalid); + try testing.expectError(error.Invalid, Mime.parse(mutable_input)); + } +} + +test "Mime: malformed parameters are ignored" { + defer testing.reset(); + + // These should all parse successfully as text/html with malformed params ignored + const valid_with_malformed_params = [_][]const u8{ "text/html; x", "text/html; x=", "text/html; x= ", @@ -342,11 +356,13 @@ test "Mime: invalid" { "text/html; charset=\"\"", "text/html; charset=\"", "text/html; charset=\"\\", + "text/html;\"", }; - for (invalids) |invalid| { - const mutable_input = try testing.arena_allocator.dupe(u8, invalid); - try testing.expectError(error.Invalid, Mime.parse(mutable_input)); + for (valid_with_malformed_params) |input| { + const mutable_input = try testing.arena_allocator.dupe(u8, input); + const mime = try Mime.parse(mutable_input); + try testing.expectEqual(.text_html, std.meta.activeTag(mime.content_type)); } } @@ -435,6 +451,12 @@ test "Mime: parse charset" { .charset = "custom-non-standard-charset-value", .params = "charset=\"custom-non-standard-charset-value\"", }, "text/xml;charset=\"custom-non-standard-charset-value\""); + + try expect(.{ + .content_type = .{ .text_html = {} }, + .charset = "UTF-8", + .params = "x=\"", + }, "text/html;x=\""); } test "Mime: isHTML" { diff --git a/src/browser/Page.zig b/src/browser/Page.zig index 5e4578c1..e2ec7885 100644 --- a/src/browser/Page.zig +++ b/src/browser/Page.zig @@ -63,15 +63,14 @@ const NavigationKind = @import("webapi/navigation/root.zig").NavigationKind; const KeyboardEvent = @import("webapi/event/KeyboardEvent.zig"); const Http = App.Http; +const Net = @import("../Net.zig"); const ArenaPool = App.ArenaPool; const timestamp = @import("../datetime.zig").timestamp; const milliTimestamp = 
@import("../datetime.zig").milliTimestamp; const WebApiURL = @import("webapi/URL.zig"); -const global_event_handlers = @import("webapi/global_event_handlers.zig"); -const GlobalEventHandlersLookup = global_event_handlers.Lookup; -const GlobalEventHandler = global_event_handlers.Handler; +const GlobalEventHandlersLookup = @import("webapi/global_event_handlers.zig").Lookup; var default_url = WebApiURL{ ._raw = "about:blank" }; pub var default_location: Location = Location{ ._url = &default_url }; @@ -139,7 +138,7 @@ _blob_urls: std.StringHashMapUnmanaged(*Blob) = .{}, /// `load` events that'll be fired before window's `load` event. /// A call to `documentIsComplete` (which calls `_documentIsComplete`) resets it. -_to_load: std.ArrayList(*Element) = .{}, +_to_load: std.ArrayList(*Element.Html) = .{}, _script_manager: ScriptManager, @@ -175,7 +174,7 @@ _upgrading_element: ?*Node = null, _undefined_custom_elements: std.ArrayList(*Element.Html.Custom) = .{}, // for heap allocations and managing WebAPI objects -_factory: Factory, +_factory: *Factory, _load_state: LoadState = .waiting, @@ -249,14 +248,15 @@ pub fn init(self: *Page, id: u32, session: *Session, parent: ?*Page) !void { } const browser = session.browser; const arena_pool = browser.arena_pool; - const page_arena = try arena_pool.acquire(); - errdefer arena_pool.release(page_arena); + + const page_arena = if (parent) |p| p.arena else try arena_pool.acquire(); + errdefer if (parent == null) arena_pool.release(page_arena); + + var factory = if (parent) |p| p._factory else try Factory.init(page_arena); const call_arena = try arena_pool.acquire(); errdefer arena_pool.release(call_arena); - var factory = Factory.init(page_arena, self); - const document = (try factory.document(Node.Document.HTMLDocument{ ._proto = undefined, })).asDocument(); @@ -309,15 +309,17 @@ pub fn init(self: *Page, id: u32, session: *Session, parent: ?*Page) !void { self.js = try browser.env.createContext(self); errdefer self.js.deinit(); + 
document._page = self; + if (comptime builtin.is_test == false) { // HTML test runner manually calls these as necessary try self.js.scheduler.add(session.browser, struct { - fn runMessageLoop(ctx: *anyopaque) !?u32 { + fn runIdleTasks(ctx: *anyopaque) !?u32 { const b: *@import("Browser.zig") = @ptrCast(@alignCast(ctx)); - b.runMessageLoop(); - return 250; + b.runIdleTasks(); + return 200; } - }.runMessageLoop, 250, .{ .name = "page.messageLoop" }); + }.runIdleTasks, 200, .{ .name = "page.runIdleTasks", .low_priority = true }); } } @@ -351,13 +353,16 @@ pub fn deinit(self: *Page) void { var it = self._arena_pool_leak_track.valueIterator(); while (it.next()) |value_ptr| { if (value_ptr.count > 0) { - log.err(.bug, "ArenaPool Leak", .{ .owner = value_ptr.owner, .type = self._type }); + log.err(.bug, "ArenaPool Leak", .{ .owner = value_ptr.owner, .type = self._type, .url = self.url }); } } } self.arena_pool.release(self.call_arena); - self.arena_pool.release(self.arena); + + if (self.parent == null) { + self.arena_pool.release(self.arena); + } } pub fn base(self: *const Page) [:0]const u8 { @@ -420,7 +425,7 @@ pub fn releaseArena(self: *Page, allocator: Allocator) void { if (comptime IS_DEBUG) { const found = self._arena_pool_leak_track.getPtr(@intFromPtr(allocator.ptr)).?; if (found.count != 1) { - log.err(.bug, "ArenaPool Double Free", .{ .owner = found.owner, .count = found.count, .type = self._type }); + log.err(.bug, "ArenaPool Double Free", .{ .owner = found.owner, .count = found.count, .type = self._type, .url = self.url }); return; } found.count = 0; @@ -564,7 +569,7 @@ fn scheduleNavigationWithArena(self: *Page, arena: Allocator, request_url: []con arena, self.base(), request_url, - .{ .always_dupe = true }, + .{ .always_dupe = true, .encode = true }, ); const session = self._session; @@ -637,13 +642,12 @@ pub fn documentIsLoaded(self: *Page) void { self._load_state = .load; self.document._ready_state = .interactive; self._documentIsLoaded() catch |err| { - 
log.err(.page, "document is loaded", .{ .err = err, .type = self._type }); + log.err(.page, "document is loaded", .{ .err = err, .type = self._type, .url = self.url }); }; } pub fn _documentIsLoaded(self: *Page) !void { const event = try Event.initTrusted(.wrap("DOMContentLoaded"), .{ .bubbles = true }, self); - defer if (!event._v8_handoff) event.deinit(false); try self._event_manager.dispatch( self.document.asEventTarget(), event, @@ -661,10 +665,9 @@ pub fn iframeCompletedLoading(self: *Page, iframe: *Element.Html.IFrame) void { defer ls.deinit(); const event = Event.initTrusted(comptime .wrap("load"), .{}, self) catch |err| { - log.err(.page, "iframe event init", .{ .err = err }); + log.err(.page, "iframe event init", .{ .err = err, .url = iframe._src }); break :blk; }; - defer if (!event._v8_handoff) event.deinit(false); self._event_manager.dispatch(iframe.asNode().asEventTarget(), event) catch |err| { log.warn(.js, "iframe onload", .{ .err = err, .url = iframe._src }); }; @@ -701,7 +704,7 @@ pub fn documentIsComplete(self: *Page) void { self._load_state = .complete; self._documentIsComplete() catch |err| { - log.err(.page, "document is complete", .{ .err = err, .type = self._type }); + log.err(.page, "document is complete", .{ .err = err, .type = self._type, .url = self.url }); }; if (IS_DEBUG) { @@ -720,23 +723,15 @@ pub fn documentIsComplete(self: *Page) void { fn _documentIsComplete(self: *Page) !void { self.document._ready_state = .complete; + // Run load events before window.load. + try self.dispatchLoad(); + var ls: JS.Local.Scope = undefined; self.js.localScope(&ls); defer ls.deinit(); - // Dispatch `_to_load` events before window.load. - for (self._to_load.items) |element| { - const event = try Event.initTrusted(comptime .wrap("load"), .{}, self); - defer if (!event._v8_handoff) event.deinit(false); - try self._event_manager.dispatch(element.asEventTarget(), event); - } - - // `_to_load` can be cleaned here. 
- self._to_load.clearAndFree(self.arena); - // Dispatch window.load event. const event = try Event.initTrusted(comptime .wrap("load"), .{}, self); - defer if (!event._v8_handoff) event.deinit(false); // This event is weird, it's dispatched directly on the window, but // with the document as the target. event._target = self.document.asEventTarget(); @@ -748,7 +743,6 @@ fn _documentIsComplete(self: *Page) !void { ); const pageshow_event = (try PageTransitionEvent.initTrusted(comptime .wrap("pageshow"), .{}, self)).asEvent(); - defer if (!pageshow_event._v8_handoff) pageshow_event.deinit(false); try self._event_manager.dispatchWithFunction( self.window.asEventTarget(), pageshow_event, @@ -806,7 +800,7 @@ fn pageDataCallback(transfer: *Http.Transfer, data: []const u8) !void { } orelse .unknown; if (comptime IS_DEBUG) { - log.debug(.page, "navigate first chunk", .{ .content_type = mime.content_type, .len = data.len, .type = self._type }); + log.debug(.page, "navigate first chunk", .{ .content_type = mime.content_type, .len = data.len, .type = self._type, .url = self.url }); } switch (mime.content_type) { @@ -853,7 +847,7 @@ fn pageDoneCallback(ctx: *anyopaque) !void { var self: *Page = @ptrCast(@alignCast(ctx)); if (comptime IS_DEBUG) { - log.debug(.page, "navigate done", .{ .type = self._type }); + log.debug(.page, "navigate done", .{ .type = self._type, .url = self.url }); } //We need to handle different navigation types differently. 
@@ -872,11 +866,6 @@ fn pageDoneCallback(ctx: *anyopaque) !void { .html => |buf| { parser.parse(buf.items); self._script_manager.staticScriptsDone(); - if (self._script_manager.isDone()) { - // No scripts, or just inline scripts that were already processed - // we need to trigger this ourselves - self.documentIsComplete(); - } self._parse_state = .complete; }, .text => |*buf| { @@ -931,248 +920,17 @@ fn pageDoneCallback(ctx: *anyopaque) !void { fn pageErrorCallback(ctx: *anyopaque, err: anyerror) void { var self: *Page = @ptrCast(@alignCast(ctx)); - log.err(.page, "navigate failed", .{ .err = err, .type = self._type }); + log.err(.page, "navigate failed", .{ .err = err, .type = self._type, .url = self.url }); self._parse_state = .{ .err = err }; // In case of error, we want to complete the page with a custom HTML // containing the error. pageDoneCallback(ctx) catch |e| { - log.err(.browser, "pageErrorCallback", .{ .err = e, .type = self._type }); + log.err(.browser, "pageErrorCallback", .{ .err = e, .type = self._type, .url = self.url }); return; }; } -pub fn wait(self: *Page, wait_ms: u32) Session.WaitResult { - return self._wait(wait_ms) catch |err| { - switch (err) { - error.JsError => {}, // already logged (with hopefully more context) - else => { - // There may be errors from the http/client or ScriptManager - // that we should not treat as an error like this. Will need - // to run this through more real-world sites and see if we need - // to expand the switch (err) to have more customized logs for - // specific messages. 
- log.err(.browser, "page wait", .{ .err = err, .type = self._type }); - }, - } - return .done; - }; -} - -fn _wait(self: *Page, wait_ms: u32) !Session.WaitResult { - if (comptime IS_DEBUG) { - std.debug.assert(self._type == .root); - } - - var timer = try std.time.Timer.start(); - var ms_remaining = wait_ms; - - const browser = self._session.browser; - var http_client = browser.http_client; - - // I'd like the page to know NOTHING about cdp_socket / CDP, but the - // fact is that the behavior of wait changes depending on whether or - // not we're using CDP. - // If we aren't using CDP, as soon as we think there's nothing left - // to do, we can exit - we'de done. - // But if we are using CDP, we should wait for the whole `wait_ms` - // because the http_click.tick() also monitors the CDP socket. And while - // we could let CDP poll http (like it does for HTTP requests), the fact - // is that we know more about the timing of stuff (e.g. how long to - // poll/sleep) in the page. - const exit_when_done = http_client.cdp_client == null; - - // for debugging - // defer self.printWaitAnalysis(); - - while (true) { - switch (self._parse_state) { - .pre, .raw, .text, .image => { - // The main page hasn't started/finished navigating. - // There's no JS to run, and no reason to run the scheduler. - if (http_client.active == 0 and exit_when_done) { - // haven't started navigating, I guess. - return .done; - } - // Either we have active http connections, or we're in CDP - // mode with an extra socket. Either way, we're waiting - // for http traffic - if (try http_client.tick(@intCast(ms_remaining)) == .cdp_socket) { - // exit_when_done is explicitly set when there isn't - // an extra socket, so it should not be possibl to - // get an cdp_socket message when exit_when_done - // is true. 
- if (IS_DEBUG) { - std.debug.assert(exit_when_done == false); - } - - // data on a socket we aren't handling, return to caller - return .cdp_socket; - } - }, - .html, .complete => { - if (self._queued_navigation != null) { - return .done; - } - - // The HTML page was parsed. We now either have JS scripts to - // download, or scheduled tasks to execute, or both. - - // scheduler.run could trigger new http transfers, so do not - // store http_client.active BEFORE this call and then use - // it AFTER. - const ms_to_next_task = try browser.runMacrotasks(); - - const http_active = http_client.active; - const total_network_activity = http_active + http_client.intercepted; - if (self._notified_network_almost_idle.check(total_network_activity <= 2)) { - self.notifyNetworkAlmostIdle(); - } - if (self._notified_network_idle.check(total_network_activity == 0)) { - self.notifyNetworkIdle(); - } - - if (http_active == 0 and exit_when_done) { - // we don't need to consider http_client.intercepted here - // because exit_when_done is true, and that can only be - // the case when interception isn't possible. - if (comptime IS_DEBUG) { - std.debug.assert(http_client.intercepted == 0); - } - - const ms = ms_to_next_task orelse blk: { - if (wait_ms - ms_remaining < 100) { - if (comptime builtin.is_test) { - return .done; - } - // Look, we want to exit ASAP, but we don't want - // to exit so fast that we've run none of the - // background jobs. - break :blk 50; - } - // No http transfers, no cdp extra socket, no - // scheduled tasks, we're done. - return .done; - }; - - if (ms > ms_remaining) { - // Same as above, except we have a scheduled task, - // it just happens to be too far into the future - // compared to how long we were told to wait. - return .done; - } - - // We have a task to run in the not-so-distant future. 
- // You might think we can just sleep until that task is - // ready, but we should continue to run lowPriority tasks - // in the meantime, and that could unblock things. So - // we'll just sleep for a bit, and then restart our wait - // loop to see if anything new can be processed. - std.Thread.sleep(std.time.ns_per_ms * @as(u64, @intCast(@min(ms, 20)))); - } else { - // We're here because we either have active HTTP - // connections, or exit_when_done == false (aka, there's - // an cdp_socket registered with the http client). - // We should continue to run lowPriority tasks, so we - // minimize how long we'll poll for network I/O. - const ms_to_wait = @min(200, @min(ms_remaining, ms_to_next_task orelse 200)); - if (try http_client.tick(ms_to_wait) == .cdp_socket) { - // data on a socket we aren't handling, return to caller - return .cdp_socket; - } - } - }, - .err => |err| { - self._parse_state = .{ .raw_done = @errorName(err) }; - return err; - }, - .raw_done => { - if (exit_when_done) { - return .done; - } - // we _could_ http_client.tick(ms_to_wait), but this has - // the same result, and I feel is more correct. 
- return .no_page; - }, - } - - const ms_elapsed = timer.lap() / 1_000_000; - if (ms_elapsed >= ms_remaining) { - return .done; - } - ms_remaining -= @intCast(ms_elapsed); - } -} - -fn printWaitAnalysis(self: *Page) void { - std.debug.print("load_state: {s}\n", .{@tagName(self._load_state)}); - std.debug.print("parse_state: {s}\n", .{@tagName(std.meta.activeTag(self._parse_state))}); - { - std.debug.print("\nactive requests: {d}\n", .{self._session.browser.http_client.active}); - var n_ = self._session.browser.http_client.handles.in_use.first; - while (n_) |n| { - const handle: *Http.Client.Handle = @fieldParentPtr("node", n); - const transfer = Http.Transfer.fromEasy(handle.conn.easy) catch |err| { - std.debug.print(" - failed to load transfer: {any}\n", .{err}); - break; - }; - std.debug.print(" - {f}\n", .{transfer}); - n_ = n.next; - } - } - - { - std.debug.print("\nqueued requests: {d}\n", .{self._session.browser.http_client.queue.len()}); - var n_ = self._session.browser.http_client.queue.first; - while (n_) |n| { - const transfer: *Http.Transfer = @fieldParentPtr("_node", n); - std.debug.print(" - {f}\n", .{transfer}); - n_ = n.next; - } - } - - { - std.debug.print("\ndeferreds: {d}\n", .{self._script_manager.defer_scripts.len()}); - var n_ = self._script_manager.defer_scripts.first; - while (n_) |n| { - const script: *ScriptManager.Script = @fieldParentPtr("node", n); - std.debug.print(" - {s} complete: {any}\n", .{ script.url, script.complete }); - n_ = n.next; - } - } - - { - std.debug.print("\nasyncs: {d}\n", .{self._script_manager.async_scripts.len()}); - } - - { - std.debug.print("\nasyncs ready: {d}\n", .{self._script_manager.ready_scripts.len()}); - var n_ = self._script_manager.ready_scripts.first; - while (n_) |n| { - const script: *ScriptManager.Script = @fieldParentPtr("node", n); - std.debug.print(" - {s} complete: {any}\n", .{ script.url, script.complete }); - n_ = n.next; - } - } - - const now = milliTimestamp(.monotonic); - { - 
std.debug.print("\nhigh_priority schedule: {d}\n", .{self.js.scheduler.high_priority.count()}); - var it = self.js.scheduler.high_priority.iterator(); - while (it.next()) |task| { - std.debug.print(" - {s} schedule: {d}ms\n", .{ task.name, task.run_at - now }); - } - } - - { - std.debug.print("\nlow_priority schedule: {d}\n", .{self.js.scheduler.low_priority.count()}); - var it = self.js.scheduler.low_priority.iterator(); - while (it.next()) |task| { - std.debug.print(" - {s} schedule: {d}ms\n", .{ task.name, task.run_at - now }); - } - } -} - pub fn isGoingAway(self: *const Page) bool { return self._queued_navigation != null; } @@ -1186,6 +944,7 @@ pub fn scriptAddedCallback(self: *Page, comptime from_parser: bool, script: *Ele self._script_manager.addFromElement(from_parser, script, "parsing") catch |err| { log.err(.page, "page.scriptAddedCallback", .{ .err = err, + .url = self.url, .src = script.asElement().getAttributeSafe(comptime .wrap("src")), .type = self._type, }); @@ -1201,7 +960,7 @@ pub fn iframeAddedCallback(self: *Page, iframe: *Element.Html.IFrame) !void { return; } - const src = try iframe.getSrc(self); + const src = iframe.asElement().getAttributeSafe(comptime .wrap("src")) orelse return; if (src.len == 0) { return; } @@ -1223,8 +982,16 @@ pub fn iframeAddedCallback(self: *Page, iframe: *Element.Html.IFrame) !void { .timestamp = timestamp(.monotonic), }); - page_frame.navigate(src, .{ .reason = .initialFrameNavigation }) catch |err| { - log.warn(.page, "iframe navigate failure", .{ .url = src, .err = err }); + // navigate will dupe the url + const url = try URL.resolve( + self.call_arena, + self.base(), + src, + .{ .encode = true }, + ); + + page_frame.navigate(url, .{ .reason = .initialFrameNavigation }) catch |err| { + log.warn(.page, "iframe navigate failure", .{ .url = url, .err = err }); self._pending_loads -= 1; iframe._content_window = null; page_frame.deinit(); @@ -1272,7 +1039,7 @@ pub fn domChanged(self: *Page) void { 
self._intersection_check_scheduled = true; self.js.queueIntersectionChecks() catch |err| { - log.err(.page, "page.schedIntersectChecks", .{ .err = err, .type = self._type }); + log.err(.page, "page.schedIntersectChecks", .{ .err = err, .type = self._type, .url = self.url }); }; } @@ -1351,29 +1118,6 @@ pub fn getElementByIdFromNode(self: *Page, node: *Node, id: []const u8) ?*Elemen return null; } -/// Sets an inline event listener (`onload`, `onclick`, `onwheel` etc.); -/// overrides the listener if there's already one. -pub fn setAttrListener( - self: *Page, - element: *Element, - listener_type: GlobalEventHandler, - listener_callback: JS.Function.Global, -) !void { - if (comptime IS_DEBUG) { - log.debug(.event, "Page.setAttrListener", .{ - .element = element, - .listener_type = listener_type, - .type = self._type, - }); - } - - const gop = try self._element_attr_listeners.getOrPut(self.arena, .{ - .target = element.asEventTarget(), - .handler = listener_type, - }); - gop.value_ptr.* = listener_callback; -} - pub fn registerPerformanceObserver(self: *Page, observer: *PerformanceObserver) !void { return self._performance_observers.append(self.arena, observer); } @@ -1393,7 +1137,7 @@ pub fn notifyPerformanceObservers(self: *Page, entry: *Performance.Entry) !void for (self._performance_observers.items) |observer| { if (observer.interested(entry)) { observer._entries.append(self.arena, entry) catch |err| { - log.err(.page, "notifyPerformanceObservers", .{ .err = err, .type = self._type }); + log.err(.page, "notifyPerformanceObservers", .{ .err = err, .type = self._type, .url = self.url }); }; } } @@ -1458,6 +1202,18 @@ pub fn checkIntersections(self: *Page) !void { } } +pub fn dispatchLoad(self: *Page) !void { + const has_dom_load_listener = self._event_manager.has_dom_load_listener; + for (self._to_load.items) |html_element| { + if (has_dom_load_listener or html_element.hasAttributeFunction(.onload, self)) { + const event = try Event.initTrusted(comptime 
.wrap("load"), .{}, self); + try self._event_manager.dispatch(html_element.asEventTarget(), event); + } + } + // We drained everything. + self._to_load.clearRetainingCapacity(); +} + pub fn scheduleMutationDelivery(self: *Page) !void { if (self._mutation_delivery_scheduled) { return; @@ -1488,7 +1244,7 @@ pub fn performScheduledIntersectionChecks(self: *Page) void { } self._intersection_check_scheduled = false; self.checkIntersections() catch |err| { - log.err(.page, "page.schedIntersectChecks", .{ .err = err, .type = self._type }); + log.err(.page, "page.schedIntersectChecks", .{ .err = err, .type = self._type, .url = self.url }); }; } @@ -1504,7 +1260,7 @@ pub fn deliverIntersections(self: *Page) void { i -= 1; const observer = self._intersection_observers.items[i]; observer.deliverEntries(self) catch |err| { - log.err(.page, "page.deliverIntersections", .{ .err = err, .type = self._type }); + log.err(.page, "page.deliverIntersections", .{ .err = err, .type = self._type, .url = self.url }); }; } } @@ -1522,7 +1278,7 @@ pub fn deliverMutations(self: *Page) void { }; if (self._mutation_delivery_depth > 100) { - log.err(.page, "page.MutationLimit", .{ .type = self._type }); + log.err(.page, "page.MutationLimit", .{ .type = self._type, .url = self.url }); self._mutation_delivery_depth = 0; return; } @@ -1531,7 +1287,7 @@ pub fn deliverMutations(self: *Page) void { while (it) |node| : (it = node.next) { const observer: *MutationObserver = @fieldParentPtr("node", node); observer.deliverRecords(self) catch |err| { - log.err(.page, "page.deliverMutations", .{ .err = err, .type = self._type }); + log.err(.page, "page.deliverMutations", .{ .err = err, .type = self._type, .url = self.url }); }; } } @@ -1549,7 +1305,7 @@ pub fn deliverSlotchangeEvents(self: *Page) void { var i: usize = 0; var slots = self.call_arena.alloc(*Element.Html.Slot, pending) catch |err| { - log.err(.page, "deliverSlotchange.append", .{ .err = err, .type = self._type }); + log.err(.page, 
"deliverSlotchange.append", .{ .err = err, .type = self._type, .url = self.url }); return; }; @@ -1562,14 +1318,12 @@ pub fn deliverSlotchangeEvents(self: *Page) void { for (slots) |slot| { const event = Event.initTrusted(comptime .wrap("slotchange"), .{ .bubbles = true }, self) catch |err| { - log.err(.page, "deliverSlotchange.init", .{ .err = err, .type = self._type }); + log.err(.page, "deliverSlotchange.init", .{ .err = err, .type = self._type, .url = self.url }); continue; }; - defer if (!event._v8_handoff) event.deinit(false); - const target = slot.asNode().asEventTarget(); _ = target.dispatchEvent(event, self) catch |err| { - log.err(.page, "deliverSlotchange.dispatch", .{ .err = err, .type = self._type }); + log.err(.page, "deliverSlotchange.dispatch", .{ .err = err, .type = self._type, .url = self.url }); }; } } @@ -1601,10 +1355,8 @@ pub fn appendNew(self: *Page, parent: *Node, child: Node.NodeOrText) !void { if (parent.lastChild()) |sibling| { if (sibling.is(CData.Text)) |tn| { const cdata = tn._proto; - const existing = cdata.getData(); - // @metric - // Inefficient, but we don't expect this to happen often. 
- cdata._data = try std.mem.concat(self.arena, u8, &.{ existing, txt }); + const existing = cdata.getData().str(); + cdata._data = try String.concat(self.arena, &.{ existing, txt }); return; } } @@ -1624,7 +1376,7 @@ pub fn appendNew(self: *Page, parent: *Node, child: Node.NodeOrText) !void { // called from the parser when the node and all its children have been added pub fn nodeComplete(self: *Page, node: *Node) !void { Node.Build.call(node, "complete", .{ node, self }) catch |err| { - log.err(.bug, "build.complete", .{ .tag = node.getNodeName(&self.buf), .err = err, .type = self._type }); + log.err(.bug, "build.complete", .{ .tag = node.getNodeName(&self.buf), .err = err, .type = self._type, .url = self.url }); return err; }; return self.nodeIsReady(true, node); @@ -2323,7 +2075,7 @@ pub fn createElementNS(self: *Page, namespace: Element.Namespace, name: []const var caught: JS.TryCatch.Caught = undefined; _ = ls.toLocal(def.constructor).newInstance(&caught) catch |err| { - log.warn(.js, "custom element constructor", .{ .name = name, .err = err, .caught = caught, .type = self._type }); + log.warn(.js, "custom element constructor", .{ .name = name, .err = err, .caught = caught, .type = self._type, .url = self.url }); return node; }; @@ -2381,7 +2133,7 @@ fn createHtmlElementT(self: *Page, comptime E: type, namespace: Element.Namespac const node = element.asNode(); if (@hasDecl(E, "Build") and @hasDecl(E.Build, "created")) { @call(.auto, @field(E.Build, "created"), .{ node, self }) catch |err| { - log.err(.page, "build.created", .{ .tag = node.getNodeName(&self.buf), .err = err, .type = self._type }); + log.err(.page, "build.created", .{ .tag = node.getNodeName(&self.buf), .err = err, .type = self._type, .url = self.url }); return err; }; } @@ -2434,28 +2186,24 @@ fn populateElementAttributes(self: *Page, element: *Element, list: anytype) !voi } pub fn createTextNode(self: *Page, text: []const u8) !*Node { - // might seem unlikely that we get an intern hit, but 
we'll get some nodes - // with just '\n' - const owned_text = try self.dupeString(text); const cd = try self._factory.node(CData{ ._proto = undefined, ._type = .{ .text = .{ ._proto = undefined, } }, - ._data = owned_text, + ._data = try self.dupeSSO(text), }); cd._type.text._proto = cd; return cd.asNode(); } pub fn createComment(self: *Page, text: []const u8) !*Node { - const owned_text = try self.dupeString(text); const cd = try self._factory.node(CData{ ._proto = undefined, ._type = .{ .comment = .{ ._proto = undefined, } }, - ._data = owned_text, + ._data = try self.dupeSSO(text), }); cd._type.comment._proto = cd; return cd.asNode(); @@ -2467,8 +2215,6 @@ pub fn createCDATASection(self: *Page, data: []const u8) !*Node { return error.InvalidCharacterError; } - const owned_data = try self.dupeString(data); - // First allocate the Text node separately const text_node = try self._factory.create(CData.Text{ ._proto = undefined, @@ -2480,7 +2226,7 @@ pub fn createCDATASection(self: *Page, data: []const u8) !*Node { ._type = .{ .cdata_section = .{ ._proto = text_node, } }, - ._data = owned_data, + ._data = try self.dupeSSO(data), }); // Set up the back pointer from Text to CData @@ -2502,7 +2248,6 @@ pub fn createProcessingInstruction(self: *Page, target: []const u8, data: []cons try validateXmlName(target); const owned_target = try self.dupeString(target); - const owned_data = try self.dupeString(data); const pi = try self._factory.create(CData.ProcessingInstruction{ ._proto = undefined, @@ -2512,7 +2257,7 @@ pub fn createProcessingInstruction(self: *Page, target: []const u8, data: []cons const cd = try self._factory.node(CData{ ._proto = undefined, ._type = .{ .processing_instruction = pi }, - ._data = owned_data, + ._data = try self.dupeSSO(data), }); // Set up the back pointer from ProcessingInstruction to CData @@ -2585,6 +2330,10 @@ pub fn dupeString(self: *Page, value: []const u8) ![]const u8 { return self.arena.dupe(u8, value); } +pub fn dupeSSO(self: *Page, 
value: []const u8) !String { + return String.init(self.arena, value, .{ .dupe = true }); +} + const RemoveNodeOpts = struct { will_be_reconnected: bool, }; @@ -2851,7 +2600,7 @@ pub fn _insertNodeRelative(self: *Page, comptime from_parser: bool, parent: *Nod pub fn attributeChange(self: *Page, element: *Element, name: String, value: String, old_value: ?String) void { _ = Element.Build.call(element, "attributeChange", .{ element, name, value, self }) catch |err| { - log.err(.bug, "build.attributeChange", .{ .tag = element.getTag(), .name = name, .value = value, .err = err, .type = self._type }); + log.err(.bug, "build.attributeChange", .{ .tag = element.getTag(), .name = name, .value = value, .err = err, .type = self._type, .url = self.url }); }; Element.Html.Custom.invokeAttributeChangedCallbackOnElement(element, name, old_value, value, self); @@ -2860,7 +2609,7 @@ pub fn attributeChange(self: *Page, element: *Element, name: String, value: Stri while (it) |node| : (it = node.next) { const observer: *MutationObserver = @fieldParentPtr("node", node); observer.notifyAttributeChange(element, name, old_value, self) catch |err| { - log.err(.page, "attributeChange.notifyObserver", .{ .err = err, .type = self._type }); + log.err(.page, "attributeChange.notifyObserver", .{ .err = err, .type = self._type, .url = self.url }); }; } @@ -2877,7 +2626,7 @@ pub fn attributeChange(self: *Page, element: *Element, name: String, value: Stri pub fn attributeRemove(self: *Page, element: *Element, name: String, old_value: String) void { _ = Element.Build.call(element, "attributeRemove", .{ element, name, self }) catch |err| { - log.err(.bug, "build.attributeRemove", .{ .tag = element.getTag(), .name = name, .err = err, .type = self._type }); + log.err(.bug, "build.attributeRemove", .{ .tag = element.getTag(), .name = name, .err = err, .type = self._type, .url = self.url }); }; Element.Html.Custom.invokeAttributeChangedCallbackOnElement(element, name, old_value, null, self); @@ -2886,7 
+2635,7 @@ pub fn attributeRemove(self: *Page, element: *Element, name: String, old_value: while (it) |node| : (it = node.next) { const observer: *MutationObserver = @fieldParentPtr("node", node); observer.notifyAttributeChange(element, name, old_value, self) catch |err| { - log.err(.page, "attributeRemove.notifyObserver", .{ .err = err, .type = self._type }); + log.err(.page, "attributeRemove.notifyObserver", .{ .err = err, .type = self._type, .url = self.url }); }; } @@ -2903,11 +2652,11 @@ pub fn attributeRemove(self: *Page, element: *Element, name: String, old_value: fn signalSlotChange(self: *Page, slot: *Element.Html.Slot) void { self._slots_pending_slotchange.put(self.arena, slot, {}) catch |err| { - log.err(.page, "signalSlotChange.put", .{ .err = err, .type = self._type }); + log.err(.page, "signalSlotChange.put", .{ .err = err, .type = self._type, .url = self.url }); return; }; self.scheduleSlotchangeDelivery() catch |err| { - log.err(.page, "signalSlotChange.schedule", .{ .err = err, .type = self._type }); + log.err(.page, "signalSlotChange.schedule", .{ .err = err, .type = self._type, .url = self.url }); }; } @@ -2947,7 +2696,7 @@ fn updateElementAssignedSlot(self: *Page, element: *Element) void { // Recursively search through the shadow root for a matching slot if (findMatchingSlot(shadow_root.asNode(), slot_name)) |slot| { self._element_assigned_slots.put(self.arena, element, slot) catch |err| { - log.err(.page, "updateElementAssignedSlot.put", .{ .err = err, .type = self._type }); + log.err(.page, "updateElementAssignedSlot.put", .{ .err = err, .type = self._type, .url = self.url }); }; } } @@ -2988,13 +2737,13 @@ pub fn setCustomizedBuiltInDefinition(self: *Page, element: *Element, definition pub fn characterDataChange( self: *Page, target: *Node, - old_value: []const u8, + old_value: String, ) void { var it: ?*std.DoublyLinkedList.Node = self._mutation_observers.first; while (it) |node| : (it = node.next) { const observer: *MutationObserver = 
@fieldParentPtr("node", node); observer.notifyCharacterDataChange(target, old_value, self) catch |err| { - log.err(.page, "cdataChange.notifyObserver", .{ .err = err, .type = self._type }); + log.err(.page, "cdataChange.notifyObserver", .{ .err = err, .type = self._type, .url = self.url }); }; } } @@ -3021,7 +2770,7 @@ pub fn childListChange( while (it) |node| : (it = node.next) { const observer: *MutationObserver = @fieldParentPtr("node", node); observer.notifyChildListChange(target, added_nodes, removed_nodes, previous_sibling, next_sibling, self) catch |err| { - log.err(.page, "childListChange.notifyObserver", .{ .err = err, .type = self._type }); + log.err(.page, "childListChange.notifyObserver", .{ .err = err, .type = self._type, .url = self.url }); }; } } @@ -3072,7 +2821,7 @@ fn nodeIsReady(self: *Page, comptime from_parser: bool, node: *Node) !void { } self.scriptAddedCallback(from_parser, script) catch |err| { - log.err(.page, "page.nodeIsReady", .{ .err = err, .element = "script", .type = self._type }); + log.err(.page, "page.nodeIsReady", .{ .err = err, .element = "script", .type = self._type, .url = self.url }); return err; }; } else if (node.is(Element.Html.IFrame)) |iframe| { @@ -3082,9 +2831,19 @@ fn nodeIsReady(self: *Page, comptime from_parser: bool, node: *Node) !void { } self.iframeAddedCallback(iframe) catch |err| { - log.err(.page, "page.nodeIsReady", .{ .err = err, .element = "iframe", .type = self._type }); + log.err(.page, "page.nodeIsReady", .{ .err = err, .element = "iframe", .type = self._type, .url = self.url }); return err; }; + } else if (node.is(Element.Html.Link)) |link| { + link.linkAddedCallback(self) catch |err| { + log.err(.page, "page.nodeIsReady", .{ .err = err, .element = "link", .type = self._type }); + return error.LinkLoadError; + }; + } else if (node.is(Element.Html.Style)) |style| { + style.styleAddedCallback(self) catch |err| { + log.err(.page, "page.nodeIsReady", .{ .err = err, .element = "style", .type = self._type }); 
+ return error.StyleLoadError; + }; } } @@ -3242,8 +3001,6 @@ pub fn triggerMouseClick(self: *Page, x: f64, y: f64) !void { .clientX = x, .clientY = y, }, self)).asEvent(); - - defer if (!event._v8_handoff) event.deinit(false); try self._event_manager.dispatch(target.asEventTarget(), event); } @@ -3267,12 +3024,12 @@ pub fn handleClick(self: *Page, target: *Node) !void { // Check target attribute - don't navigate if opening in new window/tab const target_val = anchor.getTarget(); if (target_val.len > 0 and !std.mem.eql(u8, target_val, "_self")) { - log.warn(.not_implemented, "a.target", .{ .type = self._type }); + log.warn(.not_implemented, "a.target", .{ .type = self._type, .url = self.url }); return; } if (try element.hasAttribute(comptime .wrap("download"), self)) { - log.warn(.browser, "a.download", .{ .type = self._type }); + log.warn(.browser, "a.download", .{ .type = self._type, .url = self.url }); return; } @@ -3301,8 +3058,6 @@ pub fn handleClick(self: *Page, target: *Node) !void { pub fn triggerKeyboard(self: *Page, keyboard_event: *KeyboardEvent) !void { const event = keyboard_event.asEvent(); - defer if (!event._v8_handoff) event.deinit(false); - const element = self.window._document._active_element orelse return; if (comptime IS_DEBUG) { log.debug(.page, "page keydown", .{ @@ -3316,7 +3071,7 @@ pub fn triggerKeyboard(self: *Page, keyboard_event: *KeyboardEvent) !void { } pub fn handleKeydown(self: *Page, target: *Node, event: *Event) !void { - const keyboard_event = event.as(KeyboardEvent); + const keyboard_event = event.is(KeyboardEvent) orelse return; const key = keyboard_event.getKey(); if (key == .Dead) { @@ -3372,10 +3127,8 @@ pub fn submitForm(self: *Page, submitter_: ?*Element, form_: ?*Element.Html.Form const form_element = form.asElement(); if (submit_opts.fire_event) { - const submit_event = try Event.initTrusted(comptime .wrap("submit"), .{ .bubbles = true, .cancelable = true }, self); - defer if (!submit_event._v8_handoff) 
submit_event.deinit(false); - const onsubmit_handler = try form.asHtmlElement().getOnSubmit(self); + const submit_event = try Event.initTrusted(comptime .wrap("submit"), .{ .bubbles = true, .cancelable = true }, self); var ls: JS.Local.Scope = undefined; self.js.localScope(&ls); diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index 19e20825..61d4aef0 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -20,13 +20,14 @@ const std = @import("std"); const lp = @import("lightpanda"); const builtin = @import("builtin"); -const js = @import("js/js.zig"); const log = @import("../log.zig"); +const Http = @import("../http/Http.zig"); +const String = @import("../string.zig").String; +const js = @import("js/js.zig"); const URL = @import("URL.zig"); const Page = @import("Page.zig"); const Browser = @import("Browser.zig"); -const Http = @import("../http/Http.zig"); const Element = @import("webapi/Element.zig"); @@ -581,12 +582,6 @@ fn evaluate(self: *ScriptManager) void { } } -pub fn isDone(self: *const ScriptManager) bool { - return self.static_scripts_done and // page is done processing initial html - self.defer_scripts.first == null and // no deferred scripts - self.async_scripts.first == null; // no async scripts -} - fn parseImportmap(self: *ScriptManager, script: *const Script) !void { const content = script.source.content(); @@ -627,8 +622,19 @@ pub const Script = struct { node: std.DoublyLinkedList.Node, script_element: ?*Element.Html.Script, manager: *ScriptManager, + + // for debugging a rare production issue header_callback_called: bool = false, + // for debugging a rare production issue + debug_transfer_id: u32 = 0, + debug_transfer_tries: u8 = 0, + debug_transfer_aborted: bool = false, + debug_transfer_bytes_received: usize = 0, + debug_transfer_notified_fail: bool = false, + debug_transfer_redirecting: bool = false, + debug_transfer_intercept_state: u8 = 0, + const Kind = enum { module, javascript, @@ -696,8 
+702,31 @@ pub const Script = struct { // temp debug, trying to figure out why the next assert sometimes // fails. Is the buffer just corrupt or is headerCallback really // being called twice? - lp.assert(self.header_callback_called == false, "ScriptManager.Header recall", .{}); + lp.assert(self.header_callback_called == false, "ScriptManager.Header recall", .{ + .m = @tagName(std.meta.activeTag(self.mode)), + .a1 = self.debug_transfer_id, + .a2 = self.debug_transfer_tries, + .a3 = self.debug_transfer_aborted, + .a4 = self.debug_transfer_bytes_received, + .a5 = self.debug_transfer_notified_fail, + .a6 = self.debug_transfer_redirecting, + .a7 = self.debug_transfer_intercept_state, + .b1 = transfer.id, + .b2 = transfer._tries, + .b3 = transfer.aborted, + .b4 = transfer.bytes_received, + .b5 = transfer._notified_fail, + .b6 = transfer._redirecting, + .b7 = @intFromEnum(transfer._intercept_state), + }); self.header_callback_called = true; + self.debug_transfer_id = transfer.id; + self.debug_transfer_tries = transfer._tries; + self.debug_transfer_aborted = transfer.aborted; + self.debug_transfer_bytes_received = transfer.bytes_received; + self.debug_transfer_notified_fail = transfer._notified_fail; + self.debug_transfer_redirecting = transfer._redirecting; + self.debug_transfer_intercept_state = @intFromEnum(transfer._intercept_state); } lp.assert(self.source.remote.capacity == 0, "ScriptManager.Header buffer", .{ .capacity = self.source.remote.capacity }); @@ -830,13 +859,15 @@ pub const Script = struct { .kind = self.kind, .cacheable = cacheable, }); - self.executeCallback("error", local.toLocal(script_element._on_error), page); + self.executeCallback(comptime .wrap("error"), page); return; }; - self.executeCallback("load", local.toLocal(script_element._on_load), page); + self.executeCallback(comptime .wrap("load"), page); return; } + defer page._event_manager.clearIgnoreList(); + var try_catch: js.TryCatch = undefined; try_catch.init(local); defer try_catch.deinit(); 
@@ -855,19 +886,18 @@ pub const Script = struct { }; if (comptime IS_DEBUG) { - log.debug(.browser, "executed script", .{ .src = url, .success = success, .on_load = script_element._on_load != null }); + log.debug(.browser, "executed script", .{ .src = url, .success = success }); } defer { - // We should run microtasks even if script execution fails. - local.runMicrotasks(); + local.runMacrotasks(); // also runs microtasks _ = page.js.scheduler.run() catch |err| { log.err(.page, "scheduler", .{ .err = err }); }; } if (success) { - self.executeCallback("load", local.toLocal(script_element._on_load), page); + self.executeCallback(comptime .wrap("load"), page); return; } @@ -878,14 +908,12 @@ pub const Script = struct { .cacheable = cacheable, }); - self.executeCallback("error", local.toLocal(script_element._on_error), page); + self.executeCallback(comptime .wrap("error"), page); } - fn executeCallback(self: *const Script, comptime typ: []const u8, cb_: ?js.Function, page: *Page) void { - const cb = cb_ orelse return; - + fn executeCallback(self: *const Script, typ: String, page: *Page) void { const Event = @import("webapi/Event.zig"); - const event = Event.initTrusted(comptime .wrap(typ), .{}, page) catch |err| { + const event = Event.initTrusted(typ, .{}, page) catch |err| { log.warn(.js, "script internal callback", .{ .url = self.url, .type = typ, @@ -893,14 +921,11 @@ pub const Script = struct { }); return; }; - defer if (!event._v8_handoff) event.deinit(false); - - var caught: js.TryCatch.Caught = undefined; - cb.tryCall(void, .{event}, &caught) catch { + page._event_manager.dispatchOpts(self.script_element.?.asNode().asEventTarget(), event, .{ .apply_ignore = true }) catch |err| { log.warn(.js, "script callback", .{ .url = self.url, .type = typ, - .caught = caught, + .err = err, }); }; } @@ -1020,23 +1045,35 @@ fn parseDataURI(allocator: Allocator, src: []const u8) !?[]const u8 { const uri = src[5..]; const data_starts = std.mem.indexOfScalar(u8, uri, ',') orelse 
return null; + const data = uri[data_starts + 1 ..]; - var data = uri[data_starts + 1 ..]; + const unescaped = try URL.unescape(allocator, data); - // Extract the encoding. const metadata = uri[0..data_starts]; - if (std.mem.endsWith(u8, metadata, ";base64")) { - const decoder = std.base64.standard.Decoder; - const decoded_size = try decoder.calcSizeForSlice(data); - - const buffer = try allocator.alloc(u8, decoded_size); - errdefer allocator.free(buffer); - - try decoder.decode(buffer, data); - data = buffer; + if (std.mem.endsWith(u8, metadata, ";base64") == false) { + return unescaped; } - return data; + // Forgiving base64 decode per WHATWG spec: + // https://infra.spec.whatwg.org/#forgiving-base64-decode + // Step 1: Remove all ASCII whitespace + var stripped = try std.ArrayList(u8).initCapacity(allocator, unescaped.len); + for (unescaped) |c| { + if (!std.ascii.isWhitespace(c)) { + stripped.appendAssumeCapacity(c); + } + } + const trimmed = std.mem.trimRight(u8, stripped.items, "="); + + // Length % 4 == 1 is invalid + if (trimmed.len % 4 == 1) { + return error.InvalidCharacterError; + } + + const decoded_size = std.base64.standard_no_pad.Decoder.calcSizeForSlice(trimmed) catch return error.InvalidCharacterError; + const buffer = try allocator.alloc(u8, decoded_size); + std.base64.standard_no_pad.Decoder.decode(buffer, trimmed) catch return error.InvalidCharacterError; + return buffer; } const testing = @import("../testing.zig"); diff --git a/src/browser/Session.zig b/src/browser/Session.zig index c9b4db48..540ba520 100644 --- a/src/browser/Session.zig +++ b/src/browser/Session.zig @@ -166,6 +166,7 @@ pub fn wait(self: *Session, wait_ms: u32) WaitResult { error.JsError => {}, // already logged (with hopefully more context) else => log.err(.browser, "session wait", .{ .err = err, + .url = page.url, }), } return .done; @@ -240,6 +241,9 @@ fn _wait(self: *Session, page: *Page, wait_ms: u32) !WaitResult { // it AFTER. 
const ms_to_next_task = try browser.runMacrotasks(); + // Each call to this runs scheduled load events. + try page.dispatchLoad(); + const http_active = http_client.active; const total_network_activity = http_active + http_client.intercepted; if (page._notified_network_almost_idle.check(total_network_activity <= 2)) { @@ -257,7 +261,7 @@ fn _wait(self: *Session, page: *Page, wait_ms: u32) !WaitResult { std.debug.assert(http_client.intercepted == 0); } - const ms = ms_to_next_task orelse blk: { + const ms: u64 = ms_to_next_task orelse blk: { if (wait_ms - ms_remaining < 100) { if (comptime builtin.is_test) { return .done; @@ -267,6 +271,14 @@ fn _wait(self: *Session, page: *Page, wait_ms: u32) !WaitResult { // background jobs. break :blk 50; } + + if (browser.hasBackgroundTasks()) { + // _we_ have nothing to run, but v8 is working on + // background tasks. We'll wait for them. + browser.waitForBackgroundTasks(); + break :blk 20; + } + // No http transfers, no cdp extra socket, no // scheduled tasks, we're done. return .done; @@ -292,8 +304,14 @@ fn _wait(self: *Session, page: *Page, wait_ms: u32) !WaitResult { // an cdp_socket registered with the http client). // We should continue to run lowPriority tasks, so we // minimize how long we'll poll for network I/O. - const ms_to_wait = @min(200, @min(ms_remaining, ms_to_next_task orelse 200)); - if (try http_client.tick(ms_to_wait) == .cdp_socket) { + var ms_to_wait = @min(200, ms_to_next_task orelse 200); + if (ms_to_wait > 10 and browser.hasBackgroundTasks()) { + // if we have background tasks, we don't want to wait too + // long for a message from the client. We want to go back + // to the top of the loop and run macrotasks. 
+ ms_to_wait = 10; + } + if (try http_client.tick(@min(ms_remaining, ms_to_wait)) == .cdp_socket) { // data on a socket we aren't handling, return to caller return .cdp_socket; } diff --git a/src/browser/URL.zig b/src/browser/URL.zig index 716480b1..b8d8d563 100644 --- a/src/browser/URL.zig +++ b/src/browser/URL.zig @@ -20,44 +20,61 @@ const std = @import("std"); const Allocator = std.mem.Allocator; const ResolveOpts = struct { + encode: bool = false, always_dupe: bool = false, }; + // path is anytype, so that it can be used with both []const u8 and [:0]const u8 pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime opts: ResolveOpts) ![:0]const u8 { const PT = @TypeOf(path); if (base.len == 0 or isCompleteHTTPUrl(path)) { if (comptime opts.always_dupe or !isNullTerminated(PT)) { - return allocator.dupeZ(u8, path); + const duped = try allocator.dupeZ(u8, path); + return processResolved(allocator, duped, opts); + } + if (comptime opts.encode) { + return processResolved(allocator, path, opts); } return path; } if (path.len == 0) { if (comptime opts.always_dupe) { - return allocator.dupeZ(u8, base); + const duped = try allocator.dupeZ(u8, base); + return processResolved(allocator, duped, opts); + } + if (comptime opts.encode) { + return processResolved(allocator, base, opts); } return base; } if (path[0] == '?') { const base_path_end = std.mem.indexOfAny(u8, base, "?#") orelse base.len; - return std.mem.joinZ(allocator, "", &.{ base[0..base_path_end], path }); + const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_path_end], path }); + return processResolved(allocator, result, opts); } if (path[0] == '#') { const base_fragment_start = std.mem.indexOfScalar(u8, base, '#') orelse base.len; - return std.mem.joinZ(allocator, "", &.{ base[0..base_fragment_start], path }); + const result = try std.mem.joinZ(allocator, "", &.{ base[0..base_fragment_start], path }); + return processResolved(allocator, result, opts); } if 
(std.mem.startsWith(u8, path, "//")) { // network-path reference const index = std.mem.indexOfScalar(u8, base, ':') orelse { if (comptime isNullTerminated(PT)) { + if (comptime opts.encode) { + return processResolved(allocator, path, opts); + } return path; } - return allocator.dupeZ(u8, path); + const duped = try allocator.dupeZ(u8, path); + return processResolved(allocator, duped, opts); }; const protocol = base[0 .. index + 1]; - return std.mem.joinZ(allocator, "", &.{ protocol, path }); + const result = try std.mem.joinZ(allocator, "", &.{ protocol, path }); + return processResolved(allocator, result, opts); } const scheme_end = std.mem.indexOf(u8, base, "://"); @@ -65,7 +82,8 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime const path_start = std.mem.indexOfScalarPos(u8, base, authority_start, '/') orelse base.len; if (path[0] == '/') { - return std.mem.joinZ(allocator, "", &.{ base[0..path_start], path }); + const result = try std.mem.joinZ(allocator, "", &.{ base[0..path_start], path }); + return processResolved(allocator, result, opts); } var normalized_base: []const u8 = base[0..path_start]; @@ -127,7 +145,119 @@ pub fn resolve(allocator: Allocator, base: [:0]const u8, path: anytype, comptime // we always have an extra space out[out_i] = 0; - return out[0..out_i :0]; + return processResolved(allocator, out[0..out_i :0], opts); +} + +fn processResolved(allocator: Allocator, url: [:0]const u8, comptime opts: ResolveOpts) ![:0]const u8 { + if (!comptime opts.encode) { + return url; + } + return ensureEncoded(allocator, url); +} + +pub fn ensureEncoded(allocator: Allocator, url: [:0]const u8) ![:0]const u8 { + const scheme_end = std.mem.indexOf(u8, url, "://"); + const authority_start = if (scheme_end) |end| end + 3 else 0; + const path_start = std.mem.indexOfScalarPos(u8, url, authority_start, '/') orelse return url; + + const query_start = std.mem.indexOfScalarPos(u8, url, path_start, '?'); + const fragment_start = 
std.mem.indexOfScalarPos(u8, url, query_start orelse path_start, '#'); + + const path_end = query_start orelse fragment_start orelse url.len; + const query_end = if (query_start) |_| (fragment_start orelse url.len) else path_end; + + const path_to_encode = url[path_start..path_end]; + const encoded_path = try percentEncodeSegment(allocator, path_to_encode, true); + + const encoded_query = if (query_start) |qs| blk: { + const query_to_encode = url[qs + 1 .. query_end]; + const encoded = try percentEncodeSegment(allocator, query_to_encode, false); + break :blk encoded; + } else null; + + const encoded_fragment = if (fragment_start) |fs| blk: { + const fragment_to_encode = url[fs + 1 ..]; + const encoded = try percentEncodeSegment(allocator, fragment_to_encode, false); + break :blk encoded; + } else null; + + if (encoded_path.ptr == path_to_encode.ptr and + (encoded_query == null or encoded_query.?.ptr == url[query_start.? + 1 .. query_end].ptr) and + (encoded_fragment == null or encoded_fragment.?.ptr == url[fragment_start.? + 1 ..].ptr)) + { + // nothing has changed + return url; + } + + var buf = try std.ArrayList(u8).initCapacity(allocator, url.len + 20); + try buf.appendSlice(allocator, url[0..path_start]); + try buf.appendSlice(allocator, encoded_path); + if (encoded_query) |eq| { + try buf.append(allocator, '?'); + try buf.appendSlice(allocator, eq); + } + if (encoded_fragment) |ef| { + try buf.append(allocator, '#'); + try buf.appendSlice(allocator, ef); + } + try buf.append(allocator, 0); + return buf.items[0 .. 
buf.items.len - 1 :0]; +} + +fn percentEncodeSegment(allocator: Allocator, segment: []const u8, comptime is_path: bool) ![]const u8 { + // Check if encoding is needed + var needs_encoding = false; + for (segment) |c| { + if (shouldPercentEncode(c, is_path)) { + needs_encoding = true; + break; + } + } + if (!needs_encoding) { + return segment; + } + + var buf = try std.ArrayList(u8).initCapacity(allocator, segment.len + 10); + + var i: usize = 0; + while (i < segment.len) : (i += 1) { + const c = segment[i]; + + // Check if this is an already-encoded sequence (%XX) + if (c == '%' and i + 2 < segment.len) { + const end = i + 2; + const h1 = segment[i + 1]; + const h2 = segment[end]; + if (std.ascii.isHex(h1) and std.ascii.isHex(h2)) { + try buf.appendSlice(allocator, segment[i .. end + 1]); + i = end; + continue; + } + } + + if (shouldPercentEncode(c, is_path)) { + try buf.writer(allocator).print("%{X:0>2}", .{c}); + } else { + try buf.append(allocator, c); + } + } + + return buf.items; +} + +fn shouldPercentEncode(c: u8, comptime is_path: bool) bool { + return switch (c) { + // Unreserved characters (RFC 3986) + 'A'...'Z', 'a'...'z', '0'...'9', '-', '.', '_', '~' => false, + // sub-delims allowed in both path and query + '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=' => false, + // Separators allowed in both path and query + '/', ':', '@' => false, + // Query-specific: '?' is allowed in queries but not in paths + '?' 
=> comptime is_path, + // Everything else needs encoding (including space) + else => true, + }; } fn isNullTerminated(comptime value: type) bool { @@ -512,6 +642,33 @@ pub fn getRobotsUrl(arena: Allocator, url: [:0]const u8) ![:0]const u8 { ); } +pub fn unescape(arena: Allocator, input: []const u8) ![]const u8 { + if (std.mem.indexOfScalar(u8, input, '%') == null) { + return input; + } + + var result = try std.ArrayList(u8).initCapacity(arena, input.len); + + var i: usize = 0; + while (i < input.len) { + if (input[i] == '%' and i + 2 < input.len) { + const hex = input[i + 1 .. i + 3]; + const byte = std.fmt.parseInt(u8, hex, 16) catch { + result.appendAssumeCapacity(input[i]); + i += 1; + continue; + }; + result.appendAssumeCapacity(byte); + i += 3; + } else { + result.appendAssumeCapacity(input[i]); + i += 1; + } + } + + return result.items; +} + const testing = @import("../testing.zig"); test "URL: isCompleteHTTPUrl" { try testing.expectEqual(true, isCompleteHTTPUrl("http://example.com/about")); @@ -691,6 +848,293 @@ test "URL: resolve" { } } +test "URL: ensureEncoded" { + defer testing.reset(); + + const Case = struct { + url: [:0]const u8, + expected: [:0]const u8, + }; + + const cases = [_]Case{ + .{ + .url = "https://example.com/over 9000!", + .expected = "https://example.com/over%209000!", + }, + .{ + .url = "http://example.com/hello world.html", + .expected = "http://example.com/hello%20world.html", + }, + .{ + .url = "https://example.com/file[1].html", + .expected = "https://example.com/file%5B1%5D.html", + }, + .{ + .url = "https://example.com/file{name}.html", + .expected = "https://example.com/file%7Bname%7D.html", + }, + .{ + .url = "https://example.com/page?query=hello world", + .expected = "https://example.com/page?query=hello%20world", + }, + .{ + .url = "https://example.com/page?a=1&b=value with spaces", + .expected = "https://example.com/page?a=1&b=value%20with%20spaces", + }, + .{ + .url = "https://example.com/page#section one", + .expected = 
"https://example.com/page#section%20one", + }, + .{ + .url = "https://example.com/my path?query=my value#my anchor", + .expected = "https://example.com/my%20path?query=my%20value#my%20anchor", + }, + .{ + .url = "https://example.com/already%20encoded", + .expected = "https://example.com/already%20encoded", + }, + .{ + .url = "https://example.com/file%5B1%5D.html", + .expected = "https://example.com/file%5B1%5D.html", + }, + .{ + .url = "https://example.com/caf%C3%A9", + .expected = "https://example.com/caf%C3%A9", + }, + .{ + .url = "https://example.com/page?query=already%20encoded", + .expected = "https://example.com/page?query=already%20encoded", + }, + .{ + .url = "https://example.com/page?a=1&b=value%20here", + .expected = "https://example.com/page?a=1&b=value%20here", + }, + .{ + .url = "https://example.com/page#section%20one", + .expected = "https://example.com/page#section%20one", + }, + .{ + .url = "https://example.com/part%20encoded and not", + .expected = "https://example.com/part%20encoded%20and%20not", + }, + .{ + .url = "https://example.com/page?a=encoded%20value&b=not encoded", + .expected = "https://example.com/page?a=encoded%20value&b=not%20encoded", + }, + .{ + .url = "https://example.com/my%20path?query=not encoded#encoded%20anchor", + .expected = "https://example.com/my%20path?query=not%20encoded#encoded%20anchor", + }, + .{ + .url = "https://example.com/fully%20encoded?query=also%20encoded#and%20this", + .expected = "https://example.com/fully%20encoded?query=also%20encoded#and%20this", + }, + .{ + .url = "https://example.com/path-with_under~tilde", + .expected = "https://example.com/path-with_under~tilde", + }, + .{ + .url = "https://example.com/sub-delims!$&'()*+,;=", + .expected = "https://example.com/sub-delims!$&'()*+,;=", + }, + .{ + .url = "https://example.com", + .expected = "https://example.com", + }, + .{ + .url = "https://example.com?query=value", + .expected = "https://example.com?query=value", + }, + .{ + .url = 
"https://example.com/clean/path", + .expected = "https://example.com/clean/path", + }, + .{ + .url = "https://example.com/path?clean=query#clean-fragment", + .expected = "https://example.com/path?clean=query#clean-fragment", + }, + .{ + .url = "https://example.com/100% complete", + .expected = "https://example.com/100%25%20complete", + }, + .{ + .url = "https://example.com/path?value=100% done", + .expected = "https://example.com/path?value=100%25%20done", + }, + }; + + for (cases) |case| { + const result = try ensureEncoded(testing.arena_allocator, case.url); + try testing.expectString(case.expected, result); + } +} + +test "URL: resolve with encoding" { + defer testing.reset(); + + const Case = struct { + base: [:0]const u8, + path: [:0]const u8, + expected: [:0]const u8, + }; + + const cases = [_]Case{ + // Spaces should be encoded as %20, but ! is allowed + .{ + .base = "https://example.com/dir/", + .path = "over 9000!", + .expected = "https://example.com/dir/over%209000!", + }, + .{ + .base = "https://example.com/", + .path = "hello world.html", + .expected = "https://example.com/hello%20world.html", + }, + // Multiple spaces + .{ + .base = "https://example.com/", + .path = "path with multiple spaces", + .expected = "https://example.com/path%20with%20%20multiple%20%20%20spaces", + }, + // Special characters that need encoding + .{ + .base = "https://example.com/", + .path = "file[1].html", + .expected = "https://example.com/file%5B1%5D.html", + }, + .{ + .base = "https://example.com/", + .path = "file{name}.html", + .expected = "https://example.com/file%7Bname%7D.html", + }, + .{ + .base = "https://example.com/", + .path = "file.html", + .expected = "https://example.com/file%3Ctest%3E.html", + }, + .{ + .base = "https://example.com/", + .path = "file\"quote\".html", + .expected = "https://example.com/file%22quote%22.html", + }, + .{ + .base = "https://example.com/", + .path = "file|pipe.html", + .expected = "https://example.com/file%7Cpipe.html", + }, + .{ + 
.base = "https://example.com/", + .path = "file\\backslash.html", + .expected = "https://example.com/file%5Cbackslash.html", + }, + .{ + .base = "https://example.com/", + .path = "file^caret.html", + .expected = "https://example.com/file%5Ecaret.html", + }, + .{ + .base = "https://example.com/", + .path = "file`backtick`.html", + .expected = "https://example.com/file%60backtick%60.html", + }, + // Characters that should NOT be encoded + .{ + .base = "https://example.com/", + .path = "path-with_under~tilde.html", + .expected = "https://example.com/path-with_under~tilde.html", + }, + .{ + .base = "https://example.com/", + .path = "path/with/slashes", + .expected = "https://example.com/path/with/slashes", + }, + .{ + .base = "https://example.com/", + .path = "sub-delims!$&'()*+,;=.html", + .expected = "https://example.com/sub-delims!$&'()*+,;=.html", + }, + // Already encoded characters should not be double-encoded + .{ + .base = "https://example.com/", + .path = "already%20encoded", + .expected = "https://example.com/already%20encoded", + }, + .{ + .base = "https://example.com/", + .path = "file%5B1%5D.html", + .expected = "https://example.com/file%5B1%5D.html", + }, + // Mix of encoded and unencoded + .{ + .base = "https://example.com/", + .path = "part%20encoded and not", + .expected = "https://example.com/part%20encoded%20and%20not", + }, + // Query strings and fragments ARE encoded + .{ + .base = "https://example.com/", + .path = "file name.html?query=value with spaces", + .expected = "https://example.com/file%20name.html?query=value%20with%20spaces", + }, + .{ + .base = "https://example.com/", + .path = "file name.html#anchor with spaces", + .expected = "https://example.com/file%20name.html#anchor%20with%20spaces", + }, + .{ + .base = "https://example.com/", + .path = "file.html?hello=world !", + .expected = "https://example.com/file.html?hello=world%20!", + }, + // Query structural characters should NOT be encoded + .{ + .base = "https://example.com/", + .path 
= "file.html?a=1&b=2", + .expected = "https://example.com/file.html?a=1&b=2", + }, + // Relative paths with encoding + .{ + .base = "https://example.com/dir/page.html", + .path = "../other dir/file.html", + .expected = "https://example.com/other%20dir/file.html", + }, + .{ + .base = "https://example.com/dir/", + .path = "./sub dir/file.html", + .expected = "https://example.com/dir/sub%20dir/file.html", + }, + // Absolute paths with encoding + .{ + .base = "https://example.com/some/path", + .path = "/absolute path/file.html", + .expected = "https://example.com/absolute%20path/file.html", + }, + // Unicode/high bytes (though ideally these should be UTF-8 encoded first) + .{ + .base = "https://example.com/", + .path = "café", + .expected = "https://example.com/caf%C3%A9", + }, + // Empty path + .{ + .base = "https://example.com/", + .path = "", + .expected = "https://example.com/", + }, + // Complete URL as path (should not be encoded) + .{ + .base = "https://example.com/", + .path = "https://other.com/path with spaces", + .expected = "https://other.com/path%20with%20spaces", + }, + }; + + for (cases) |case| { + const result = try resolve(testing.arena_allocator, case.base, case.path, .{ .encode = true }); + try testing.expectString(case.expected, result); + } +} + test "URL: eqlDocument" { defer testing.reset(); { @@ -816,3 +1260,68 @@ test "URL: getRobotsUrl" { try testing.expectString("https://example.com/robots.txt", url); } } + +test "URL: unescape" { + defer testing.reset(); + const arena = testing.arena_allocator; + + { + const result = try unescape(arena, "hello world"); + try testing.expectEqual("hello world", result); + } + + { + const result = try unescape(arena, "hello%20world"); + try testing.expectEqual("hello world", result); + } + + { + const result = try unescape(arena, "%48%65%6c%6c%6f"); + try testing.expectEqual("Hello", result); + } + + { + const result = try unescape(arena, "%48%65%6C%6C%6F"); + try testing.expectEqual("Hello", result); + } + + { 
+ const result = try unescape(arena, "a%3Db"); + try testing.expectEqual("a=b", result); + } + + { + const result = try unescape(arena, "a%3DB"); + try testing.expectEqual("a=B", result); + } + + { + const result = try unescape(arena, "ZDIgPSAndHdvJzs%3D"); + try testing.expectEqual("ZDIgPSAndHdvJzs=", result); + } + + { + const result = try unescape(arena, "%5a%44%4d%67%50%53%41%6e%64%47%68%79%5a%57%55%6e%4f%77%3D%3D"); + try testing.expectEqual("ZDMgPSAndGhyZWUnOw==", result); + } + + { + const result = try unescape(arena, "hello%2world"); + try testing.expectEqual("hello%2world", result); + } + + { + const result = try unescape(arena, "hello%ZZworld"); + try testing.expectEqual("hello%ZZworld", result); + } + + { + const result = try unescape(arena, "hello%"); + try testing.expectEqual("hello%", result); + } + + { + const result = try unescape(arena, "hello%2"); + try testing.expectEqual("hello%2", result); + } +} diff --git a/src/browser/css/Tokenizer.zig b/src/browser/css/Tokenizer.zig index 17104e95..e90c8d46 100644 --- a/src/browser/css/Tokenizer.zig +++ b/src/browser/css/Tokenizer.zig @@ -583,7 +583,7 @@ fn consumeNumeric(self: *Tokenizer) Token { }; self.advance(2); - } else if (self.hasAtLeast(1) and std.ascii.isDigit(self.byteAt(2))) { + } else if (self.hasAtLeast(2) and std.ascii.isDigit(self.byteAt(2))) { self.advance(1); } else { break :blk; diff --git a/src/browser/dump.zig b/src/browser/dump.zig index a8b2bec6..bb666e7f 100644 --- a/src/browser/dump.zig +++ b/src/browser/dump.zig @@ -20,16 +20,15 @@ const std = @import("std"); const Page = @import("Page.zig"); const Node = @import("webapi/Node.zig"); const Slot = @import("webapi/element/html/Slot.zig"); +const IFrame = @import("webapi/element/html/IFrame.zig"); -pub const RootOpts = struct { - with_base: bool = false, - strip: Opts.Strip = .{}, - shadow: Opts.Shadow = .rendered, -}; +const IS_DEBUG = @import("builtin").mode == .Debug; pub const Opts = struct { - strip: Strip = .{}, - shadow: Shadow 
= .rendered, + with_base: bool = false, + with_frames: bool = false, + strip: Opts.Strip = .{}, + shadow: Opts.Shadow = .rendered, pub const Strip = struct { js: bool = false, @@ -49,7 +48,7 @@ pub const Opts = struct { }; }; -pub fn root(doc: *Node.Document, opts: RootOpts, writer: *std.Io.Writer, page: *Page) !void { +pub fn root(doc: *Node.Document, opts: Opts, writer: *std.Io.Writer, page: *Page) !void { if (doc.is(Node.Document.HTMLDocument)) |html_doc| { blk: { // Ideally we just render the doctype which is part of the document @@ -71,7 +70,7 @@ pub fn root(doc: *Node.Document, opts: RootOpts, writer: *std.Io.Writer, page: * } } - return deep(doc.asNode(), .{ .strip = opts.strip, .shadow = opts.shadow }, writer, page); + return deep(doc.asNode(), opts, writer, page); } pub fn deep(node: *Node, opts: Opts, writer: *std.Io.Writer, page: *Page) error{WriteFailed}!void { @@ -83,19 +82,19 @@ fn _deep(node: *Node, opts: Opts, comptime force_slot: bool, writer: *std.Io.Wri .cdata => |cd| { if (node.is(Node.CData.Comment)) |_| { try writer.writeAll(""); } else if (node.is(Node.CData.ProcessingInstruction)) |pi| { try writer.writeAll(""); } else { if (shouldEscapeText(node._parent)) { - try writeEscapedText(cd.getData(), writer); + try writeEscapedText(cd.getData().str(), writer); } else { - try writer.writeAll(cd.getData()); + try writer.writeAll(cd.getData().str()); } } }, @@ -140,7 +139,24 @@ fn _deep(node: *Node, opts: Opts, comptime force_slot: bool, writer: *std.Io.Wri } } - try children(node, opts, writer, page); + if (opts.with_frames and el.is(IFrame) != null) { + const frame = el.as(IFrame); + if (frame.getContentDocument()) |doc| { + // A frame's document should always have a page, but + // I'm not willing to crash a release build on that assertion.
+ if (comptime IS_DEBUG) { + std.debug.assert(doc._page != null); + } + if (doc._page) |frame_page| { + try writer.writeByte('\n'); + root(doc, opts, writer, frame_page) catch return error.WriteFailed; + try writer.writeByte('\n'); + } + } + } else { + try children(node, opts, writer, page); + } + if (!isVoidElement(el)) { try writer.writeAll("\n"); }, .document_fragment => try children(node, opts, writer, page), - .attribute => unreachable, + .attribute => { + // Not called normally, but can be called via XMLSerializer.serializeToString + // in which case it should return an empty string + try writer.writeAll(""); + }, } } @@ -294,6 +314,12 @@ fn shouldEscapeText(node_: ?*Node) bool { if (node.is(Node.Element.Html.Script) != null) { return false; } + // When scripting is enabled,