apache · kentkwu · Mar 10, 2026 · Copilot · Apr 14, 2026 · Copilot
diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml
@@ -0,0 +1,59 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Benchmarks
+# Triggers: every push to main, pull requests that touch the benchmark
+# tooling or the perf suite, and manual dispatch.
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    paths:
+      - '.github/workflows/benchmark.yaml'
+      - 'ci/scripts/bench.sh'
+      - 'ci/scripts/bench_adapt.py'
+      - 'perf/**'
+  workflow_dispatch:
+# The workflow token only needs to read the repository contents.
+permissions:
+  contents: read
+jobs:
+  # Runs the JavaScript benchmarks. Events other than `push` only execute
+  # the benchmark script; pushes to main of apache/arrow-js instead run
+  # ci/scripts/bench_adapt.py with the Conbench settings below.
+  benchmark:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - name: Set up Node.js
+        uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
+        with:
+          node-version: '20'
+          cache: npm
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+        with:
+          python-version: '3.11'
+      # Skipped on push: the upload step below hands bench.sh to the adapter
+      # script as its benchmark command, so running it here would be redundant.
+      - name: Run Benchmarks
+        if: github.event_name != 'push'
+        run: bash ci/scripts/bench.sh "$(pwd)"
+      - name: Upload results
+        if: github.event_name == 'push' && github.repository == 'apache/arrow-js' && github.ref_name == 'main'
+        env:
+          CONBENCH_URL: https://conbench.arrow-dev.org
+          CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }}
+          CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }}
+          CONBENCH_REF: ${{ github.ref_name }}
+          CONBENCH_MACHINE_INFO_NAME: amd64-ubuntu-24
+        run: |
+          # NOTE(review): this installs benchadapt from conbench's moving main
+          # branch; pin it to a verified commit SHA once one has been chosen.
+          python3 -m pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python
+          python3 ci/scripts/bench_adapt.py
diff --git a/.gitignore b/.gitignore
@@ -94,3 +94,5 @@ dev/release/rat.xml
 
 # Release
 dev/release/.env
+bench_stats.json
+__pycache__/
diff --git a/ci/scripts/bench.sh b/ci/scripts/bench.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Runs the JavaScript benchmarks for the checkout given as the first argument.
+#
+# Usage: bench.sh <source_dir> [--json]
+#
+# If `--json` is passed as the second argument, the stderr stream of the
+# benchmark run is redirected to bench_stats.json in the directory this
+# script was called from (not in <source_dir>).
+
+set -ex
+
+if [ -z "$1" ]; then
+  # Diagnostics belong on stderr so they never mix with benchmark output.
+  echo "Error: Missing source directory argument" >&2
+  exit 1
+fi
+
+source_dir="$1"
+# Remember the caller's directory before changing into the source tree; the
+# JSON results are written there.
+caller_dir="$(pwd)"
+
+pushd "${source_dir}"
+
+npm ci
+
+if [[ "$2" = "--json" ]]; then
+  npm run perf -- --json 2>"${caller_dir}/bench_stats.json"
+else
+  npm run perf
+fi
+
+popd
diff --git a/ci/scripts/bench_adapt.py b/ci/scripts/bench_adapt.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import json
+import os
+import uuid
+import logging
+from pathlib import Path
+from typing import List
+
+from benchadapt import BenchmarkResult
+from benchadapt.adapters import BenchmarkAdapter
+from benchadapt.log import log
+
+# Emit benchadapt's log output at debug level.
+log.setLevel(logging.DEBUG)
+
+# Three `.parent` hops up from this file (ci/scripts/bench_adapt.py), and the
+# directory holding the CI helper scripts beneath it.
+ARROW_ROOT = Path(__file__).parent.parent.parent.resolve()
+SCRIPTS_PATH = ARROW_ROOT / "ci" / "scripts"
+
+# `github_commit_info` is meant to communicate GitHub-flavored commit
+# information to Conbench. See
+# https://github.com/conbench/conbench/blob/cf7931f/benchadapt/python/benchadapt/result.py#L66
+# for a specification.
+github_commit_info = {"repository": "https://github.com/apache/arrow-js"}
+
+if os.environ.get("CONBENCH_REF") != "main":
+    # Local dev environment: this is not a controlled CI run, so no commit
+    # information is attached. CONBENCH_CUSTOM_RUN_REASON, when set, is
+    # stripped and appended to the run reason.
+    custom_reason_suffix = os.getenv("CONBENCH_CUSTOM_RUN_REASON")
+    run_reason = (
+        "localdev"
+        if custom_reason_suffix is None
+        else f"localdev {custom_reason_suffix.strip()}"
+    )
+else:
+    # Assume GitHub Actions CI. The environment variable lookups below are
+    # expected to fail (KeyError) when not running in GitHub Actions.
+    github_commit_info = {
+        "repository": f'{os.environ["GITHUB_SERVER_URL"]}/{os.environ["GITHUB_REPOSITORY"]}',
+        "commit": os.environ["GITHUB_SHA"],
+        "pr_number": None,  # implying default branch
+    }
+    run_reason = "commit"
+
+
+class JSAdapter(BenchmarkAdapter):
+    """Adapter that turns the JSON written by ``bench.sh --json`` into
+    ``BenchmarkResult`` objects.
+
+    ``command`` is handed to ``BenchmarkAdapter`` as the benchmark command;
+    ``result_file`` is the path ``_transform_results`` reads afterwards.
+    """
+
+    # bench.sh is invoked against the repository root in JSON mode.
+    command = ["bash", str(SCRIPTS_PATH / "bench.sh"), str(ARROW_ROOT), "--json"]
+
+    def __init__(self, *args, **kwargs) -> None:
+        """Resolve the result file location and initialise the base adapter.
+
+        Positional and keyword arguments are forwarded unchanged to
+        ``BenchmarkAdapter.__init__``.
+        """
+        # bench.sh writes bench_stats.json into the directory it was called
+        # from, so resolve the result file from the current working directory
+        # at runtime instead of assuming the repo root is the caller's cwd.
+        # NOTE(review): assumes the base adapter runs `command` without
+        # changing the working directory -- confirm against benchadapt.
+        self.result_file = str(Path.cwd() / "bench_stats.json")
+        super().__init__(command=self.command, *args, **kwargs)
+
+    def _transform_results(self) -> List[BenchmarkResult]:
+        """Read ``self.result_file`` and build one result per JSON entry.
+
+        Returns:
+            One ``BenchmarkResult`` per entry in the file. All results share
+            a single ``run_id``; entries with the same ``suite`` value share
+            a ``batch_id``.
+
+        Raises:
+            OSError: if the result file cannot be opened.
+            json.JSONDecodeError: if the file is not valid JSON.
+            KeyError: if an entry lacks ``ops``, ``details.median``,
+                ``samples`` or ``name``.
+        """
+        with open(self.result_file, "r") as f:
+            raw_results = json.load(f)
+
+        run_id = uuid.uuid4().hex
+
+        # Group results by suite so each suite shares a batch_id
+        suite_batch_ids: dict = {}
+
+        # Only present when commit information was collected at import time;
+        # it does not change between entries, so look it up once.
+        commit = github_commit_info.get("commit")
+
+        parsed_results = []
+        for result in raw_results:
+            suite = result.get("suite", "unknown")
+            if suite not in suite_batch_ids:
+                suite_batch_ids[suite] = uuid.uuid4().hex
+            batch_id = suite_batch_ids[suite]
+
+            # benny reports:
+            #   ops            - operations per second
+            #   details.median - median time per operation, in seconds
+            #   samples        - number of samples collected
+            parsed = BenchmarkResult(
+                run_id=run_id,
+                batch_id=batch_id,
+                stats={
+                    "data": [result["ops"]],
+                    "unit": "i/s",
+                    "times": [result["details"]["median"]],
+                    "time_unit": "s",
+                    "iterations": result["samples"],
+                },
+                context={
+                    "benchmark_language": "JavaScript",
+                },
+                tags={
+                    "suite": suite,
+                    "name": result["name"],
+                },
+                run_reason=run_reason,
+                github=github_commit_info,
+            )
+            # Append the commit only when one is known, so runs without
+            # commit information are not labelled "<reason>: None".
+            parsed.run_name = (
+                f"{parsed.run_reason}: {commit}" if commit else parsed.run_reason
+            )
+            parsed_results.append(parsed)
+
+        return parsed_results
+
+
+if __name__ == "__main__":
+    # Build the adapter with the `info` result field overridden by an empty
+    # mapping, then call the instance right away.
+    JSAdapter(result_fields_override={"info": {}})()
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -10,7 +10,7 @@
     "build": "cross-env NODE_NO_WARNINGS=1 gulp build",
     "clean": "cross-env NODE_NO_WARNINGS=1 gulp clean",
     "debug": "cross-env NODE_NO_WARNINGS=1 gulp debug",
-    "perf": "node --no-warnings --loader ts-node/esm/transpile-only perf/index.ts",
+    "perf": "node --no-warnings --import tsx/esm perf/index.ts",
     "test:integration": "bin/integration.ts --mode validate",
     "release": "./npm-release.sh",
     "test:coverage": "gulp test -t src --coverage",
@@ -100,6 +100,7 @@
     "rollup": "4.59.0",
     "rxjs": "7.8.2",
     "ts-jest": "29.1.4",
+    "tsx": "4.19.3",
     "typedoc": "0.28.17",
     "typescript": "5.4.5",
     "typescript-eslint": "8.57.0",