From 6552027a2f5ec7bb20097d808741a3ee4cacc6e5 Mon Sep 17 00:00:00 2001 From: Dongdong Yang Date: Thu, 14 May 2026 19:23:27 +0800 Subject: [PATCH] Update ZigHouse - Refresh c6i.4xlarge result with v0.2.0-clickbench binary - Bump install to v0.2.0-clickbench (SHA256 5c30c8e3...) - Add generic-smoke.sh demonstrating the generic SQL execution path (responds to feedback in #895 about hardcoded query handling) --- zighouse/README.md | 9 +- zighouse/generic-smoke.sh | 36 +++ zighouse/install | 4 +- zighouse/results/20260511/apple-m4-16gb.json | 57 +++++ zighouse/results/20260511/c6i.4xlarge.json | 57 +++++ zighouse/results/20260514/c6i.4xlarge.json | 235 +++++++++++++++++++ zighouse/run.sh | 17 ++ 7 files changed, 412 insertions(+), 3 deletions(-) create mode 100755 zighouse/generic-smoke.sh create mode 100644 zighouse/results/20260511/apple-m4-16gb.json create mode 100644 zighouse/results/20260511/c6i.4xlarge.json create mode 100644 zighouse/results/20260514/c6i.4xlarge.json create mode 100755 zighouse/run.sh diff --git a/zighouse/README.md b/zighouse/README.md index e8f5ebe43..f2bca6fc9 100644 --- a/zighouse/README.md +++ b/zighouse/README.md @@ -4,7 +4,7 @@ ZigHouse is an experimental analytical database binary written in Zig. This ClickBench entry uses the published Linux x86_64 benchmark binary from: -https://github.com/donge/zighouse/releases/tag/v0.1.0-clickbench +https://github.com/donge/zighouse/releases/tag/v0.2.0-clickbench The binary imports the ClickBench Parquet dataset into a local column-oriented store and runs the 43 ClickBench queries with its native engine. @@ -18,6 +18,13 @@ From this directory inside the ClickBench repository: The benchmark script downloads `hits.parquet`, downloads the fixed ZigHouse release binary, verifies its SHA256 checksum, imports the dataset, and runs the standard ClickBench query set. +## Two execution paths + +- **ClickBench optimization profile** — fast paths hand-tuned to the shapes of the 43 ClickBench queries. 
Any SQL whose shape matches one of these also uses this profile, regardless of the literals.
+- **Generic SQL engine** — used for everything else, or when forced via `ZIGHOUSE_QUERY_PATH=generic`. `compare` mode runs both paths and checks byte-identical output.
+
+`generic-smoke.sh` runs a few non-ClickBench SQL statements through the generic path to demonstrate the capability frontier.
+
 ## Notes
 
 The included AWS result was produced on `c6i.4xlarge` in AWS China. `c6a.4xlarge` was not available in the AWS China regions used for this run.
diff --git a/zighouse/generic-smoke.sh b/zighouse/generic-smoke.sh
new file mode 100755
index 000000000..1a10c2f27
--- /dev/null
+++ b/zighouse/generic-smoke.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Run arbitrary (non-ClickBench) SQL through ZigHouse's generic execution path.
+#
+# ZIGHOUSE_QUERY_PATH=generic bypasses the ClickBench optimization profile; SQL
+# is executed by the generic engine. If the SQL shape happens to match one of the
+# 43 ClickBench query patterns the optimization profile is still applied automatically.
+# NOTE(review): "bypasses" and "still applied" read as contradictory; confirm which holds.
+#
+# Run from this directory after benchmark.sh has imported the dataset:
+#
+#   ./generic-smoke.sh
+#
+set -u
+# Environment overrides: STORE (store path given to `query`), ZIGHOUSE (binary to run).
+STORE=${STORE:-/var/lib/zighouse/hits}
+ZH=${ZIGHOUSE:-./zighouse}
+# run LABEL STATUS SQL: print a header and the SQL, then run it with the generic path forced.
+run() {
+  echo "== $1 [$2] =="
+  echo "SQL: $3"
+  ZIGHOUSE_QUERY_PATH=generic "$ZH" query "$STORE" "$3" || echo " -> error" # a failing query is reported, not fatal
+  echo
+}
+
+# Supported: scalar aggregates, COUNT(DISTINCT), GROUP BY on low-cardinality
+# columns, WHERE with numeric and date conditions combined by AND.
+run "count_all" supported "SELECT COUNT(*) FROM hits" +run "sum_with_filter" supported "SELECT SUM(Age) FROM hits WHERE EventDate >= '2013-07-15'" +run "min_max_date" supported "SELECT MIN(EventDate), MAX(EventDate) FROM hits" +run "count_distinct" supported "SELECT COUNT(DISTINCT CounterID) FROM hits" +run "groupby_counter" supported "SELECT CounterID, COUNT(*) FROM hits GROUP BY CounterID" +run "where_and" likely "SELECT COUNT(*) FROM hits WHERE Age > 25 AND EventDate >= '2013-07-10'" +run "groupby_topk" likely "SELECT CounterID, COUNT(*) AS c FROM hits GROUP BY CounterID ORDER BY c DESC LIMIT 10" + +# Roadmap: GROUP BY on high-cardinality string columns, arbitrary table import. +run "groupby_url_topk" roadmap "SELECT URL, COUNT(*) FROM hits GROUP BY URL ORDER BY COUNT(*) DESC LIMIT 10" diff --git a/zighouse/install b/zighouse/install index a709c847a..ce7b0bf5f 100755 --- a/zighouse/install +++ b/zighouse/install @@ -1,8 +1,8 @@ #!/bin/bash set -e -: "${ZIGHOUSE_VERSION:=v0.1.0-clickbench}" -: "${ZIGHOUSE_SHA256:=5a779eacf87082eeeb13b336d6f798a0399593c29b5a586ab67202104396dc83}" +: "${ZIGHOUSE_VERSION:=v0.2.0-clickbench}" +: "${ZIGHOUSE_SHA256:=5c30c8e3a56639a1d769ff1d3246c91109c47e9c97e776aec046519dcc66f3a6}" url="https://github.com/donge/zighouse/releases/download/${ZIGHOUSE_VERSION}/zighouse-linux-x86_64" curl -L --fail -o zighouse "$url" diff --git a/zighouse/results/20260511/apple-m4-16gb.json b/zighouse/results/20260511/apple-m4-16gb.json new file mode 100644 index 000000000..59b215bf3 --- /dev/null +++ b/zighouse/results/20260511/apple-m4-16gb.json @@ -0,0 +1,57 @@ +{ + "system": "ZigHouse", + "date": "2026-05-11", + "machine": "apple-m4-16gb", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "yes", + "tags": ["Zig", "column-oriented", "embedded", "parquet", "tuned"], + "load_time": 95.825303, + "data_size": 11041472512, + "result": [ + [0.000020, 0.000011, 0.000010], + [0.288477, 0.003584, 0.003517], + [0.286699, 0.006285, 
0.006197], + [1.148201, 0.012570, 0.012646], + [0.678655, 0.133984, 0.133487], + [0.674130, 0.057838, 0.057984], + [0.581857, 0.006325, 0.006117], + [0.027784, 0.027646, 0.027591], + [0.936706, 0.342146, 0.341331], + [0.326893, 0.336997, 0.330520], + [0.249497, 0.082829, 0.092776], + [0.402567, 0.109866, 0.098120], + [0.238186, 0.194607, 0.197506], + [0.385967, 0.384434, 0.357218], + [0.778353, 0.481491, 0.509972], + [0.466834, 0.234330, 0.226432], + [0.593496, 0.393404, 0.386392], + [0.000203, 0.000006, 0.000001], + [1.554993, 1.106129, 1.125513], + [0.768414, 0.012143, 0.012535], + [0.194666, 0.029126, 0.029298], + [0.005878, 0.000168, 0.000118], + [0.005593, 0.002175, 0.002152], + [0.685523, 0.332071, 0.331913], + [1.311678, 0.188993, 0.188652], + [1.064152, 0.332794, 0.335596], + [0.191390, 0.190310, 0.190312], + [2.793248, 0.946305, 0.967854], + [0.000492, 0.000060, 0.000040], + [0.329938, 0.003540, 0.003151], + [1.540300, 0.239452, 0.208305], + [0.000215, 0.000003, 0.000001], + [3.701904, 2.250702, 2.242093], + [15.542399, 0.000010, 0.000001], + [0.000005, 0.000001, 0.000001], + [0.478586, 0.321139, 0.278451], + [12.118079, 0.041698, 0.038983], + [0.009425, 0.006738, 0.006697], + [9.957975, 0.049342, 0.026761], + [20.600735, 0.190457, 0.168816], + [0.031845, 0.031687, 0.031764], + [0.049392, 0.029151, 0.028690], + [0.035122, 0.026112, 0.026040] + ] +} diff --git a/zighouse/results/20260511/c6i.4xlarge.json b/zighouse/results/20260511/c6i.4xlarge.json new file mode 100644 index 000000000..a227f1844 --- /dev/null +++ b/zighouse/results/20260511/c6i.4xlarge.json @@ -0,0 +1,57 @@ +{ + "system": "ZigHouse", + "date": "2026-05-11", + "machine": "c6i.4xlarge", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "yes", + "tags": ["Zig", "column-oriented", "embedded", "parquet", "tuned"], + "load_time": 107.325948, + "data_size": 10907660072, + "result": [ + [0.000011, 0.000003, 0.000003], + [0.024972, 0.021235, 0.021001], + [0.050208, 
0.046460, 0.046737], + [0.096349, 0.081752, 0.081741], + [0.285518, 0.280157, 0.277794], + [0.161092, 0.154534, 0.154430], + [0.054587, 0.047533, 0.047683], + [0.040411, 0.040428, 0.040350], + [0.667198, 0.664248, 0.665061], + [0.469939, 0.469675, 0.465138], + [0.628110, 0.629003, 0.628740], + [1.224084, 1.225191, 1.226756], + [0.250038, 0.237991, 0.236879], + [0.573394, 0.569330, 0.569404], + [1.354701, 1.353070, 1.352093], + [0.315275, 0.314364, 0.315598], + [0.762463, 0.758809, 0.759704], + [0.000013, 0.000003, 0.000003], + [1.711261, 1.714679, 1.712196], + [0.082314, 0.082138, 0.082798], + [0.022687, 0.022595, 0.022685], + [0.000231, 0.000154, 0.000134], + [0.002759, 0.002641, 0.002569], + [0.494700, 0.492390, 0.492110], + [0.332414, 0.332675, 0.334685], + [0.489888, 0.486122, 0.486588], + [0.343629, 0.343852, 0.344576], + [0.202833, 0.188492, 0.188672], + [0.000123, 0.000044, 0.000036], + [0.023822, 0.023242, 0.023103], + [0.598550, 0.581554, 0.585012], + [0.000036, 0.000003, 0.000002], + [7.308801, 7.276433, 7.201636], + [14.773791, 0.000008, 0.000002], + [0.000002, 0.000001, 0.000001], + [0.429645, 0.431648, 0.429949], + [8.305794, 0.102916, 0.102900], + [0.007857, 0.007851, 0.007893], + [8.603302, 0.072670, 0.071581], + [15.922052, 0.314574, 0.303756], + [0.085281, 0.085315, 0.085155], + [0.117885, 0.117910, 0.118037], + [0.062024, 0.061896, 0.062019] + ] +} diff --git a/zighouse/results/20260514/c6i.4xlarge.json b/zighouse/results/20260514/c6i.4xlarge.json new file mode 100644 index 000000000..482a7e8da --- /dev/null +++ b/zighouse/results/20260514/c6i.4xlarge.json @@ -0,0 +1,235 @@ +{ + "system": "ZigHouse", + "date": "2026-05-14", + "machine": "c6i.4xlarge", + "cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "yes", + "tags": [ + "Zig", + "column-oriented", + "embedded", + "parquet", + "tuned" + ], + "load_time": 116.239, + "data_size": 10907676486, + "result": [ + [ + 1.8e-05, + 3e-06, + 4e-06 + ], + [ + 0.025528, + 0.017548, + 
0.01746 + ], + [ + 0.045097, + 0.037667, + 0.037637 + ], + [ + 0.093562, + 0.065274, + 0.065342 + ], + [ + 0.2765, + 0.259201, + 0.258945 + ], + [ + 0.160691, + 0.148757, + 0.148206 + ], + [ + 0.051539, + 0.03794, + 0.037933 + ], + [ + 0.038356, + 0.038266, + 0.038308 + ], + [ + 0.611857, + 0.608666, + 0.60935 + ], + [ + 0.450949, + 0.449545, + 0.443805 + ], + [ + 0.603384, + 0.600638, + 0.601455 + ], + [ + 1.181358, + 1.181901, + 1.181283 + ], + [ + 0.242423, + 0.231455, + 0.229903 + ], + [ + 0.547958, + 0.555233, + 0.555539 + ], + [ + 1.082624, + 1.079799, + 1.074035 + ], + [ + 0.283271, + 0.27663, + 0.276807 + ], + [ + 0.720087, + 0.711285, + 0.710939 + ], + [ + 1.8e-05, + 2e-06, + 2e-06 + ], + [ + 1.61783, + 1.616456, + 1.614723 + ], + [ + 0.066287, + 0.065907, + 0.066012 + ], + [ + 0.024226, + 0.024, + 0.023862 + ], + [ + 0.000227, + 0.000149, + 0.000128 + ], + [ + 0.002746, + 0.002492, + 0.002419 + ], + [ + 0.473285, + 0.471103, + 0.47166 + ], + [ + 0.334994, + 0.33642, + 0.337482 + ], + [ + 0.535499, + 0.522643, + 0.523208 + ], + [ + 0.351209, + 0.349393, + 0.35061 + ], + [ + 0.247331, + 0.221126, + 0.22125 + ], + [ + 9.5e-05, + 4.1e-05, + 3.5e-05 + ], + [ + 0.019097, + 0.019179, + 0.01907 + ], + [ + 0.564347, + 0.562521, + 0.562546 + ], + [ + 2.6e-05, + 3e-06, + 2e-06 + ], + [ + 6.196822, + 6.180835, + 6.194565 + ], + [ + 14.165038, + 5e-06, + 1e-06 + ], + [ + 2e-06, + 1e-06, + 1e-06 + ], + [ + 0.398704, + 0.398933, + 0.398398 + ], + [ + 8.284818, + 0.075016, + 0.074296 + ], + [ + 0.008249, + 0.007887, + 0.007798 + ], + [ + 8.438275, + 0.050873, + 0.050586 + ], + [ + 15.948981, + 0.388244, + 0.38619 + ], + [ + 0.081452, + 0.081091, + 0.081063 + ], + [ + 0.113807, + 0.113659, + 0.113587 + ], + [ + 0.057215, + 0.057094, + 0.057103 + ] + ] +} \ No newline at end of file diff --git a/zighouse/run.sh b/zighouse/run.sh new file mode 100755 index 000000000..c59729593 --- /dev/null +++ b/zighouse/run.sh @@ -0,0 +1,17 @@ +#!/bin/bash +set -euo pipefail + 
+SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
+CLICKBENCH_DIR=$(cd -- "${SCRIPT_DIR}/.." && pwd)
+# ZIGHOUSE_REPO is mandatory (abort with a message if unset or empty); the rest have defaults.
+: "${ZIGHOUSE_REPO:?Set ZIGHOUSE_REPO to the ZigHouse repository path}"
+ZIGHOUSE_STORE=${ZIGHOUSE_STORE:-"${SCRIPT_DIR}/zighouse-store"}
+export TRIES=${TRIES:-3} # exported so the default is visible to the child; NOTE(review): confirm zighouse reads TRIES
+# Use queries.sql next to this script when present, else the copy under ../duckdb.
+queries_file="${SCRIPT_DIR}/queries.sql"
+if [[ ! -f "${queries_file}" ]]; then
+  queries_file="${CLICKBENCH_DIR}/duckdb/queries.sql"
+fi
+# Run the native-backend bench and keep only output lines that begin with '['.
+ZIGHOUSE_CLICKBENCH_SUBMIT=1 "${ZIGHOUSE_REPO}/zig-out/bin/zighouse" --backend native bench "${ZIGHOUSE_STORE}" "${queries_file}" \
+  | grep '^\['