From beb32371650d4b44172d0c4325e03a4e4498e7c6 Mon Sep 17 00:00:00 2001 From: Ashwin Krishna Kumar Date: Thu, 25 Jun 2026 16:39:16 +0530 Subject: [PATCH 1/4] Allow bench workflow to run on PRs from forks --- .../workflows/checklist_comment_on_new_pr.yml | 3 ++- .github/workflows/run-bench.yml | 8 +++++--- .../jvector/example/AutoBenchYAML.java | 20 +++++++++++++------ 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/.github/workflows/checklist_comment_on_new_pr.yml b/.github/workflows/checklist_comment_on_new_pr.yml index fda81de21..cb287ef05 100644 --- a/.github/workflows/checklist_comment_on_new_pr.yml +++ b/.github/workflows/checklist_comment_on_new_pr.yml @@ -1,6 +1,7 @@ name: Comment on new Pull Request with checklist on: - pull_request: + # safe as long as this workflow doesn't access code from the PR branch + pull_request_target: types: opened jobs: diff --git a/.github/workflows/run-bench.yml b/.github/workflows/run-bench.yml index 1b9533cad..8317f43c6 100644 --- a/.github/workflows/run-bench.yml +++ b/.github/workflows/run-bench.yml @@ -24,6 +24,7 @@ on: - '**/src/main/java/**' - 'pom.xml' - '**/pom.xml' + - '.github/workflows/run-bench.yml' jobs: # Job to generate the matrix configuration @@ -41,8 +42,8 @@ jobs: # Default branches based on event type if [[ "${{ github.event_name }}" == "pull_request" ]]; then - echo "Pull request detected. Using main and PR branch: ${{ github.head_ref }}" - BRANCHES='["main", "${{ github.head_ref }}"]' + echo "Pull request detected. Using main and PR ref: ${{ github.ref }}" + BRANCHES='["main", "${{ github.ref }}"]' elif [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.branches }}" ]]; then # Parse space-separated branches input into JSON array echo "Workflow dispatch with branches input detected" @@ -213,7 +214,8 @@ jobs: java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \ ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \ -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \ - -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG} dpr-gemma-1m + -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML \ + --match-all-datasets --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG} openai-1536-1m else java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \ ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \ diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java index 3f5be3d7d..903121fda 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java @@ -76,17 +76,20 @@ public static void main(String[] args) throws IOException { String finalOutputPath = outputPath; String configPath = null; int diagnostic_level = 0; - for (int i = 0; i < args.length - 1; i++) { - if (args[i].equals("--config")) configPath = args[i+1]; - if (args[i].equals("--diag")) diagnostic_level = Integer.parseInt(args[i+1]); + boolean matchAllDatasets = false; + for (int i = 0; i < args.length; i++) { + if (i < args.length - 1 && args[i].equals("--config")) configPath = args[i+1]; + if (i < args.length - 1 && args[i].equals("--diag")) diagnostic_level = Integer.parseInt(args[i+1]); + if (args[i].equals("--match-all-datasets")) matchAllDatasets = true; } if (diagnostic_level > 0) { Grid.setDiagnosticLevel(diagnostic_level); } String finalConfigPath = configPath; String[] filteredArgs = Arrays.stream(args) - .filter(arg -> !arg.equals("--output") && !arg.equals(finalOutputPath) && - !arg.equals("--config") && !arg.equals(finalConfigPath)) + .filter(arg -> !arg.equals("--output") && !arg.equals(finalOutputPath) && + !arg.equals("--config") && !arg.equals(finalConfigPath) && + !arg.equals("--match-all-datasets")) .toArray(String[]::new); // Log the filtered arguments for debugging @@ -100,7 +103,12 @@ public static void main(String[] args) throws IOException { var pattern = Pattern.compile(regex); var datasetCollection = DatasetCollection.load(); - var datasetNames = datasetCollection.getSection(REGRESSION_TEST_KEY).stream().filter(dn -> pattern.matcher(dn).find()).collect(Collectors.toList()); + var candidateDatasets = matchAllDatasets ? datasetCollection.getAll() : datasetCollection.getSection(REGRESSION_TEST_KEY); + var datasetNames = candidateDatasets.stream().filter(dn -> pattern.matcher(dn).find()).collect(Collectors.toList()); + + if (datasetNames.size() == 0) { + throw new RuntimeException("No datasets matched the given patterns, nothing to do"); + } logger.info("Executing the following datasets: {}", datasetNames); List results = new ArrayList<>(); From 190ca98b1f65666f4a15139bf70a0ea3d5c0b571 Mon Sep 17 00:00:00 2001 From: Ashwin Krishna Kumar Date: Thu, 25 Jun 2026 18:41:31 +0530 Subject: [PATCH 2/4] Fix compaction workflow as well --- .github/workflows/run-compaction.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-compaction.yml b/.github/workflows/run-compaction.yml index 49f72b64b..923149fc3 100644 --- a/.github/workflows/run-compaction.yml +++ b/.github/workflows/run-compaction.yml @@ -31,7 +31,7 @@ jobs: id: set-matrix run: | if [[ "${{ github.event_name }}" == "pull_request" ]]; then - BRANCHES='["main", "${{ github.head_ref }}"]' + BRANCHES='["main", "${{ github.ref }}"]' elif [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.branches }}" ]]; then BRANCHES_INPUT="${{ github.event.inputs.branches }}" BRANCHES="[" From 7b892f9c3010a2385ee162aa17f19db02db82bc8 Mon Sep 17 00:00:00 2001 From: Ashwin Krishna Kumar Date: Fri, 26 Jun 2026 10:41:32 +0530 Subject: [PATCH 3/4] Restore regression dataset to dpr-gemma-1m --- .github/workflows/run-bench.yml | 3 +-- .../jvector/example/AutoBenchYAML.java | 20 ++++++------------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/.github/workflows/run-bench.yml b/.github/workflows/run-bench.yml index 8317f43c6..68eff8ab0 100644 --- a/.github/workflows/run-bench.yml +++ b/.github/workflows/run-bench.yml @@ -214,8 +214,7 @@ jobs: java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \ ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \ -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/tmp/heap_dump/ -Xmx${HALF_MEM_GB}g \ - -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML \ - --match-all-datasets --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG} openai-1536-1m + -cp jvector-examples/target/jvector-examples-*-jar-with-dependencies.jar io.github.jbellis.jvector.example.AutoBenchYAML --output ${SAFE_BRANCH}-bench-results ${CONFIG_ARG} dpr-gemma-1m else java ${{ matrix.jdk >= 20 && '--enable-native-access=ALL-UNNAMED --add-modules=jdk.incubator.vector' || '' }} \ ${{ matrix.jdk >= 22 && '-Djvector.experimental.enable_native_vectorization=true' || '' }} \ diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java index 903121fda..3f5be3d7d 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java @@ -76,20 +76,17 @@ public static void main(String[] args) throws IOException { String finalOutputPath = outputPath; String configPath = null; int diagnostic_level = 0; - boolean matchAllDatasets = false; - for (int i = 0; i < args.length; i++) { - if (i < args.length - 1 && args[i].equals("--config")) configPath = args[i+1]; - if (i < args.length - 1 && args[i].equals("--diag")) diagnostic_level = Integer.parseInt(args[i+1]); - if (args[i].equals("--match-all-datasets")) matchAllDatasets = true; + for (int i = 0; i < args.length - 1; i++) { + if (args[i].equals("--config")) configPath = args[i+1]; + if (args[i].equals("--diag")) diagnostic_level = Integer.parseInt(args[i+1]); } if (diagnostic_level > 0) { Grid.setDiagnosticLevel(diagnostic_level); } String finalConfigPath = configPath; String[] filteredArgs = Arrays.stream(args) - .filter(arg -> !arg.equals("--output") && !arg.equals(finalOutputPath) && - !arg.equals("--config") && !arg.equals(finalConfigPath) && - !arg.equals("--match-all-datasets")) + .filter(arg -> !arg.equals("--output") && !arg.equals(finalOutputPath) && + !arg.equals("--config") && !arg.equals(finalConfigPath)) .toArray(String[]::new); // Log the filtered arguments for debugging @@ -103,12 +100,7 @@ public static void main(String[] args) throws IOException { var pattern = Pattern.compile(regex); var datasetCollection = DatasetCollection.load(); - var candidateDatasets = matchAllDatasets ? datasetCollection.getAll() : datasetCollection.getSection(REGRESSION_TEST_KEY); - var datasetNames = candidateDatasets.stream().filter(dn -> pattern.matcher(dn).find()).collect(Collectors.toList()); - - if (datasetNames.size() == 0) { - throw new RuntimeException("No datasets matched the given patterns, nothing to do"); - } + var datasetNames = datasetCollection.getSection(REGRESSION_TEST_KEY).stream().filter(dn -> pattern.matcher(dn).find()).collect(Collectors.toList()); logger.info("Executing the following datasets: {}", datasetNames); List results = new ArrayList<>(); From d48acad76062ec50721176606ba8b3e3540ad457 Mon Sep 17 00:00:00 2001 From: Ashwin Krishna Kumar Date: Fri, 26 Jun 2026 10:50:10 +0530 Subject: [PATCH 4/4] Workflow defn. self-dep, error on missing datasets --- .github/workflows/run-compaction.yml | 1 + .../java/io/github/jbellis/jvector/example/AutoBenchYAML.java | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/.github/workflows/run-compaction.yml b/.github/workflows/run-compaction.yml index 923149fc3..4fdc9ad58 100644 --- a/.github/workflows/run-compaction.yml +++ b/.github/workflows/run-compaction.yml @@ -19,6 +19,7 @@ on: - '**/src/main/java/**' - 'pom.xml' - '**/pom.xml' + - '.github/workflows/run-compaction.yml' jobs: # Job to generate the matrix configuration diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java index 3f5be3d7d..24c39ae47 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java @@ -102,6 +102,10 @@ public static void main(String[] args) throws IOException { var datasetCollection = DatasetCollection.load(); var datasetNames = datasetCollection.getSection(REGRESSION_TEST_KEY).stream().filter(dn -> pattern.matcher(dn).find()).collect(Collectors.toList()); + if (datasetNames.size() == 0) { + throw new RuntimeException("No datasets matched the given patterns, nothing to do"); + } + logger.info("Executing the following datasets: {}", datasetNames); List results = new ArrayList<>(); List compactionResults = new ArrayList<>();