# benchmark.yml — GitHub Actions workflow.
# Display name shown in the Actions UI. The Spanish part of the name says the
# run takes too long to execute (more than 2 hours) — presumably the reason
# the workflow is limited to a manual trigger below.
name: Benchmark (Not run for now. Se demora demasiado en ejecutar mas de 2 horas)

# Manual trigger only: the workflow starts when someone dispatches it by hand.
on:
  workflow_dispatch:

# Environment variables exported to every job and step of this workflow.
env:
  # Force coloured cargo output in the job log.
  CARGO_TERM_COLOR: always
  # Print a backtrace when a Rust binary panics.
  RUST_BACKTRACE: '1'
  # Read by the project's own tooling; selects the "onnx" provider.
  VECTORCODE_PROVIDER: onnx
jobs:
  # Builds the release binary, runs the mini-corpus benchmark and the vscode
  # store benchmark, checks the results against the committed baselines,
  # uploads the result files, and (on pull requests) posts a summary comment.
  benchmark:
    name: Benchmark (mini-corpus)
    runs-on: macos-14  # ARM runner for ONNX performance

    # Least-privilege token scopes: checkout needs to read the repository,
    # and the final step needs to write a comment on the pull request.
    # Without this block the comment fails wherever the repository default
    # for GITHUB_TOKEN is read-only.
    permissions:
      contents: read
      pull-requests: write

    steps:
      - uses: actions/checkout@v4

      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable

      - name: Cache cargo registry and build
        uses: Swatinem/rust-cache@v2

      # Persist the model directory between runs. The exact key names the
      # model; the restore-keys prefix allows falling back to any other
      # cached "onnx-model-*" entry.
      - name: Cache ONNX model
        uses: actions/cache@v4
        with:
          path: ~/.vectorcode/models
          key: onnx-model-all-MiniLM-L6-v2
          restore-keys: |
            onnx-model-

      # This step only logs whether the model directory exists; it does not
      # fetch anything itself. Per the message, the download is expected to
      # happen on the first run of the tool.
      - name: Download ONNX model (if not cached)
        run: |
          if [ ! -d "$HOME/.vectorcode/models" ]; then
            echo "ONNX model not cached, will download on first run"
          fi

      - name: Build release binary
        run: cargo build --release

      - name: Run mini-corpus benchmark
        run: |
          cargo run --release -- benchmark --corpus mini --output table

      - name: Validate Indexing Footprint (vscode corpus)
        run: |
          cargo run --release -- bench-store --corpus vscode

      # Phase 4.1: the mock-mini regression gate. Runs scripts/verify-baseline.sh
      # which executes three --compare flows (IR / structural / store) and
      # exits non-zero on regression. The PR is blocked when this fails.
      # NOTE(review): the workflow currently triggers only on
      # workflow_dispatch, so this gate runs on manual dispatch only and
      # cannot block a PR until a pull_request trigger is added.
      - name: Verify against committed baselines
        run: |
          bash scripts/verify-baseline.sh

      # always(): upload whatever result files exist even if an earlier step
      # failed, so a regression can be inspected from the artifact.
      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: |
            benchmark-*.json
            benchmarks/baseline/delta-report.json
            BASELINE.md
          retention-days: 30

      # Posts a Markdown summary of benchmark-mini.json on the pull request.
      # NOTE(review): with workflow_dispatch as the only trigger,
      # github.event_name is never 'pull_request', so this step is skipped
      # until a pull_request trigger is added to the workflow.
      - name: Comment PR with results
        if: github.event_name == 'pull_request' && always()
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');

            // Make arbitrary text safe inside a Markdown table cell: a raw
            // "|" would start a new column and a newline would end the row.
            const cell = (text) =>
              String(text).replace(/\|/g, '\\|').replace(/\r?\n/g, ' ');

            let body = '## Benchmark Results (mini-corpus)\n\n';
            try {
              const results = fs.readFileSync('benchmark-mini.json', 'utf8');
              const data = JSON.parse(results);

              body += `**Corpus**: ${data.corpus}\n`;
              body += `**Files indexed**: ${data.files_indexed}\n`;
              body += `**Queries executed**: ${data.queries_executed}\n`;
              body += `**Duration**: ${data.duration_secs.toFixed(2)}s\n\n`;

              body += '### Aggregate Metrics\n\n';
              body += '| Metric | Value |\n';
              body += '|--------|-------|\n';
              body += `| Recall@5 | ${data.aggregate.recall_at_5.toFixed(4)} |\n`;
              body += `| Recall@10 | ${data.aggregate.recall_at_10.toFixed(4)} |\n`;
              body += `| nDCG@10 | ${data.aggregate.ndcg_at_10.toFixed(4)} |\n`;
              body += `| MRR | ${data.aggregate.mrr.toFixed(4)} |\n`;

              // Only the first 10 queries are listed to keep the comment short.
              body += '\n<details><summary>Per-query results</summary>\n\n';
              body += '| Query | R@5 | R@10 | nDCG | MRR |\n';
              body += '|-------|-----|------|------|-----|\n';
              for (const qr of data.query_results.slice(0, 10)) {
                const query = cell(qr.query.substring(0, 40));
                body += `| ${query} | ${qr.recall_at_5.toFixed(2)} | ${qr.recall_at_10.toFixed(2)} | ${qr.ndcg_at_10.toFixed(2)} | ${qr.mrr.toFixed(2)} |\n`;
              }
              if (data.query_results.length > 10) {
                body += `| ... and ${data.query_results.length - 10} more | | | | |\n`;
              }
              body += '\n</details>\n';
            } catch (err) {
              // Best effort: still post a comment, but surface the real
              // cause (missing file, bad JSON, missing field) in the job log.
              core.warning(`Could not read or parse benchmark-mini.json: ${err.message}`);
              body += '⚠️ Could not parse benchmark results\n';
            }

            // Await the request so the step finishes only after the comment
            // is created and an API error fails this step directly.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body
            });