
Commit 168f459

[Benchmarks] add Record and Replay benchmarks (#20481)
with unittest support
1 parent e34323f commit 168f459

7 files changed: +341, -34 lines

.github/workflows/sycl-ur-perf-benchmarking.yml

Lines changed: 2 additions & 18 deletions
@@ -238,30 +238,14 @@ jobs:
       toolchain_decompress_command: ${{ needs.build_nightly.outputs.toolchain_decompress_command }}
   # END nightly benchmarking path
 
-  # Benchmark framework builds and runs on PRs path:
-  build_pr:
-    name: '[PR] Build SYCL'
-    if: github.event_name == 'pull_request'
-    uses: ./.github/workflows/sycl-linux-build.yml
-    with:
-      build_ref: ${{ github.sha }}
-      build_cache_root: "/__w/"
-      build_cache_suffix: "default"
-      # Docker image has last nightly pre-installed and added to the PATH
-      build_image: "ghcr.io/intel/llvm/sycl_ubuntu2404_nightly:latest"
-      cc: clang
-      cxx: clang++
-      changes: '[]'
-      toolchain_artifact: sycl_linux_default
-
+  # BEGIN benchmark framework builds and runs on PRs path
   # TODO: When we have stable BMG runner(s), consider moving this job to that runner.
   test_benchmark_framework:
     name: '[PR] Benchmark suite testing'
-    needs: [build_pr]
     permissions:
       contents: write
       packages: read
-    if: ${{ !cancelled() && needs.build_pr.outputs.build_conclusion == 'success' }}
+    if: github.event_name == 'pull_request'
     uses: ./.github/workflows/sycl-linux-run-tests.yml
     with:
       name: 'Framework test: PVC_PERF, L0, Minimal preset'

devops/actions/run-tests/benchmark/action.yml

Lines changed: 30 additions & 12 deletions
@@ -113,17 +113,19 @@ runs:
     # modified output the entire sycl build dir as an artifact, in which the
     # intermediate files required can be stitched together from the build files.
     # However, this is not exactly "clean" or "fun to maintain"...
-    - name: Build Unified Runtime
+    - name: Build LLVM
      shell: bash
      run: |
-        # Build Unified Runtime
+        echo "::group::checkout_llvm"
        # Sparse-checkout UR at build ref:
        git clone --depth 1 --no-checkout https://github.com/intel/llvm ur
        cd ur
        git sparse-checkout init
        git sparse-checkout set unified-runtime
        git fetch origin ${{ inputs.build_ref }}
        git checkout FETCH_HEAD
+        echo "::endgroup::"
+        echo "::group::configure_llvm"
 
        # Configure UR
        mkdir build install
@@ -135,39 +137,50 @@ runs:
          -DUR_BUILD_ADAPTER_L0=ON \
          -DUR_BUILD_ADAPTER_L0_V2=ON
 
-        # Build and install UR
+        echo "::endgroup::"
+        echo "::group::build_and_install_llvm"
+
        cmake --build build -j "$(nproc)"
        cmake --install build
 
        cd -
+
+        echo "::endgroup::"
    # Install level zero v1.25.2
    # This is to have the latest level zero required by Compute Benchmarks
    # Remove this w/a once the sycl nightly images are updated to have level zero v1.25.2
    - name: Install level zero v1.25.2
      shell: bash
      run: |
-        # Install level zero v1.25.2
+        echo "::group::checkout_level_zero"
        # Checkout Level Zero at build ref:
        wget https://github.com/oneapi-src/level-zero/archive/refs/tags/v1.25.2.tar.gz -O level-zero-v1.25.2.tar.gz
        tar -xvf level-zero-v1.25.2.tar.gz
        cd level-zero-1.25.2
 
-        # Configure Level Zero
+        echo "::endgroup::"
+        echo "::group::configure_level_zero"
+
        cmake -DCMAKE_BUILD_TYPE=Release \
          -Bbuild
 
-        # Build and install Level Zero
+        echo "::endgroup::"
+        echo "::group::build_and_install_level_zero"
+
        cmake --build build -j "$(nproc)"
        sudo cmake --install build
 
        cd -
+        echo "::endgroup::"
    # Linux tools installed during docker creation may not match the self-hosted
    # kernel version, so we need to install the correct version here.
    - name: Install perf in version matching the host kernel
      shell: bash
      run: |
+        echo "::group::install_linux_tools"
        sudo apt-get update
        sudo apt-get install -y linux-tools-$(uname -r)
+        echo "::endgroup::"
    - name: Set env var for results branch
      shell: bash
      run: |
@@ -188,18 +201,19 @@ runs:
        SAVE_PREFIX: ${{ inputs.save_name }}
      shell: bash
      run: |
-        # Build and run benchmarks
        # TODO generate summary + display helpful message here
        export CMPLR_ROOT=./toolchain
-        echo "-----"
+        echo "::group::install_python_deps"
+        echo "Installing python dependencies..."
        # Using --break-system-packages because:
        # - venv is not installed
        # - unable to install anything via pip, as python packages in the docker
        #   container are managed by apt
        # - apt is unable to install anything due to unresolved dpkg dependencies,
        #   as a result of how the sycl nightly images are created
        pip install --user --break-system-packages -r ./devops/scripts/benchmarks/requirements.txt
-        echo "-----"
+        echo "::endgroup::"
+        echo "::group::sycl_ls"
 
        # By default, the benchmark scripts forceload level_zero
        FORCELOAD_ADAPTER="${ONEAPI_DEVICE_SELECTOR%%:*}"
@@ -235,7 +249,8 @@ runs:
        export COMPUTE_RUNTIME_TAG_CACHE="$(cat ./devops/dependencies.json | jq -r .linux.compute_runtime.github_tag)"
 
        sycl-ls
-        echo "-----"
+        echo "::endgroup::"
+        echo "::group::run_benchmarks"
 
        WORKDIR="$(realpath ./llvm_test_workdir)"
        if [ -n "$WORKDIR" ] && [ -d "$WORKDIR" ] && [[ "$WORKDIR" == *llvm_test_workdir* ]]; then rm -rf "$WORKDIR" ; fi
@@ -254,7 +269,8 @@ runs:
          ${{ inputs.exit_on_failure == 'true' && '--exit-on-failure --iterations 1' || '' }}
        # TODO: add back: "--flamegraph inclusive" once works properly
 
-        echo "-----"
+        echo "::endgroup::"
+        echo "::group::compare_results"
        python3 ./devops/scripts/benchmarks/compare.py to_hist \
          --avg-type EWMA \
          --cutoff "$(date -u -d '7 days ago' +'%Y%m%d_%H%M%S')" \
@@ -267,7 +283,9 @@ runs:
          --produce-github-summary \
          ${{ inputs.dry_run == 'true' && '--dry-run' || '' }} \
 
-        echo "-----"
+        echo "::endgroup::"
+
+        LLVM_BENCHMARKS_UNIT_TESTING=1 COMPUTE_BENCHMARKS_BUILD_PATH=$WORKDIR/compute-benchmarks-build python3 ./devops/scripts/benchmarks/tests/test_integration.py
 
    - name: Cache changes and upload github summary
      if: always()
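One detail worth calling out in the step above: `FORCELOAD_ADAPTER="${ONEAPI_DEVICE_SELECTOR%%:*}"` keeps only the backend name in front of the first colon of the device selector. A minimal Python equivalent of that shell expansion, with illustrative selector values that are not taken from this commit:

```python
def forceload_adapter(device_selector: str) -> str:
    """Equivalent of the shell expansion ${ONEAPI_DEVICE_SELECTOR%%:*}:
    keep everything before the first ':', or the whole string if there is none."""
    return device_selector.split(":", 1)[0]

print(forceload_adapter("level_zero:gpu"))  # -> "level_zero"
print(forceload_adapter("opencl:*"))        # -> "opencl"
```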

devops/scripts/benchmarks/README.md

Lines changed: 15 additions & 0 deletions
@@ -52,6 +52,21 @@ $ cmake --build ~/ur_build -j $(nproc)
 $ cmake --install ~/ur_build
 ```
 
+## Testing
+
+There is a test which can execute the benchmarking code and check some of its
+internal data structures. To use it, one should:
+- prepare the environment themselves (Level Zero, oneAPI, or some other source
+  of a SYCL `clang++` compiler)
+- have `CMPLR_ROOT` set and pointing to the directory containing `clang++`
+- have the `COMPUTE_BENCHMARKS_BUILD_PATH` variable pointing to the build directory of compute-benchmarks
+- set `LLVM_BENCHMARKS_UNIT_TESTING=1`
+
+Then the test can be executed with:
+```
+python3 ./devops/scripts/benchmarks/tests/test_integration.py
+```
+
 ## Results
 
 By default, the benchmark results are not stored.
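As a usage sketch of the testing instructions added above, the same environment variables can be set from a small Python driver before invoking the test; the toolchain and build paths below are placeholders, not values from this commit:

```python
import os
import subprocess

# Placeholder locations; point these at your own toolchain and compute-benchmarks build.
env = dict(os.environ)
env["CMPLR_ROOT"] = "/opt/sycl-toolchain"
env["COMPUTE_BENCHMARKS_BUILD_PATH"] = "/home/user/compute-benchmarks/build"
env["LLVM_BENCHMARKS_UNIT_TESTING"] = "1"  # makes GitProject skip git clones/fetches

# Run the integration test from the repository root.
subprocess.run(
    ["python3", "./devops/scripts/benchmarks/tests/test_integration.py"],
    env=env,
    check=True,
)
```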

devops/scripts/benchmarks/benches/compute.py

Lines changed: 89 additions & 0 deletions
@@ -270,6 +270,52 @@ def benchmarks(self) -> list[Benchmark]:
            )
        )
 
+        record_and_replay_params = product([0, 1], [0, 1])
+        for emulate, instantiate in record_and_replay_params:
+
+            def createRrBench(variant_name: str, **kwargs):
+                return RecordAndReplay(
+                    self,
+                    RUNTIMES.LEVEL_ZERO,
+                    variant_name,
+                    PROFILERS.TIMER,
+                    mRec=1,
+                    mInst=instantiate,
+                    mDest=0,
+                    emulate=emulate,
+                    **kwargs,
+                )
+
+            benches += [
+                createRrBench(
+                    "large",
+                    nForksInLvl=2,
+                    nLvls=4,
+                    nCmdSetsInLvl=10,
+                    nInstantiations=10,
+                    nAppendKern=10,
+                    nAppendCopy=1,
+                ),
+                createRrBench(
+                    "medium",
+                    nForksInLvl=1,
+                    nLvls=1,
+                    nCmdSetsInLvl=10,
+                    nInstantiations=10,
+                    nAppendKern=10,
+                    nAppendCopy=10,
+                ),
+                createRrBench(
+                    "short",
+                    nForksInLvl=1,
+                    nLvls=4,
+                    nCmdSetsInLvl=1,
+                    nInstantiations=0,
+                    nAppendKern=1,
+                    nAppendCopy=0,
+                ),
+            ]
+
        # Add UR-specific benchmarks
        benches += [
            # TODO: multithread_benchmark_ur fails with segfault
@@ -648,6 +694,49 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
        ]
 
 
+class RecordAndReplay(ComputeBenchmark):
+    def __init__(
+        self, suite, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs
+    ):
+        self.variant_name = variant_name
+        self.rr_params = kwargs
+        self.iterations_regular = 1000
+        self.iterations_trace = 10
+        super().__init__(
+            suite,
+            f"record_and_replay_benchmark_{runtime.value}",
+            "RecordGraph",
+            runtime,
+            profiler_type,
+        )
+
+    def explicit_group(self):
+        return f"{self.test} {self.variant_name}"
+
+    def display_name(self) -> str:
+        return f"{self.explicit_group()}_{self.runtime.value}"
+
+    def name(self):
+        ret = []
+        for k, v in self.rr_params.items():
+            if k[0] == "n":  # numeric parameter
+                ret.append(f"{k[1:]} {v}")
+            elif k[0] == "m":
+                if v != 0:  # measure parameter
+                    ret.append(f"{k[1:]}")
+            else:  # boolean parameter
+                if v != 0:
+                    ret.append(k)
+        ret.sort()
+        return self.bench_name + " " + ", ".join(ret)
+
+    def get_tags(self):
+        return ["L0"]
+
+    def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
+        return [f"--{k}={v}" for k, v in self.rr_params.items()]
+
+
 class QueueInOrderMemcpy(ComputeBenchmark):
    def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type):
        self.isCopyOnly = isCopyOnly
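For readers skimming the diff: `product([0, 1], [0, 1])` enumerates the four emulate/instantiate combinations for each size preset, and `name()` uses the leading `n`/`m` letter of every keyword argument to build a readable label. A standalone sketch that mirrors that naming logic (the benchmark name string below is illustrative, not the exact `runtime.value` spelling):

```python
from itertools import product

# The four (emulate, instantiate) variants generated for every size preset.
print(list(product([0, 1], [0, 1])))  # -> [(0, 0), (0, 1), (1, 0), (1, 1)]


def label(bench_name: str, **rr_params) -> str:
    """Mirror of the convention in RecordAndReplay.name(): 'n*' keys keep their
    value, 'm*' keys appear only when non-zero, anything else is a boolean flag."""
    parts = []
    for key, value in rr_params.items():
        if key[0] == "n":        # numeric parameter, e.g. nLvls=4 -> "Lvls 4"
            parts.append(f"{key[1:]} {value}")
        elif key[0] == "m":      # measurement toggle, e.g. mRec=1 -> "Rec"
            if value != 0:
                parts.append(key[1:])
        elif value != 0:         # plain flag, e.g. emulate=1 -> "emulate"
            parts.append(key)
    parts.sort()
    return bench_name + " " + ", ".join(parts)


# "short" preset with emulate=1, instantiate=0 (bench name chosen for illustration):
print(label(
    "record_and_replay_benchmark_l0",
    mRec=1, mInst=0, mDest=0, emulate=1,
    nForksInLvl=1, nLvls=4, nCmdSetsInLvl=1,
    nInstantiations=0, nAppendKern=1, nAppendCopy=0,
))
```

The same `rr_params` dictionary also feeds `bin_args()`, which simply turns each key/value pair into a `--key=value` command-line flag for the compute-benchmarks binary.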

devops/scripts/benchmarks/git_project.py

Lines changed: 6 additions & 1 deletion
@@ -3,14 +3,14 @@
 # See LICENSE.TXT
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+import os
 from pathlib import Path
 import shutil
 
 from utils.logger import log
 from utils.utils import run
 from options import options
 
-
 class GitProject:
     def __init__(
         self,
@@ -167,6 +167,11 @@ def _setup_repo(self) -> bool:
        Returns:
            bool: True if the repository was cloned or updated, False if it was already up-to-date.
        """
+        if os.environ.get("LLVM_BENCHMARKS_UNIT_TESTING") == "1":
+            log.debug(
+                f"Skipping git operations during unit testing of {self._name} (LLVM_BENCHMARKS_UNIT_TESTING=1)."
+            )
+            return False
        if not self.src_dir.exists():
            self._git_clone()
            return True
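The environment-variable gate added above is what lets the integration test run without touching the network. A minimal sketch of how such a gate could be covered by a unittest, using a simplified stand-in rather than the real `GitProject` class:

```python
import os
import unittest
from unittest import mock


def setup_repo() -> bool:
    """Simplified stand-in for GitProject._setup_repo(): skip all git work when
    LLVM_BENCHMARKS_UNIT_TESTING=1, otherwise report that a clone/fetch happened."""
    if os.environ.get("LLVM_BENCHMARKS_UNIT_TESTING") == "1":
        return False  # nothing cloned or updated
    return True  # a real implementation would clone or fetch here


class TestUnitTestingGate(unittest.TestCase):
    def test_gate_skips_git_operations(self):
        with mock.patch.dict(os.environ, {"LLVM_BENCHMARKS_UNIT_TESTING": "1"}):
            self.assertFalse(setup_repo())

    def test_git_operations_run_by_default(self):
        env = {k: v for k, v in os.environ.items()
               if k != "LLVM_BENCHMARKS_UNIT_TESTING"}
        with mock.patch.dict(os.environ, env, clear=True):
            self.assertTrue(setup_repo())


if __name__ == "__main__":
    unittest.main()
```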

devops/scripts/benchmarks/main.py

Lines changed: 11 additions & 3 deletions
@@ -137,10 +137,13 @@ def process_results(
            stddev_threshold_override
            if stddev_threshold_override is not None
            else options.stddev_threshold
-        ) * mean_value
+        )
+        threshold_scaled = threshold * mean_value
 
-        if stddev > threshold:
-            log.warning(f"stddev {stddev} above the threshold {threshold} for {label}")
+        if stddev > threshold_scaled:
+            log.warning(
+                f"stddev {stddev} above the threshold {threshold_scaled} ({threshold} times {mean_value}) for {label}"
+            )
            valid_results = False
 
        rlist.sort(key=lambda res: res.value)
@@ -228,6 +231,10 @@ def main(directory, additional_env_vars, compare_names, filter):
            benchmark for benchmark in s.benchmarks() if benchmark.enabled()
        ]
        if filter:
+            # log.info(f"all benchmarks:\n" + "\n".join([b.name() for b in suite_benchmarks]))
+            log.debug(
+                f"Filtering {len(suite_benchmarks)} benchmarks in {s.name()} suite for {filter.pattern}"
+            )
            suite_benchmarks = [
                benchmark
                for benchmark in suite_benchmarks
@@ -713,6 +720,7 @@ def validate_and_parse_env_args(env_args):
    options.dry_run = args.dry_run
    options.umf = args.umf
    options.iterations_stddev = args.iterations_stddev
+    options.stddev_threshold = args.stddev_threshold
    options.build_igc = args.build_igc
    options.current_run_name = args.relative_perf
    options.cudnn_directory = args.cudnn_directory
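The `process_results` change above makes the warning explicit that the configured stddev threshold is relative: it is multiplied by the mean before being compared with the standard deviation, and the change in `validate_and_parse_env_args` wires the `--stddev-threshold` argument through to `options`. A small standalone illustration of that check (the 0.02 default here is an assumption, not read from this diff):

```python
import statistics


def stddev_ok(values: list[float], stddev_threshold: float = 0.02) -> bool:
    """Return True when stddev <= stddev_threshold * mean, mirroring the
    relative-threshold check performed in process_results()."""
    mean_value = statistics.mean(values)
    stddev = statistics.stdev(values)
    threshold_scaled = stddev_threshold * mean_value
    if stddev > threshold_scaled:
        print(f"stddev {stddev:.3f} above the threshold {threshold_scaled:.3f} "
              f"({stddev_threshold} times {mean_value:.3f})")
        return False
    return True


print(stddev_ok([100.0, 101.0, 99.5]))   # tight run: passes
print(stddev_ok([100.0, 120.0, 80.0]))   # noisy run: warns and fails
```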
