From bd6cf58c5b7a55276ec143a9f82b17ec86e91e78 Mon Sep 17 00:00:00 2001 From: Matthew Michel Date: Thu, 13 Nov 2025 07:26:21 -0800 Subject: [PATCH 1/3] Add L0 graph SubmitGraph case --- devops/scripts/benchmarks/benches/compute.py | 51 +++++++++++--------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index f4ed34fec8553..312840ec98a1c 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -208,25 +208,15 @@ def benchmarks(self) -> list[Benchmark]: measure_completion_time, use_events, ) in submit_graph_params: - # Non-sycl runtimes have to be run with emulated graphs, - # see: https://github.com/intel/compute-benchmarks/commit/d81d5d602739482b9070c872a28c0b5ebb41de70 - emulate_graphs = ( - 0 if runtime in (RUNTIMES.SYCL, RUNTIMES.SYCL_PREVIEW) else 1 - ) - benches.append( - GraphApiSubmitGraph( - self, - runtime, - in_order_queue, - num_kernels, - measure_completion_time, - use_events, - emulate_graphs, - useHostTasks=0, - ) - ) - if runtime == RUNTIMES.SYCL: - # Create CPU count variant + # SYCL only supports graph mode, UR supports only emulation with command buffers, + # and L0 supports both modes via graph and command list APIs. + if runtime == RUNTIMES.SYCL or runtime == RUNTIMES.SYCL_PREVIEW: + emulate_graphs = [0] + elif runtime == RUNTIMES.UR: + emulate_graphs = [1] + else: # level-zero + emulate_graphs = [0, 1] + for emulate_graph in emulate_graphs: benches.append( GraphApiSubmitGraph( self, @@ -235,11 +225,25 @@ def benchmarks(self) -> list[Benchmark]: num_kernels, measure_completion_time, use_events, - emulate_graphs, + emulate_graph, useHostTasks=0, - profiler_type=PROFILERS.CPU_COUNTER, ) ) + if runtime == RUNTIMES.SYCL: + # Create CPU count variant + benches.append( + GraphApiSubmitGraph( + self, + runtime, + in_order_queue, + num_kernels, + measure_completion_time, + use_events, + emulate_graph, + useHostTasks=0, + profiler_type=PROFILERS.CPU_COUNTER, + ) + ) # Add other benchmarks benches += [ @@ -1067,6 +1071,7 @@ def __init__( self._use_events = useEvents self._use_host_tasks = useHostTasks self._emulate_graphs = emulate_graphs + self._emulate_str = " with graph emulation" if self._emulate_graphs else "" self._ioq_str = "in order" if self._in_order_queue else "out of order" self._measure_str = ( " with measure completion" if self._measure_completion_time else "" @@ -1085,10 +1090,10 @@ def __init__( ) def name(self): - return f"graph_api_benchmark_{self._runtime.value} SubmitGraph{self._use_events_str}{self._host_tasks_str} numKernels:{self._num_kernels} ioq {self._in_order_queue} measureCompletion {self._measure_completion_time}{self._cpu_count_str()}" + return f"graph_api_benchmark_{self._runtime.value} SubmitGraph{self._use_events_str}{self._host_tasks_str}{self._emulate_str} numKernels:{self._num_kernels} ioq {self._in_order_queue} measureCompletion {self._measure_completion_time}{self._cpu_count_str()}" def display_name(self) -> str: - return f"{self._runtime.value.upper()} SubmitGraph {self._ioq_str}{self._measure_str}{self._use_events_str}{self._host_tasks_str}, {self._num_kernels} kernels{self._cpu_count_str(separator=',')}" + return f"{self._runtime.value.upper()} SubmitGraph {self._ioq_str}{self._measure_str}{self._use_events_str}{self._host_tasks_str}{self._emulate_str}, {self._num_kernels} kernels{self._cpu_count_str(separator=',')}" def explicit_group(self): return f"SubmitGraph {self._ioq_str}{self._measure_str}{self._use_events_str}{self._host_tasks_str}, {self._num_kernels} kernels{self._cpu_count_str(separator=',')}" From e9a0748d13c709398e61e22ea57bd649fa5dadf0 Mon Sep 17 00:00:00 2001 From: Matthew Michel Date: Wed, 19 Nov 2025 07:37:58 -0800 Subject: [PATCH 2/3] Fix formatting --- devops/scripts/benchmarks/benches/compute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 312840ec98a1c..6d13db17640a6 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -214,7 +214,7 @@ def benchmarks(self) -> list[Benchmark]: emulate_graphs = [0] elif runtime == RUNTIMES.UR: emulate_graphs = [1] - else: # level-zero + else: # level-zero emulate_graphs = [0, 1] for emulate_graph in emulate_graphs: benches.append( From 6f9ff275c558a918b20d2c2af7c093d077ccd20a Mon Sep 17 00:00:00 2001 From: Matthew Michel Date: Wed, 19 Nov 2025 20:07:10 -0800 Subject: [PATCH 3/3] Exclude L0 graph case on PVC --- devops/scripts/benchmarks/benches/compute.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/devops/scripts/benchmarks/benches/compute.py b/devops/scripts/benchmarks/benches/compute.py index 6d13db17640a6..49ab80e76fd6e 100644 --- a/devops/scripts/benchmarks/benches/compute.py +++ b/devops/scripts/benchmarks/benches/compute.py @@ -215,7 +215,9 @@ def benchmarks(self) -> list[Benchmark]: elif runtime == RUNTIMES.UR: emulate_graphs = [1] else: # level-zero - emulate_graphs = [0, 1] + # SubmitGraph with L0 graph segfaults on PVC + device_arch = getattr(options, "device_architecture", "") + emulate_graphs = [1] if "pvc" in device_arch else [0, 1] for emulate_graph in emulate_graphs: benches.append( GraphApiSubmitGraph(