From b59d12bc56fa11d931b6b3d0aa05be138e17d1c6 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 11 May 2026 11:24:05 -0400 Subject: [PATCH 1/3] Support both nvidia-smi and amd-smi Signed-off-by: Steven Hahn --- bin/wfbench | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/bin/wfbench b/bin/wfbench index 90980e46..2b97299c 100755 --- a/bin/wfbench +++ b/bin/wfbench @@ -20,6 +20,7 @@ import json import logging import pandas as pd import psutil +import shutil from io import StringIO from filelock import FileLock @@ -230,11 +231,21 @@ class GPUBenchmark: @staticmethod def get_available_gpus(): - proc = subprocess.Popen(["nvidia-smi", "--query-gpu=utilization.gpu", "--format=csv"], stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - stdout, _ = proc.communicate() - df = pd.read_csv(StringIO(stdout.decode("utf-8")), sep=" ") - return df[df["utilization.gpu"] <= 5].index.to_list() + if shutil.which("nvidia-smi") is not None: + proc = subprocess.Popen(["nvidia-smi", "--query-gpu=utilization.gpu", "--format=csv"], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, _ = proc.communicate() + df = pd.read_csv(StringIO(stdout.decode("utf-8")), sep=" ") + return df[df["utilization.gpu"] <= 5].index.to_list() + elif shutil.which("amd-smi") is not None: + proc = subprocess.Popen(["amd-smi", "monitor", "-u", "--csv"], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, _ = proc.communicate() + df = pd.read_csv(StringIO(stdout.decode("utf-8")), sep=",") + return df[df["gfx"] <= 5].index.to_list() + else: + log_error("No supported GPU monitoring tool found.") + return [] def __init__(self): self.work = None From f7a12af0adba300d3ba649a01ccfa2d74f5fa69d Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Mon, 11 May 2026 13:17:58 -0400 Subject: [PATCH 2/3] fix factor of 16384 bug Signed-off-by: Steven Hahn --- bin/cuda/gpu_benchmark.cu | 4 ++-- bin/hip/gpu_benchmark.hip | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/cuda/gpu_benchmark.cu b/bin/cuda/gpu_benchmark.cu index fe459e0c..740cdbc9 100644 --- a/bin/cuda/gpu_benchmark.cu +++ b/bin/cuda/gpu_benchmark.cu @@ -29,7 +29,7 @@ float getElapsedTime(const cudaEvent_t &gpu_start, cudaEvent_t &gpu_stop) { // Function to run the GPU benchmark with no time limit void runBenchmark(long max_work) { uint32_t n = 256 * 256; - uint64_t m = max_work * 16384 / n; + uint64_t m = (max_work + n - 1) / n; unsigned long long int *d_count; curandState *d_state; @@ -88,7 +88,7 @@ void runBenchmark(long max_work) { void runBenchmarkTime(long max_work, int runtime_in_seconds) { uint32_t n = 256 * 256; - uint64_t m = max_work * 16384 / n; + uint64_t m = (max_work + n - 1) / n; // allocate memory unsigned long long int *d_count; diff --git a/bin/hip/gpu_benchmark.hip b/bin/hip/gpu_benchmark.hip index 93c15ff8..d3612504 100644 --- a/bin/hip/gpu_benchmark.hip +++ b/bin/hip/gpu_benchmark.hip @@ -27,7 +27,7 @@ float getElapsedTime(const hipEvent_t &gpu_start, hipEvent_t &gpu_stop) { // Function to run the GPU benchmark with no time limit void runBenchmark(long max_work) { uint32_t n = 256 * 256; - uint64_t m = max_work * 16384 / n; + uint64_t m = (max_work + n - 1) / n; unsigned long long int *d_count; hiprandState *d_state; @@ -86,7 +86,7 @@ void runBenchmark(long max_work) { void runBenchmarkTime(long max_work, int runtime_in_seconds) { uint32_t n = 256 * 256; - uint64_t m = max_work * 16384 / n; + uint64_t m = (max_work + n - 1) / n; // allocate memory unsigned long long int *d_count; From 1c81edfea1d1f7c86f7e1fe40296e3378cff5521 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Tue, 12 May 2026 11:45:18 -0400 Subject: [PATCH 3/3] CUDA variables -> HIP variables Signed-off-by: Steven Hahn --- bin/wfbench | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/bin/wfbench b/bin/wfbench index 2b97299c..01e2865d 100755 --- a/bin/wfbench +++ b/bin/wfbench @@ -272,13 +272,20 @@ class GPUBenchmark: if self.duration is not None: log_debug(f"Running GPU benchmark for {self.duration} seconds") - gpu_prog = [ - f"CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work} {self.duration}"] + if shutil.which("nvidia-smi") is not None: + gpu_prog = [ + f"CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work} {self.duration}"] + else: + gpu_prog = [ + f"HIP_DEVICE_ORDER=PCI_BUS_ID HIP_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work} {self.duration}"] else: log_debug(f"Running GPU benchmark for {self.work} units of work") - gpu_prog = [ - f"CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work}"] - + if shutil.which("nvidia-smi") is not None: + gpu_prog = [ + f"CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work}"] + else: + gpu_prog = [ + f"HIP_DEVICE_ORDER=PCI_BUS_ID HIP_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work}"] p = subprocess.Popen(gpu_prog, shell=True) return ProcessHandle(p)