diff --git a/bin/cuda/gpu_benchmark.cu b/bin/cuda/gpu_benchmark.cu
index fe459e0c..740cdbc9 100644
--- a/bin/cuda/gpu_benchmark.cu
+++ b/bin/cuda/gpu_benchmark.cu
@@ -29,7 +29,7 @@ float getElapsedTime(const cudaEvent_t &gpu_start, cudaEvent_t &gpu_stop) {
 // Function to run the GPU benchmark with no time limit
 void runBenchmark(long max_work) {
     uint32_t n = 256 * 256;
-    uint64_t m = max_work * 16384 / n;
+    uint64_t m = (max_work + n - 1) / n;
 
     unsigned long long int *d_count;
     curandState *d_state;
@@ -88,7 +88,7 @@ void runBenchmark(long max_work) {
 
 void runBenchmarkTime(long max_work, int runtime_in_seconds) {
     uint32_t n = 256 * 256;
-    uint64_t m = max_work * 16384 / n;
+    uint64_t m = (max_work + n - 1) / n;
 
     // allocate memory
     unsigned long long int *d_count;
diff --git a/bin/hip/gpu_benchmark.hip b/bin/hip/gpu_benchmark.hip
index 93c15ff8..d3612504 100644
--- a/bin/hip/gpu_benchmark.hip
+++ b/bin/hip/gpu_benchmark.hip
@@ -27,7 +27,7 @@ float getElapsedTime(const hipEvent_t &gpu_start, hipEvent_t &gpu_stop) {
 // Function to run the GPU benchmark with no time limit
 void runBenchmark(long max_work) {
     uint32_t n = 256 * 256;
-    uint64_t m = max_work * 16384 / n;
+    uint64_t m = (max_work + n - 1) / n;
 
     unsigned long long int *d_count;
     hiprandState *d_state;
@@ -86,7 +86,7 @@ void runBenchmark(long max_work) {
 
 void runBenchmarkTime(long max_work, int runtime_in_seconds) {
     uint32_t n = 256 * 256;
-    uint64_t m = max_work * 16384 / n;
+    uint64_t m = (max_work + n - 1) / n;
 
     // allocate memory
     unsigned long long int *d_count;
diff --git a/bin/wfbench b/bin/wfbench
index 90980e46..01e2865d 100755
--- a/bin/wfbench
+++ b/bin/wfbench
@@ -20,6 +20,7 @@
 import json
 import logging
 import pandas as pd
 import psutil
+import shutil
 from io import StringIO
 from filelock import FileLock
@@ -230,11 +231,21 @@
 
     @staticmethod
     def get_available_gpus():
-        proc = subprocess.Popen(["nvidia-smi", "--query-gpu=utilization.gpu", "--format=csv"], stdout=subprocess.PIPE,
-                                stderr=subprocess.PIPE)
-        stdout, _ = proc.communicate()
-        df = pd.read_csv(StringIO(stdout.decode("utf-8")), sep=" ")
-        return df[df["utilization.gpu"] <= 5].index.to_list()
+        if shutil.which("nvidia-smi") is not None:
+            proc = subprocess.Popen(["nvidia-smi", "--query-gpu=utilization.gpu", "--format=csv"], stdout=subprocess.PIPE,
+                                    stderr=subprocess.PIPE)
+            stdout, _ = proc.communicate()
+            df = pd.read_csv(StringIO(stdout.decode("utf-8")), sep=" ")
+            return df[df["utilization.gpu"] <= 5].index.to_list()
+        elif shutil.which("amd-smi") is not None:
+            proc = subprocess.Popen(["amd-smi", "monitor", "-u", "--csv"], stdout=subprocess.PIPE,
+                                    stderr=subprocess.PIPE)
+            stdout, _ = proc.communicate()
+            df = pd.read_csv(StringIO(stdout.decode("utf-8")), sep=",")
+            return df[df["gfx"] <= 5].index.to_list()
+        else:
+            log_error("No supported GPU monitoring tool found.")
+            return []
 
     def __init__(self):
         self.work = None
@@ -261,13 +272,20 @@
 
         if self.duration is not None:
             log_debug(f"Running GPU benchmark for {self.duration} seconds")
-            gpu_prog = [
-                f"CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work} {self.duration}"]
+            if shutil.which("nvidia-smi") is not None:
+                gpu_prog = [
+                    f"CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work} {self.duration}"]
+            else:
+                gpu_prog = [
+                    f"HIP_DEVICE_ORDER=PCI_BUS_ID HIP_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work} {self.duration}"]
         else:
             log_debug(f"Running GPU benchmark for {self.work} units of work")
-            gpu_prog = [
-                f"CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work}"]
-
+            if shutil.which("nvidia-smi") is not None:
+                gpu_prog = [
+                    f"CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work}"]
+            else:
+                gpu_prog = [
+                    f"HIP_DEVICE_ORDER=PCI_BUS_ID HIP_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work}"]
         p = subprocess.Popen(gpu_prog, shell=True)
         return ProcessHandle(p)
 