Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bin/cuda/gpu_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ float getElapsedTime(const cudaEvent_t &gpu_start, cudaEvent_t &gpu_stop) {
// Function to run the GPU benchmark with no time limit
void runBenchmark(long max_work) {
uint32_t n = 256 * 256;
uint64_t m = max_work * 16384 / n;
uint64_t m = (max_work + n - 1) / n;

unsigned long long int *d_count;
curandState *d_state;
Expand Down Expand Up @@ -88,7 +88,7 @@ void runBenchmark(long max_work) {
void runBenchmarkTime(long max_work, int runtime_in_seconds) {

uint32_t n = 256 * 256;
uint64_t m = max_work * 16384 / n;
uint64_t m = (max_work + n - 1) / n;

// allocate memory
unsigned long long int *d_count;
Expand Down
4 changes: 2 additions & 2 deletions bin/hip/gpu_benchmark.hip
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ float getElapsedTime(const hipEvent_t &gpu_start, hipEvent_t &gpu_stop) {
// Function to run the GPU benchmark with no time limit
void runBenchmark(long max_work) {
uint32_t n = 256 * 256;
uint64_t m = max_work * 16384 / n;
uint64_t m = (max_work + n - 1) / n;

unsigned long long int *d_count;
hiprandState *d_state;
Expand Down Expand Up @@ -86,7 +86,7 @@ void runBenchmark(long max_work) {
void runBenchmarkTime(long max_work, int runtime_in_seconds) {

uint32_t n = 256 * 256;
uint64_t m = max_work * 16384 / n;
uint64_t m = (max_work + n - 1) / n;

// allocate memory
unsigned long long int *d_count;
Expand Down
38 changes: 28 additions & 10 deletions bin/wfbench
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import json
import logging
import pandas as pd
import psutil
import shutil

from io import StringIO
from filelock import FileLock
Expand Down Expand Up @@ -230,11 +231,21 @@ class GPUBenchmark:

@staticmethod
def get_available_gpus():
proc = subprocess.Popen(["nvidia-smi", "--query-gpu=utilization.gpu", "--format=csv"], stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, _ = proc.communicate()
df = pd.read_csv(StringIO(stdout.decode("utf-8")), sep=" ")
return df[df["utilization.gpu"] <= 5].index.to_list()
if shutil.which("nvidia-smi") is not None:
proc = subprocess.Popen(["nvidia-smi", "--query-gpu=utilization.gpu", "--format=csv"], stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, _ = proc.communicate()
df = pd.read_csv(StringIO(stdout.decode("utf-8")), sep=" ")
return df[df["utilization.gpu"] <= 5].index.to_list()
elif shutil.which("amd-smi") is not None:
proc = subprocess.Popen(["amd-smi", "monitor", "-u", "--csv"], stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, _ = proc.communicate()
df = pd.read_csv(StringIO(stdout.decode("utf-8")), sep=",")
return df[df["gfx"] <= 5].index.to_list()
else:
log_error("No supported GPU monitoring tool found.")
return []

def __init__(self):
self.work = None
Expand All @@ -261,13 +272,20 @@ class GPUBenchmark:

if self.duration is not None:
log_debug(f"Running GPU benchmark for {self.duration} seconds")
gpu_prog = [
f"CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work} {self.duration}"]
if shutil.which("nvidia-smi") is not None:
gpu_prog = [
f"CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work} {self.duration}"]
else:
gpu_prog = [
f"HIP_DEVICE_ORDER=PCI_BUS_ID HIP_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work} {self.duration}"]
else:
log_debug(f"Running GPU benchmark for {self.work} units of work")
gpu_prog = [
f"CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work}"]

if shutil.which("nvidia-smi") is not None:
gpu_prog = [
f"CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work}"]
else:
gpu_prog = [
f"HIP_DEVICE_ORDER=PCI_BUS_ID HIP_VISIBLE_DEVICES={self.device} {this_dir.joinpath('./gpu_benchmark')} {self.work}"]
p = subprocess.Popen(gpu_prog, shell=True)
return ProcessHandle(p)

Expand Down
Loading