Skip to content

Commit a06a5a3

Browse files
committed
Fixed module 9
1 parent 7c62a78 commit a06a5a3

File tree

4 files changed

+83
-14
lines changed

4 files changed

+83
-14
lines changed

modules/module8/examples/01_deep_learning_hip.cpp

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,7 @@ const int WAVEFRONT_SIZE = 64;earning Inference Kernels (HIP)
4545
#include <iomanip>
4646
#include <memory>
4747

48-
#define HIP_CHECK(call) \
49-
do { \
50-
hipError_t error = call; \
51-
if (error != hipSuccess) { \
52-
std::cerr << "HIP error at " << __FILE__ << ":" << __LINE__ << " - " << hipGetErrorString(error) << std::endl; \
53-
exit(1); \
54-
} \
55-
} while(0)
48+
// HIP_CHECK is now provided by rocm7_utils.h
5649

5750
#define ROCBLAS_CHECK(call) \
5851
do { \
@@ -324,11 +317,19 @@ class ConvolutionLayerAMD {
324317
HIP_CHECK(hipMalloc(&d_bias, bias_size));
325318

326319
// Initialize with random weights
320+
#ifdef HAS_ROC_LIBRARIES
327321
rocrand_generator gen;
328322
rocrand_create_generator(&gen, ROCRAND_RNG_PSEUDO_XORWOW);
329323
rocrand_generate_normal(gen, d_weights, weights_size / sizeof(float), 0.0f, 0.1f);
330324
rocrand_generate_normal(gen, d_bias, bias_size / sizeof(float), 0.0f, 0.1f);
331325
rocrand_destroy_generator(gen);
326+
#else
327+
// rocRAND is not available: fill weights and bias with constant values instead of random data
328+
std::vector<float> h_weights(weights_size / sizeof(float), 0.1f);
329+
std::vector<float> h_bias(bias_size / sizeof(float), 0.0f);
330+
HIP_CHECK(hipMemcpy(d_weights, h_weights.data(), weights_size, hipMemcpyHostToDevice));
331+
HIP_CHECK(hipMemcpy(d_bias, h_bias.data(), bias_size, hipMemcpyHostToDevice));
332+
#endif
332333
}
333334

334335
~ConvolutionLayerAMD() {
@@ -357,6 +358,7 @@ class ConvolutionLayerAMD {
357358
}
358359
};
359360

361+
#ifdef HAS_ROC_LIBRARIES
360362
class FullyConnectedLayerAMD {
361363
private:
362364
rocblas_handle rocblas_handle;
@@ -400,6 +402,7 @@ class FullyConnectedLayerAMD {
400402
output, batch_size));
401403
}
402404
};
405+
#endif
403406

404407
// Benchmark suite
405408
void benchmark_convolution_kernels() {
@@ -421,11 +424,19 @@ void benchmark_convolution_kernels() {
421424
HIP_CHECK(hipMalloc(&d_output, output_size));
422425

423426
// Initialize with random data
427+
#ifdef HAS_ROC_LIBRARIES
424428
rocrand_generator gen;
425429
rocrand_create_generator(&gen, ROCRAND_RNG_PSEUDO_XORWOW);
426430
rocrand_generate_normal(gen, d_input, input_size / sizeof(float), 0.0f, 1.0f);
427431
rocrand_generate_normal(gen, d_weights, weights_size / sizeof(float), 0.0f, 0.1f);
428432
rocrand_destroy_generator(gen);
433+
#else
434+
// rocRAND is not available: fill input and weights with constant values instead of random data
435+
std::vector<float> h_input(input_size / sizeof(float), 1.0f);
436+
std::vector<float> h_weights(weights_size / sizeof(float), 0.1f);
437+
HIP_CHECK(hipMemcpy(d_input, h_input.data(), input_size, hipMemcpyHostToDevice));
438+
HIP_CHECK(hipMemcpy(d_weights, h_weights.data(), weights_size, hipMemcpyHostToDevice));
439+
#endif
429440

430441
PerformanceTimer timer;
431442

@@ -476,11 +487,19 @@ void benchmark_rocblas_gemm() {
476487
HIP_CHECK(hipMalloc(&d_C, M * N * sizeof(float)));
477488

478489
// Initialize data
490+
#ifdef HAS_ROC_LIBRARIES
479491
rocrand_generator gen;
480492
rocrand_create_generator(&gen, ROCRAND_RNG_PSEUDO_XORWOW);
481493
rocrand_generate_normal(gen, d_A, M * K, 0.0f, 1.0f);
482494
rocrand_generate_normal(gen, d_B, K * N, 0.0f, 1.0f);
483495
rocrand_destroy_generator(gen);
496+
#else
497+
// rocRAND is not available: fill A and B with a constant value instead of random data
498+
std::vector<float> h_A(M * K, 1.0f);
499+
std::vector<float> h_B(K * N, 1.0f);
500+
HIP_CHECK(hipMemcpy(d_A, h_A.data(), M * K * sizeof(float), hipMemcpyHostToDevice));
501+
HIP_CHECK(hipMemcpy(d_B, h_B.data(), K * N * sizeof(float), hipMemcpyHostToDevice));
502+
#endif
484503

485504
PerformanceTimer timer;
486505
const int iterations = 10;
@@ -541,10 +560,16 @@ void benchmark_activation_functions() {
541560
HIP_CHECK(hipMalloc(&d_data, n * sizeof(float)));
542561

543562
// Initialize with random data
563+
#ifdef HAS_ROC_LIBRARIES
544564
rocrand_generator gen;
545565
rocrand_create_generator(&gen, ROCRAND_RNG_PSEUDO_XORWOW);
546566
rocrand_generate_normal(gen, d_data, n, 0.0f, 1.0f);
547567
rocrand_destroy_generator(gen);
568+
#else
569+
// rocRAND is not available: fill the buffer with a constant value instead of random data
570+
std::vector<float> h_data(n, 1.0f);
571+
HIP_CHECK(hipMemcpy(d_data, h_data.data(), n * sizeof(float), hipMemcpyHostToDevice));
572+
#endif
548573

549574
PerformanceTimer timer;
550575
const int iterations = 100;
@@ -582,7 +607,10 @@ void benchmark_activation_functions() {
582607
HIP_CHECK(hipFree(d_data));
583608
}
584609

585-
int main() {\n#ifdef HAS_ROC_LIBRARIES\n std::cout << \"HIP Deep Learning Inference Kernels - AMD GPU Optimized Implementation\\n\";\n std::cout << \"======================================================================\\n\";
610+
int main() {
611+
#ifdef HAS_ROC_LIBRARIES
612+
std::cout << "HIP Deep Learning Inference Kernels - AMD GPU Optimized Implementation\n";
613+
std::cout << "======================================================================\n";
586614

587615
// Check HIP device properties
588616
int device;
@@ -621,4 +649,20 @@ int main() {\n#ifdef HAS_ROC_LIBRARIES\n std::cout << \"HIP Deep Learning Inf
621649
std::cout << "Target: Production inference - Optimized for AMD GPU architecture\n";
622650
std::cout << "Target: Sub-millisecond latency - Achieved for small to medium models\n";
623651

624-
} catch (const std::exception& e) {\n std::cerr << \"Error: \" << e.what() << std::endl;\n return -1;\n }\n \n return 0;\n#else\n std::cout << \"Note: This example requires ROC libraries (rocBLAS, rocRAND) which are not available.\" << std::endl;\n std::cout << \"To enable this example:\" << std::endl;\n std::cout << \"1. Install ROC libraries: sudo apt install rocblas-dev rocrand-dev\" << std::endl;\n std::cout << \"2. Compile with -DHAS_ROC_LIBRARIES flag\" << std::endl;\n std::cout << \"3. Link with -lrocblas -lrocrand\" << std::endl;\n std::cout << std::endl;\n std::cout << \"Skipping deep learning operations...\" << std::endl;\n return 0;\n#endif\n}
652+
} catch (const std::exception& e) {
653+
std::cerr << "Error: " << e.what() << std::endl;
654+
return -1;
655+
}
656+
657+
return 0;
658+
#else
659+
std::cout << "Note: This example requires ROC libraries (rocBLAS, rocRAND) which are not available." << std::endl;
660+
std::cout << "To enable this example:" << std::endl;
661+
std::cout << "1. Install ROC libraries: sudo apt install rocblas-dev rocrand-dev" << std::endl;
662+
std::cout << "2. Compile with -DHAS_ROC_LIBRARIES flag" << std::endl;
663+
std::cout << "3. Link with -lrocblas -lrocrand" << std::endl;
664+
std::cout << std::endl;
665+
std::cout << "Skipping deep learning operations..." << std::endl;
666+
return 0;
667+
#endif
668+
}

modules/module8/examples/Makefile

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,32 @@ HIP_DEBUG_FLAGS = -std=c++17 -g
3232

3333
# Library flags for domain-specific libraries
3434
CUDA_LIBS = -lcublas -lcurand -lcufft
35-
HIP_LIBS = -lrocblas -lrocrand -lrocfft -lMIOpen
35+
36+
# Check for optional ROC libraries and set flags accordingly
37+
HAS_ROCBLAS := $(shell pkg-config --exists rocblas && echo 1 || echo 0)
38+
HAS_ROCRAND := $(shell pkg-config --exists rocrand && echo 1 || echo 0)
39+
HAS_ROCFFT := $(shell pkg-config --exists rocfft && echo 1 || echo 0)
40+
HAS_MIOPEN := $(shell pkg-config --exists MIOpen && echo 1 || echo 0)
41+
42+
# Build HIP_LIBS conditionally
43+
HIP_LIBS =
44+
ifeq ($(HAS_ROCBLAS),1)
45+
HIP_LIBS += -lrocblas
46+
endif
47+
ifeq ($(HAS_ROCRAND),1)
48+
HIP_LIBS += -lrocrand
49+
endif
50+
ifeq ($(HAS_ROCFFT),1)
51+
HIP_LIBS += -lrocfft
52+
endif
53+
ifeq ($(HAS_MIOPEN),1)
54+
HIP_LIBS += -lMIOpen
55+
endif
56+
57+
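# Define HAS_ROC_LIBRARIES when at least one ROC library was detected (HIP_LIBS is non-empty).
# NOTE(review): the code guarded by HAS_ROC_LIBRARIES uses both rocBLAS and rocRAND, so finding
# only one library (e.g. just rocFFT) still sets the flag - confirm this is intended.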
58+
ifneq ($(HIP_LIBS),)
59+
HIP_FLAGS += -DHAS_ROC_LIBRARIES
60+
endif
3661

3762
# Directories
3863
BUILD_DIR = build

modules/module9/examples/01_architecture_hip.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ class GPUResourceManager {
311311
throw std::runtime_error("Access denied: memory not owned by tenant");
312312
}
313313

314-
HIP_CHECK_PROD(HIP_CHECK(hipFree(device_ptr), "Memory deallocation for " + tenant_id);
314+
HIP_CHECK(hipFree(device_ptr));
315315

316316
total_allocated -= it->size;
317317
allocated_resources.erase(it);

modules/module9/examples/02_error_handling_hip.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ class SafeMemoryManager {
233233
auto it = allocated_ptrs_.find(ptr);
234234
if (it != allocated_ptrs_.end()) {
235235
try {
236-
CHECK_HIP(HIP_CHECK(hipFree(ptr));
236+
HIP_CHECK(hipFree(ptr));
237237
total_allocated_ -= it->second;
238238
allocated_ptrs_.erase(it);
239239

@@ -254,7 +254,7 @@ class SafeMemoryManager {
254254

255255
for (auto& pair : allocated_ptrs_) {
256256
try {
257-
CHECK_HIP(HIP_CHECK(hipFree(pair.first));
257+
HIP_CHECK(hipFree(pair.first));
258258
logger_.logInfo("Cleaned up " + std::to_string(pair.second) + " bytes");
259259
} catch (const GPUException& e) {
260260
logger_.logError(e);

0 commit comments

Comments
 (0)