11/* *
2- * Module 8: Domain-Specific Applications - Deep Learning Inference Kernels (HIP)
2+ * Module 8: Domain-Specific Applications - Deep Learning Inference Kernels (HIP)
#ifdef HAS_ROC_LIBRARIES
// ROCBLAS_CHECK(call): evaluate `call` exactly once and abort on failure.
//
// `call` must yield a rocblas_status. Any result other than
// rocblas_status_success prints the source location, the text of the failing
// expression and the numeric status code to std::cerr, then terminates the
// process with exit code 1. The macro is only defined when HAS_ROC_LIBRARIES
// is set.
//
// The local uses a suffixed name so that an argument which itself mentions a
// variable called `status` is not shadowed by the macro's own temporary.
#define ROCBLAS_CHECK(call)                                                    \
    do {                                                                       \
        const rocblas_status rocblas_check_status_ = (call);                   \
        if (rocblas_check_status_ != rocblas_status_success) {                 \
            std::cerr << "rocBLAS error at " << __FILE__ << ":" << __LINE__    \
                      << " (" #call " returned status "                        \
                      << static_cast<int>(rocblas_check_status_) << ")"        \
                      << std::endl;                                            \
            exit(1);                                                           \
        }                                                                      \
    } while (0)
#endif
13+
14+ const int WAVEFRONT_SIZE = 64;
315 *
416 * Production-quality neural network inference implementations optimized for AMD GPU architectures.
517 * This example demonstrates deep learning kernels adapted for ROCm/HIP with wavefront-aware
1628#include < hip/hip_runtime.h>
1729#include " rocm7_utils.h" // ROCm 7.0 enhanced utilities
1830#include < hip/hip_fp16.h>
31+
32+ // Conditional ROC library support - disabled by default because rocBLAS/rocRAND may not be installed.
// To enable it, uncomment the define below or compile with -DHAS_ROC_LIBRARIES.
33+ // #define HAS_ROC_LIBRARIES
34+ #ifdef HAS_ROC_LIBRARIES
1935#include < rocblas.h>
2036#include < rocrand.h>
37+ #endif
38+
2139#include < iostream>
2240#include < vector>
2341#include < chrono>
@@ -446,6 +464,7 @@ void benchmark_convolution_kernels() {
446464 HIP_CHECK (hipFree (d_output));
447465}
448466
467+ #ifdef HAS_ROC_LIBRARIES
449468void benchmark_rocblas_gemm () {
450469 std::cout << " \n === rocBLAS GEMM Benchmarks ===\n " ;
451470
@@ -511,6 +530,7 @@ void benchmark_rocblas_gemm() {
511530 rocblas_destroy_handle (handle);
512531 HIP_CHECK (hipFree (d_A)); HIP_CHECK (hipFree (d_B)); HIP_CHECK (hipFree (d_C));
513532}
533+ #endif
514534
515535void benchmark_activation_functions () {
516536 std::cout << " \n === AMD-Optimized Activation Function Benchmarks ===\n " ;
@@ -562,9 +582,7 @@ void benchmark_activation_functions() {
562582 HIP_CHECK (hipFree (d_data));
563583}
564584
565- int main () {
566- std::cout << " HIP Deep Learning Inference Kernels - AMD GPU Optimized Implementation\n " ;
567- std::cout << " ======================================================================\n " ;
585+ int main () {
#ifdef HAS_ROC_LIBRARIES
    std::cout << " HIP Deep Learning Inference Kernels - AMD GPU Optimized Implementation\n" ;
    std::cout << " ======================================================================\n" ;
568586
569587 // Check HIP device properties
570588 int device;
@@ -581,7 +599,12 @@ int main() {
581599
582600 try {
583601 benchmark_convolution_kernels();
602+ #ifdef HAS_ROC_LIBRARIES
584603 benchmark_rocblas_gemm();
604+ #else
605+ std::cout << " \n=== rocBLAS GEMM Benchmarks ===\n" ;
606+ std::cout << " rocBLAS library not available. Install rocblas-dev package.\n" ;
607+ #endif
585608 benchmark_activation_functions();
586609
587610 std::cout << " \n=== AMD Deep Learning Optimization Summary ===\n" ;
@@ -598,10 +621,4 @@ int main() {
598621 std::cout << " Target: Production inference - Optimized for AMD GPU architecture\n" ;
599622 std::cout << " Target: Sub-millisecond latency - Achieved for small to medium models\n" ;
600623
601- } catch (const std::exception& e) {
602- std::cerr << " Error: " << e.what () << std::endl;
603- return -1 ;
604- }
605-
606- return 0 ;
607- }
624+ } catch (const std::exception& e) {
        std::cerr << " Error: " << e.what() << std::endl;
        return -1;
    }

    return 0;
#else
    std::cout << " Note: This example requires ROC libraries (rocBLAS, rocRAND) which are not available." << std::endl;
    std::cout << " To enable this example:" << std::endl;
    std::cout << " 1. Install ROC libraries: sudo apt install rocblas-dev rocrand-dev" << std::endl;
    std::cout << " 2. Compile with -DHAS_ROC_LIBRARIES flag" << std::endl;
    std::cout << " 3. Link with -lrocblas -lrocrand" << std::endl;
    std::cout << std::endl;
    std::cout << " Skipping deep learning operations..." << std::endl;
    return 0;
#endif
}
0 commit comments