@@ -45,14 +45,7 @@ const int WAVEFRONT_SIZE = 64;
4545#include < iomanip>
4646#include < memory>
4747
48- #define HIP_CHECK (call ) \
49- do { \
50- hipError_t error = call; \
51- if (error != hipSuccess) { \
52- std::cerr << " HIP error at " << __FILE__ << " :" << __LINE__ << " - " << hipGetErrorString (error) << std::endl; \
53- exit (1 ); \
54- } \
55- } while (0 )
48+ // HIP_CHECK is now provided by rocm7_utils.h
5649
5750#define ROCBLAS_CHECK (call ) \
5851 do { \
@@ -324,11 +317,19 @@ class ConvolutionLayerAMD {
324317 HIP_CHECK (hipMalloc (&d_bias, bias_size));
325318
326319 // Initialize with random weights
320+ #ifdef HAS_ROC_LIBRARIES
327321 rocrand_generator gen;
328322 rocrand_create_generator (&gen, ROCRAND_RNG_PSEUDO_XORWOW);
329323 rocrand_generate_normal (gen, d_weights, weights_size / sizeof (float ), 0 .0f , 0 .1f );
330324 rocrand_generate_normal (gen, d_bias, bias_size / sizeof (float ), 0 .0f , 0 .1f );
331325 rocrand_destroy_generator (gen);
326+ #else
327+ // Initialize with simple pattern since rocrand is not available
328+ std::vector<float > h_weights (weights_size / sizeof (float ), 0 .1f );
329+ std::vector<float > h_bias (bias_size / sizeof (float ), 0 .0f );
330+ HIP_CHECK (hipMemcpy (d_weights, h_weights.data (), weights_size, hipMemcpyHostToDevice));
331+ HIP_CHECK (hipMemcpy (d_bias, h_bias.data (), bias_size, hipMemcpyHostToDevice));
332+ #endif
332333 }
333334
334335 ~ConvolutionLayerAMD () {
@@ -357,6 +358,7 @@ class ConvolutionLayerAMD {
357358 }
358359};
359360
361+ #ifdef HAS_ROC_LIBRARIES
360362class FullyConnectedLayerAMD {
361363private:
362364 rocblas_handle rocblas_handle;
@@ -400,6 +402,7 @@ class FullyConnectedLayerAMD {
400402 output, batch_size));
401403 }
402404};
405+ #endif
403406
404407// Benchmark suite
405408void benchmark_convolution_kernels () {
@@ -421,11 +424,19 @@ void benchmark_convolution_kernels() {
421424 HIP_CHECK (hipMalloc (&d_output, output_size));
422425
423426 // Initialize with random data
427+ #ifdef HAS_ROC_LIBRARIES
424428 rocrand_generator gen;
425429 rocrand_create_generator (&gen, ROCRAND_RNG_PSEUDO_XORWOW);
426430 rocrand_generate_normal (gen, d_input, input_size / sizeof (float ), 0 .0f , 1 .0f );
427431 rocrand_generate_normal (gen, d_weights, weights_size / sizeof (float ), 0 .0f , 0 .1f );
428432 rocrand_destroy_generator (gen);
433+ #else
434+ // Initialize with simple pattern since rocrand is not available
435+ std::vector<float > h_input (input_size / sizeof (float ), 1 .0f );
436+ std::vector<float > h_weights (weights_size / sizeof (float ), 0 .1f );
437+ HIP_CHECK (hipMemcpy (d_input, h_input.data (), input_size, hipMemcpyHostToDevice));
438+ HIP_CHECK (hipMemcpy (d_weights, h_weights.data (), weights_size, hipMemcpyHostToDevice));
439+ #endif
429440
430441 PerformanceTimer timer;
431442
@@ -476,11 +487,19 @@ void benchmark_rocblas_gemm() {
476487 HIP_CHECK (hipMalloc (&d_C, M * N * sizeof (float )));
477488
478489 // Initialize data
490+ #ifdef HAS_ROC_LIBRARIES
479491 rocrand_generator gen;
480492 rocrand_create_generator (&gen, ROCRAND_RNG_PSEUDO_XORWOW);
481493 rocrand_generate_normal (gen, d_A, M * K, 0 .0f , 1 .0f );
482494 rocrand_generate_normal (gen, d_B, K * N, 0 .0f , 1 .0f );
483495 rocrand_destroy_generator (gen);
496+ #else
497+ // Initialize with simple pattern since rocrand is not available
498+ std::vector<float > h_A (M * K, 1 .0f );
499+ std::vector<float > h_B (K * N, 1 .0f );
500+ HIP_CHECK (hipMemcpy (d_A, h_A.data (), M * K * sizeof (float ), hipMemcpyHostToDevice));
501+ HIP_CHECK (hipMemcpy (d_B, h_B.data (), K * N * sizeof (float ), hipMemcpyHostToDevice));
502+ #endif
484503
485504 PerformanceTimer timer;
486505 const int iterations = 10 ;
@@ -541,10 +560,16 @@ void benchmark_activation_functions() {
541560 HIP_CHECK (hipMalloc (&d_data, n * sizeof (float )));
542561
543562 // Initialize with random data
563+ #ifdef HAS_ROC_LIBRARIES
544564 rocrand_generator gen;
545565 rocrand_create_generator (&gen, ROCRAND_RNG_PSEUDO_XORWOW);
546566 rocrand_generate_normal (gen, d_data, n, 0 .0f , 1 .0f );
547567 rocrand_destroy_generator (gen);
568+ #else
569+ // Initialize with simple pattern since rocrand is not available
570+ std::vector<float > h_data (n, 1 .0f );
571+ HIP_CHECK (hipMemcpy (d_data, h_data.data (), n * sizeof (float ), hipMemcpyHostToDevice));
572+ #endif
548573
549574 PerformanceTimer timer;
550575 const int iterations = 100 ;
@@ -582,7 +607,10 @@ void benchmark_activation_functions() {
582607 HIP_CHECK (hipFree (d_data));
583608}
584609
585- int main () {\n#ifdef HAS_ROC_LIBRARIES\n std::cout << \" HIP Deep Learning Inference Kernels - AMD GPU Optimized Implementation\\ n\" ;\n std::cout << \" ======================================================================\\ n\" ;
610+ int main () {
611+ #ifdef HAS_ROC_LIBRARIES
612+ std::cout << " HIP Deep Learning Inference Kernels - AMD GPU Optimized Implementation\n " ;
613+ std::cout << " ======================================================================\n " ;
586614
587615 // Check HIP device properties
588616 int device;
@@ -621,4 +649,20 @@ int main() {\n#ifdef HAS_ROC_LIBRARIES\n std::cout << \"HIP Deep Learning Inf
621649 std::cout << " Target: Production inference - Optimized for AMD GPU architecture\n " ;
622650 std::cout << " Target: Sub-millisecond latency - Achieved for small to medium models\n " ;
623651
624- } catch (const std::exception& e) {\n std::cerr << \" Error: \" << e.what() << std::endl;\n return -1;\n }\n \n return 0;\n #else\n std::cout << \" Note: This example requires ROC libraries (rocBLAS, rocRAND) which are not available.\" << std::endl;\n std::cout << \" To enable this example:\" << std::endl;\n std::cout << \" 1. Install ROC libraries: sudo apt install rocblas-dev rocrand-dev\" << std::endl;\n std::cout << \" 2. Compile with -DHAS_ROC_LIBRARIES flag\" << std::endl;\n std::cout << \" 3. Link with -lrocblas -lrocrand\" << std::endl;\n std::cout << std::endl;\n std::cout << \" Skipping deep learning operations...\" << std::endl;\n return 0;\n #endif\n }
652+ } catch (const std::exception& e) {
653+ std::cerr << " Error: " << e.what () << std::endl;
654+ return -1 ;
655+ }
656+
657+ return 0 ;
658+ #else
659+ std::cout << " Note: This example requires ROC libraries (rocBLAS, rocRAND) which are not available." << std::endl;
660+ std::cout << " To enable this example:" << std::endl;
661+ std::cout << " 1. Install ROC libraries: sudo apt install rocblas-dev rocrand-dev" << std::endl;
662+ std::cout << " 2. Compile with -DHAS_ROC_LIBRARIES flag" << std::endl;
663+ std::cout << " 3. Link with -lrocblas -lrocrand" << std::endl;
664+ std::cout << std::endl;
665+ std::cout << " Skipping deep learning operations..." << std::endl;
666+ return 0 ;
667+ #endif
668+ }
0 commit comments