@@ -15,7 +15,27 @@ const int WAVEFRONT_SIZE = 64;earning Inference Kernels (HIP)
1515 *
1616 * Production-quality neural network inference implementations optimized for AMD GPU architectures.
1717 * This example demonstrates deep learning kernels adapted for ROCm/HIP with wavefront-aware
18- * optimizations and LDS utilization patterns specific to AMD hardware.
18+ * optimizations and LDS utilization patterns spec HIP_CHECK(hipFree(d_data));
19+ }
20+
21+ #ifdef HAS_MIOPEN
22+ void demo_miopen_integration() {  // Demo: print a banner, create an MIOpen handle, print status lines, destroy the handle.
23+ std::cout << "\n=== MIOpen Integration Demo ===\n";
24+ // NOTE(review): in this hunk the function lands between " * " header-comment lines (new-file lines 17 and 39) - looks like a misapplied patch; confirm it is not inside the file's block comment.
25+ // Create the MIOpen library handle. MIOPEN_CHECK is defined outside this view; presumably it reports a failing status - TODO confirm.
26+ miopenHandle_t miopen_handle;
27+ MIOPEN_CHECK(miopenCreate(&miopen_handle));
28+ // Informational output only: the operations listed below are printed, not exercised here.
29+ std::cout << "MIOpen handle created successfully\n";
30+ std::cout << "MIOpen is available for production neural network layers\n";
31+ std::cout << "Supported operations: Convolution, Pooling, Activation, BatchNorm, RNN\n";
32+ 
33+ // Destroy the handle created above before returning.
34+ MIOPEN_CHECK(miopenDestroy(miopen_handle));
35+ }
36+ #endif
37+
38+ int main() {fic to AMD hardware.
1939 *
2040 * Topics Covered:
2141 * - Wavefront-optimized convolution kernels for AMD GPUs
@@ -617,7 +637,12 @@ void benchmark_activation_functions() {
617637 float * d_data;
618638 HIP_CHECK (hipMalloc (&d_data, n * sizeof (float )));
619639
620- // Initialize with random data\n#ifdef HAS_ROCRAND\n rocrand_generator gen;\n ROCRAND_CHECK(rocrand_create_generator(&gen, ROCRAND_RNG_PSEUDO_XORWOW));\n ROCRAND_CHECK(rocrand_generate_normal(gen, d_data, n, 0.0f, 1.0f));\n ROCRAND_CHECK(rocrand_destroy_generator(gen));
640+ // Initialize with random data
641+ #ifdef HAS_ROCRAND
642+ rocrand_generator gen;
643+ ROCRAND_CHECK (rocrand_create_generator (&gen, ROCRAND_RNG_PSEUDO_XORWOW));
644+ ROCRAND_CHECK (rocrand_generate_normal (gen, d_data, n, 0 .0f , 1 .0f ));
645+ ROCRAND_CHECK (rocrand_destroy_generator (gen));
621646#else
622647 // Initialize with simple pattern since rocrand is not available
623648 std::vector<float > h_data (n, 1 .0f );
@@ -657,7 +682,25 @@ void benchmark_activation_functions() {
657682 << " (Bandwidth: " << std::setprecision (1 ) << relu_wf_bandwidth << " GB/s)\n " ;
658683 std::cout << " Speedup: " << std::setprecision (2 ) << relu_time / relu_wf_time << " x\n " ;
659684
660- HIP_CHECK (hipFree (d_data));\n}\n\n#ifdef HAS_MIOPEN\nvoid demo_miopen_integration () {\n std::cout << \" \\ n=== MIOpen Integration Demo ===\\ n\" ;\n \n // Initialize MIOpen handle\n miopenHandle_t miopen_handle;\n MIOPEN_CHECK(miopenCreate(&miopen_handle));\n \n std::cout << \" MIOpen handle created successfully\\ n\" ;\n std::cout << \" MIOpen is available for production neural network layers\\ n\" ;\n std::cout << \" Supported operations: Convolution, Pooling, Activation, BatchNorm, RNN\\ n\" ;\n \n // Cleanup\n MIOPEN_CHECK(miopenDestroy(miopen_handle));\n }\n #endif\n\n int main() {
685+ HIP_CHECK (hipFree (d_data));
686+ }
687+
688+ #ifdef HAS_MIOPEN
689+ void demo_miopen_integration () {  // Demo: print a banner, create an MIOpen handle, print status lines, destroy the handle.
690+ std::cout << " \n === MIOpen Integration Demo ===\n " ;
691+ 
692+ // Create the MIOpen library handle. MIOPEN_CHECK is defined outside this view; presumably it reports a failing status - TODO confirm.
693+ miopenHandle_t miopen_handle;
694+ MIOPEN_CHECK (miopenCreate (&miopen_handle));
695+ // Informational output only: the operations listed below are printed, not exercised here.
696+ std::cout << " MIOpen handle created successfully\n " ;
697+ std::cout << " MIOpen is available for production neural network layers\n " ;
698+ std::cout << " Supported operations: Convolution, Pooling, Activation, BatchNorm, RNN\n " ;
699+ 
700+ // Destroy the handle created above before returning.
701+ MIOPEN_CHECK (miopenDestroy (miopen_handle));
702+ }
703+ #endif \n\nint main () {
661704#ifdef HAS_ROC_LIBRARIES
662705 std::cout << " HIP Deep Learning Inference Kernels - AMD GPU Optimized Implementation\n " ;
663706 std::cout << " ======================================================================\n " ;
@@ -713,6 +756,57 @@ void benchmark_activation_functions() {
713756
714757 return 0 ;
715758#else
716- // Note: Individual ROCm libraries are detected automatically by the Makefile\n // The main functionality will run with whatever libraries are available\n \n // Check HIP device properties\n int device;\n hipGetDevice(&device);\n hipDeviceProp_t props;\n hipGetDeviceProperties(&props, device);\n \n std::cout << \"GPU: \" << props.name << \"\\n\";\n std::cout << \"Compute Capability: \" << props.major << \".\" << props.minor << \"\\n\";\n std::cout << \"Memory: \" << props.totalGlobalMem / (1024*1024) << \" MB\\n\";\n std::cout << \"Wavefront Size: \" << WAVEFRONT_SIZE << \"\\n\";\n std::cout << \"LDS Size per Workgroup: \" << props.sharedMemPerBlock << \" bytes\\n\";\n std::cout << \"Max Threads per Block: \" << props.maxThreadsPerBlock << \"\\n\\n\";\n \n // Print available ROCm libraries\n std::cout << \"Available ROCm Libraries:\\n\";\n#ifdef HAS_ROCBLAS\n std::cout << \" ✓ rocBLAS\\n\";\n#else\n std::cout << \" ✗ rocBLAS (install rocblas-dev)\\n\";\n#endif\n#ifdef HAS_ROCRAND\n std::cout << \" ✓ rocRAND\\n\";\n#else\n std::cout << \" ✗ rocRAND (install rocrand-dev)\\n\";\n#endif\n#ifdef HAS_ROCFFT\n std::cout << \" ✓ rocFFT\\n\";\n#else\n std::cout << \" ✗ rocFFT (install rocfft-dev)\\n\";\n#endif\n#ifdef HAS_MIOPEN\n std::cout << \" ✓ MIOpen\\n\";\n#else\n std::cout << \" ✗ MIOpen (install miopen-hip-dev)\\n\";\n#endif\n std::cout << \"\\n\";\n \n try {\n benchmark_convolution_kernels();
759+ // Note: Individual ROCm libraries are detected automatically by the Makefile
760+ // The main functionality will run with whatever libraries are available
761+
762+ // Check HIP device properties
763+ int device;
764+ hipGetDevice (&device);
765+ hipDeviceProp_t props;
766+ hipGetDeviceProperties (&props, device);
767+
768+ std::cout << " GPU: " << props.name << " \n " ;
769+ std::cout << " Compute Capability: " << props.major << " ." << props.minor << " \n " ;
770+ std::cout << " Memory: " << props.totalGlobalMem / (1024 *1024 ) << " MB\n " ;
771+ std::cout << " Wavefront Size: " << WAVEFRONT_SIZE << " \n " ;
772+ std::cout << " LDS Size per Workgroup: " << props.sharedMemPerBlock << " bytes\n " ;
773+ std::cout << " Max Threads per Block: " << props.maxThreadsPerBlock << " \n\n " ;
774+
775+ // Print available ROCm libraries
776+ std::cout << " Available ROCm Libraries:\n " ;
777+ #ifdef HAS_ROCBLAS
778+ std::cout << " ✓ rocBLAS\n " ;
779+ #else
780+ std::cout << " ✗ rocBLAS (install rocblas-dev)\n " ;
781+ #endif
782+ #ifdef HAS_ROCRAND
783+ std::cout << " ✓ rocRAND\n " ;
784+ #else
785+ std::cout << " ✗ rocRAND (install rocrand-dev)\n " ;
786+ #endif
787+ #ifdef HAS_ROCFFT
788+ std::cout << " ✓ rocFFT\n " ;
789+ #else
790+ std::cout << " ✗ rocFFT (install rocfft-dev)\n " ;
791+ #endif
792+ #ifdef HAS_MIOPEN
793+ std::cout << " ✓ MIOpen\n " ;
794+ #else
795+ std::cout << " ✗ MIOpen (install miopen-hip-dev)\n " ;
796+ #endif
797+ std::cout << " \n " ;
798+
799+ try {
800+ benchmark_convolution_kernels ();
801+
802+ std::cout << " \n === Note: Install ROCm libraries for full functionality ===\n " ;
803+ std::cout << " sudo apt install rocblas-dev rocrand-dev rocfft-dev miopen-hip-dev\n " ;
804+
805+ } catch (const std::exception& e) {
806+ std::cerr << " Error: " << e.what () << std::endl;
807+ return -1 ;
808+ }
809+
810+ return 0 ;
717811#endif
718812}
0 commit comments