Skip to content

Commit fde7cef

Browse files
committed
Updated deep learning hip
1 parent cddfd42 commit fde7cef

File tree

1 file changed

+98
-4
lines changed

1 file changed

+98
-4
lines changed

modules/module8/examples/01_deep_learning_hip.cpp

Lines changed: 98 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,27 @@ const int WAVEFRONT_SIZE = 64;earning Inference Kernels (HIP)
1515
*
1616
* Production-quality neural network inference implementations optimized for AMD GPU architectures.
1717
* This example demonstrates deep learning kernels adapted for ROCm/HIP with wavefront-aware
18-
* optimizations and LDS utilization patterns specific to AMD hardware.
18+
* optimizations and LDS utilization patterns spec HIP_CHECK(hipFree(d_data));
19+
}
20+
21+
#ifdef HAS_MIOPEN
22+
// Smoke-test for MIOpen availability: prints a banner, creates a MIOpen
// handle, reports success plus a list of supported operation families, and
// then destroys the handle. Takes no arguments and returns nothing; all
// output goes to std::cout.
// Only compiled when HAS_MIOPEN is defined (see the enclosing #ifdef).
// MIOPEN_CHECK is defined outside this view; presumably it reports or aborts
// on a non-success MIOpen status -- confirm against its definition.
// NOTE(review): this copy appears between the '*' lines of the file header
// comment, which looks like an accidental paste; the same function is shown
// again further down -- verify which copy is intended.
void demo_miopen_integration() {
23+
std::cout << "\n=== MIOpen Integration Demo ===\n";
24+
25+
// Initialize MIOpen handle (status of miopenCreate is passed to MIOPEN_CHECK)
26+
miopenHandle_t miopen_handle;
27+
MIOPEN_CHECK(miopenCreate(&miopen_handle));
28+
29+
std::cout << "MIOpen handle created successfully\n";
30+
std::cout << "MIOpen is available for production neural network layers\n";
31+
std::cout << "Supported operations: Convolution, Pooling, Activation, BatchNorm, RNN\n";
32+
33+
// Cleanup: destroy the handle created above (status passed to MIOPEN_CHECK)
34+
MIOPEN_CHECK(miopenDestroy(miopen_handle));
35+
}
36+
#endif
37+
38+
int main() {fic to AMD hardware.
1939
*
2040
* Topics Covered:
2141
* - Wavefront-optimized convolution kernels for AMD GPUs
@@ -617,7 +637,12 @@ void benchmark_activation_functions() {
617637
float* d_data;
618638
HIP_CHECK(hipMalloc(&d_data, n * sizeof(float)));
619639

620-
// Initialize with random data\n#ifdef HAS_ROCRAND\n rocrand_generator gen;\n ROCRAND_CHECK(rocrand_create_generator(&gen, ROCRAND_RNG_PSEUDO_XORWOW));\n ROCRAND_CHECK(rocrand_generate_normal(gen, d_data, n, 0.0f, 1.0f));\n ROCRAND_CHECK(rocrand_destroy_generator(gen));
640+
// Initialize with random data
641+
#ifdef HAS_ROCRAND
642+
rocrand_generator gen;
643+
ROCRAND_CHECK(rocrand_create_generator(&gen, ROCRAND_RNG_PSEUDO_XORWOW));
644+
ROCRAND_CHECK(rocrand_generate_normal(gen, d_data, n, 0.0f, 1.0f));
645+
ROCRAND_CHECK(rocrand_destroy_generator(gen));
621646
#else
622647
// Initialize with simple pattern since rocrand is not available
623648
std::vector<float> h_data(n, 1.0f);
@@ -657,7 +682,25 @@ void benchmark_activation_functions() {
657682
<< " (Bandwidth: " << std::setprecision(1) << relu_wf_bandwidth << " GB/s)\n";
658683
std::cout << " Speedup: " << std::setprecision(2) << relu_time / relu_wf_time << "x\n";
659684

660-
HIP_CHECK(hipFree(d_data));\n}\n\n#ifdef HAS_MIOPEN\nvoid demo_miopen_integration() {\n std::cout << \"\\n=== MIOpen Integration Demo ===\\n\";\n \n // Initialize MIOpen handle\n miopenHandle_t miopen_handle;\n MIOPEN_CHECK(miopenCreate(&miopen_handle));\n \n std::cout << \"MIOpen handle created successfully\\n\";\n std::cout << \"MIOpen is available for production neural network layers\\n\";\n std::cout << \"Supported operations: Convolution, Pooling, Activation, BatchNorm, RNN\\n\";\n \n // Cleanup\n MIOPEN_CHECK(miopenDestroy(miopen_handle));\n}\n#endif\n\nint main() {
685+
HIP_CHECK(hipFree(d_data));
686+
}
687+
688+
#ifdef HAS_MIOPEN
689+
// Smoke-test for MIOpen availability: prints a banner, creates a MIOpen
// handle, reports success plus a list of supported operation families, and
// then destroys the handle. Takes no arguments and returns nothing; all
// output goes to std::cout.
// Only compiled when HAS_MIOPEN is defined (see the enclosing #ifdef).
// MIOPEN_CHECK is defined outside this view; presumably it reports or aborts
// on a non-success MIOpen status -- confirm against its definition.
void demo_miopen_integration() {
690+
std::cout << "\n=== MIOpen Integration Demo ===\n";
691+
692+
// Initialize MIOpen handle (status of miopenCreate is passed to MIOPEN_CHECK)
693+
miopenHandle_t miopen_handle;
694+
MIOPEN_CHECK(miopenCreate(&miopen_handle));
695+
696+
std::cout << "MIOpen handle created successfully\n";
697+
std::cout << "MIOpen is available for production neural network layers\n";
698+
std::cout << "Supported operations: Convolution, Pooling, Activation, BatchNorm, RNN\n";
699+
700+
// Cleanup: destroy the handle created above (status passed to MIOPEN_CHECK)
701+
MIOPEN_CHECK(miopenDestroy(miopen_handle));
702+
}
703+
#endif\n\nint main() {
661704
#ifdef HAS_ROC_LIBRARIES
662705
std::cout << "HIP Deep Learning Inference Kernels - AMD GPU Optimized Implementation\n";
663706
std::cout << "======================================================================\n";
@@ -713,6 +756,57 @@ void benchmark_activation_functions() {
713756

714757
return 0;
715758
#else
716-
// Note: Individual ROCm libraries are detected automatically by the Makefile\n // The main functionality will run with whatever libraries are available\n \n // Check HIP device properties\n int device;\n hipGetDevice(&device);\n hipDeviceProp_t props;\n hipGetDeviceProperties(&props, device);\n \n std::cout << \"GPU: \" << props.name << \"\\n\";\n std::cout << \"Compute Capability: \" << props.major << \".\" << props.minor << \"\\n\";\n std::cout << \"Memory: \" << props.totalGlobalMem / (1024*1024) << \" MB\\n\";\n std::cout << \"Wavefront Size: \" << WAVEFRONT_SIZE << \"\\n\";\n std::cout << \"LDS Size per Workgroup: \" << props.sharedMemPerBlock << \" bytes\\n\";\n std::cout << \"Max Threads per Block: \" << props.maxThreadsPerBlock << \"\\n\\n\";\n \n // Print available ROCm libraries\n std::cout << \"Available ROCm Libraries:\\n\";\n#ifdef HAS_ROCBLAS\n std::cout << \" ✓ rocBLAS\\n\";\n#else\n std::cout << \" ✗ rocBLAS (install rocblas-dev)\\n\";\n#endif\n#ifdef HAS_ROCRAND\n std::cout << \" ✓ rocRAND\\n\";\n#else\n std::cout << \" ✗ rocRAND (install rocrand-dev)\\n\";\n#endif\n#ifdef HAS_ROCFFT\n std::cout << \" ✓ rocFFT\\n\";\n#else\n std::cout << \" ✗ rocFFT (install rocfft-dev)\\n\";\n#endif\n#ifdef HAS_MIOPEN\n std::cout << \" ✓ MIOpen\\n\";\n#else\n std::cout << \" ✗ MIOpen (install miopen-hip-dev)\\n\";\n#endif\n std::cout << \"\\n\";\n \n try {\n benchmark_convolution_kernels();
759+
// Note: Individual ROCm libraries are detected automatically by the Makefile
760+
// The main functionality will run with whatever libraries are available
761+
762+
// Check HIP device properties
763+
int device;
764+
hipGetDevice(&device);
765+
hipDeviceProp_t props;
766+
hipGetDeviceProperties(&props, device);
767+
768+
std::cout << "GPU: " << props.name << "\n";
769+
std::cout << "Compute Capability: " << props.major << "." << props.minor << "\n";
770+
std::cout << "Memory: " << props.totalGlobalMem / (1024*1024) << " MB\n";
771+
std::cout << "Wavefront Size: " << WAVEFRONT_SIZE << "\n";
772+
std::cout << "LDS Size per Workgroup: " << props.sharedMemPerBlock << " bytes\n";
773+
std::cout << "Max Threads per Block: " << props.maxThreadsPerBlock << "\n\n";
774+
775+
// Print available ROCm libraries
776+
std::cout << "Available ROCm Libraries:\n";
777+
#ifdef HAS_ROCBLAS
778+
std::cout << " ✓ rocBLAS\n";
779+
#else
780+
std::cout << " ✗ rocBLAS (install rocblas-dev)\n";
781+
#endif
782+
#ifdef HAS_ROCRAND
783+
std::cout << " ✓ rocRAND\n";
784+
#else
785+
std::cout << " ✗ rocRAND (install rocrand-dev)\n";
786+
#endif
787+
#ifdef HAS_ROCFFT
788+
std::cout << " ✓ rocFFT\n";
789+
#else
790+
std::cout << " ✗ rocFFT (install rocfft-dev)\n";
791+
#endif
792+
#ifdef HAS_MIOPEN
793+
std::cout << " ✓ MIOpen\n";
794+
#else
795+
std::cout << " ✗ MIOpen (install miopen-hip-dev)\n";
796+
#endif
797+
std::cout << "\n";
798+
799+
try {
800+
benchmark_convolution_kernels();
801+
802+
std::cout << "\n=== Note: Install ROCm libraries for full functionality ===\n";
803+
std::cout << " sudo apt install rocblas-dev rocrand-dev rocfft-dev miopen-hip-dev\n";
804+
805+
} catch (const std::exception& e) {
806+
std::cerr << "Error: " << e.what() << std::endl;
807+
return -1;
808+
}
809+
810+
return 0;
717811
#endif
718812
}

0 commit comments

Comments
 (0)