1010# or submit itself to any jurisdiction.
1111
1212# NOTE!!!! - Whenever this file is changed, move it over to alidist/resources
13- # FindO2GPU.cmake Version 8
13+ # FindO2GPU.cmake Version 9
14+
15+ set (CUDA_COMPUTETARGET_DEFAULT_FULL 80-real 86-real 89-real 120-real 75-virtual)
16+ set (HIP_AMDGPUTARGET_DEFAULT_FULL gfx906;gfx908)
17+ set (CUDA_COMPUTETARGET_DEFAULT_MINIMAL 75-virtual)
18+ set (HIP_AMDGPUTARGET_DEFAULT_MINIMAL gfx906)
1419
1520if (NOT DEFINED ENABLE_CUDA)
1621 set (ENABLE_CUDA "AUTO" )
@@ -32,11 +37,11 @@ if(CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
3237endif ()
3338
3439if (CUDA_COMPUTETARGET AND CUDA_COMPUTETARGET STREQUAL "default" )
35- set (CUDA_COMPUTETARGET 80-real 86-real 89-real 120-real 75-virtual )
40+ set (CUDA_COMPUTETARGET ${CUDA_COMPUTETARGET_DEFAULT_FULL} )
3641endif ()
3742
3843if (HIP_AMDGPUTARGET AND HIP_AMDGPUTARGET STREQUAL "default" )
39- set (HIP_AMDGPUTARGET gfx906;gfx908 )
44+ set (HIP_AMDGPUTARGET ${HIP_AMDGPUTARGET_DEFAULT_FULL} )
4045endif ()
4146
4247function (set_target_cuda_arch target )
@@ -112,7 +117,7 @@ if(ENABLE_CUDA)
112117 if (CUDA_COMPUTETARGET)
113118 set (CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTETARGET} )
114119 else ()
115- set (CMAKE_CUDA_ARCHITECTURES 75-virtual )
120+ set (O2_GPU_CUDA_UPDATE_NATIVE_ARCHITECTURE 1 )
116121 endif ()
117122 set (CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD} )
118123 set (CMAKE_CUDA_STANDARD_REQUIRED TRUE )
@@ -156,6 +161,13 @@ if(ENABLE_CUDA)
156161 set (CMAKE_CUDA_COMPILER OFF )
157162 endif ()
158163 endif ()
164+ if (NOT CMAKE_CUDA_ARCHITECTURES OR O2_GPU_CUDA_UPDATE_NATIVE_ARCHITECTURE)
165+ if (NOT CMAKE_CUDA_ARCHITECTURES_NATIVE STREQUAL "" )
166+ set (CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES_NATIVE} )
167+ else ()
168+ set (CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTETARGET_DEFAULT_MINIMAL} )
169+ endif ()
170+ endif ()
159171 if (CMAKE_CUDA_COMPILER)
160172 set (CMAKE_CUDA_FLAGS "-Xcompiler \" ${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} \" ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda -Xcompiler -Wno-attributes -Wno-deprecated-gpu-targets ${GPUCA_CUDA_DENORMALS_FLAGS} " )
161173 set (CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "-Xcompiler \" ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} }\" ${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} }" )
@@ -184,7 +196,7 @@ if(ENABLE_CUDA)
184196 endif ()
185197
186198 set (CUDA_ENABLED ON )
187- message (STATUS "CUDA found (Version ${CMAKE_CUDA_COMPILER_VERSION} )" )
199+ message (STATUS "CUDA found (Version ${CMAKE_CUDA_COMPILER_VERSION} , Architectures ${CMAKE_CUDA_ARCHITECTURES} )" )
188200 elseif (NOT ENABLE_CUDA STREQUAL "AUTO" )
189201 message (FATAL_ERROR "CUDA not found (Compiler: ${CMAKE_CUDA_COMPILER} )" )
190202 else ()
@@ -305,7 +317,6 @@ if(ENABLE_HIP)
305317 if (hip_FOUND AND hipcub_FOUND AND rocthrust_FOUND AND rocprim_FOUND AND hip_HIPCC_EXECUTABLE AND hip_HIPIFY_PERL_EXECUTABLE)
306318 set (HIP_ENABLED ON )
307319 set_target_properties (roc::rocthrust PROPERTIES IMPORTED_GLOBAL TRUE )
308- message (STATUS "HIP Found (${hip_HIPCC_EXECUTABLE} version ${hip_VERSION} )" )
309320 set (CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${GPUCA_HIP_DENORMALS_FLAGS} " )
310321 set (CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} } ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} }" )
311322 string (APPEND CMAKE_HIP_FLAGS " -fgpu-defer-diag -mllvm -amdgpu-enable-lower-module-lds=false -mllvm -amdgpu-function-calls=true -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -Wno-pass-failed " )
@@ -321,6 +332,7 @@ if(ENABLE_HIP)
321332 if (HIP_AMDGPUTARGET)
322333 set (CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET} " )
323334 endif ()
335+ message (STATUS "HIP Found (${hip_HIPCC_EXECUTABLE} version ${hip_VERSION} , Architectures ${CMAKE_HIP_ARCHITECTURES} )" )
324336 else ()
325337 set (HIP_ENABLED OFF )
326338 endif ()
0 commit comments