diff --git a/.github/workflows/helpers/install_cudnn.sh b/.github/workflows/helpers/install_cudnn.sh index d77745451b..4f1e6fc769 100755 --- a/.github/workflows/helpers/install_cudnn.sh +++ b/.github/workflows/helpers/install_cudnn.sh @@ -46,8 +46,8 @@ wget -c -q $CUDNN_LINK if [[ "$cuda_version" == "11.6" || "$cuda_version" == "11.7" ]]; then tar -xf $CUDNN_TARBALL_NAME -C ./ CUDNN_EXTRACTED_TARBALL_NAME="${CUDNN_TARBALL_NAME::-7}" - sudo cp -r "$CUDNN_EXTRACTED_TARBALL_NAME/include/*" "/usr/local/include" - sudo cp -r "$CUDNN_EXTRACTED_TARBALL_NAME/lib/*" "/usr/local/lib" + sudo cp -r "$CUDNN_EXTRACTED_TARBALL_NAME"/include/* "/usr/local/include" + sudo cp -r "$CUDNN_EXTRACTED_TARBALL_NAME"/lib/* "/usr/local/lib" rm -rf "$CUDNN_EXTRACTED_TARBALL_NAME" else sudo tar -xzf $CUDNN_TARBALL_NAME -C /usr/local diff --git a/.github/workflows/multinode-test.yml b/.github/workflows/multinode-test.yml index 79a9f52c09..ba91067af1 100644 --- a/.github/workflows/multinode-test.yml +++ b/.github/workflows/multinode-test.yml @@ -83,4 +83,4 @@ jobs: env: SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} run: | - curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"Weekly multinode GPU test failed! :x: \"}" $SLACK_WEBHOOK + curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"Weekly multinode GPU test failed! :x: \"}" $SLACK_WEBHOOK diff --git a/cmake/legion.cmake b/cmake/legion.cmake index 8e7bafb0a8..d75e16ee03 100644 --- a/cmake/legion.cmake +++ b/cmake/legion.cmake @@ -20,58 +20,18 @@ else() set(LEGION_URL "") if((FF_USE_PREBUILT_LEGION OR FF_USE_ALL_PREBUILT_LIBRARIES) AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64" AND FF_USE_PYTHON AND NOT FF_USE_GASNET AND FF_MAX_DIM EQUAL 5) - # For now, reusing pre-compiled Legion library only works when the Python library on the target machine - # is stored at the path `/opt/conda/lib/libpython3.10.so`. Here, we check if this is the case. - find_package(PythonInterp) - find_package(PythonLibs) - if(PYTHON_LIBRARIES STREQUAL "/opt/conda/lib/libpython3.10.so") - if(LINUX_VERSION MATCHES "20.04") + find_package (Python COMPONENTS Interpreter Development) + set(PY_VERSION "${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}") + message(STATUS "Python version: ${Python_VERSION}") + if(LINUX_VERSION MATCHES "20.04" OR LINUX_VERSION MATCHES "18.04") + # Precompiled Legion is currently only available for Python 3.7, 3.8, 3.9, 3.10 + if (PY_VERSION VERSION_GREATER_EQUAL "3.7" AND PY_VERSION VERSION_LESS_EQUAL "3.10") if (FF_GPU_BACKEND STREQUAL "cuda") - if (CUDA_VERSION VERSION_EQUAL "11.0") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.0.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.1") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.1.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.2") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.2.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.3") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.3.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.4") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.4.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.5") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.5.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.6") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.6.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.7") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.7.0.tar.gz") - endif() - elseif(FF_GPU_BACKEND STREQUAL "hip_rocm") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_hip_rocm.tar.gz") + set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-${LINUX_VERSION}_cuda-${CUDA_VERSION}_python${PY_VERSION}.tar.gz") + elseif (LINUX_VERSION MATCHES "20.04" AND FF_GPU_BACKEND STREQUAL "hip_rocm") + set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_hip_rocm_python${PY_VERSION}.tar.gz") endif() - elseif(LINUX_VERSION MATCHES "18.04") - if (FF_GPU_BACKEND STREQUAL "cuda") - if (CUDA_VERSION VERSION_EQUAL "10.1") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_10.1.243.tar.gz") - elseif (CUDA_VERSION VERSION_EQUAL "10.2") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_10.2.89.tar.gz") - elseif (CUDA_VERSION VERSION_EQUAL "11.0") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.0.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.1") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.1.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.2") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.2.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.3") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.3.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.4") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.4.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.5") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.5.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.6") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.6.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.7") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.7.0.tar.gz") - endif() - endif() - endif() + endif() endif() endif() @@ -94,7 +54,7 @@ else() FetchContent_Populate(${LEGION_NAME}) endif() - set(LEGION_FOLDER_PATH ${${LEGION_NAME}_SOURCE_DIR}/export/${LEGION_NAME}) + set(LEGION_FOLDER_PATH ${${LEGION_NAME}_SOURCE_DIR}) SET(LEGION_INCLUDE_DIR ${LEGION_FOLDER_PATH}/include) SET(LEGION_DEF_DIR ${LEGION_INCLUDE_DIR}) SET(LEGION_BIN_DIR ${LEGION_FOLDER_PATH}/bin/) diff --git a/cmake/nccl.cmake b/cmake/nccl.cmake index e1da2933e7..bb0e0968e9 100644 --- a/cmake/nccl.cmake +++ b/cmake/nccl.cmake @@ -4,46 +4,8 @@ set(NCCL_NAME nccl) set(NCCL_URL "") if((FF_USE_PREBUILT_NCCL OR FF_USE_ALL_PREBUILT_LIBRARIES) AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64") - if(LINUX_VERSION MATCHES "20.04") - if (CUDA_VERSION VERSION_EQUAL "11.0") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.0.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.1") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.1.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.2") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.2.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.3") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.3.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.4") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.4.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.5") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.5.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.6") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.6.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.7") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.7.0.tar.gz") - endif() - elseif(LINUX_VERSION MATCHES "18.04") - if (CUDA_VERSION VERSION_EQUAL "10.1") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_10.1.243.tar.gz") - elseif (CUDA_VERSION VERSION_EQUAL "10.2") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_10.2.89.tar.gz") - elseif (CUDA_VERSION VERSION_EQUAL "11.0") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.0.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.1") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.1.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.2") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.2.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.3") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.3.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.4") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.4.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.5") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.5.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.6") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.6.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.7") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.7.0.tar.gz") - endif() + if(LINUX_VERSION MATCHES "20.04" OR LINUX_VERSION MATCHES "18.04") + set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-${LINUX_VERSION}_cuda-${CUDA_VERSION}.tar.gz") endif() endif() @@ -63,7 +25,7 @@ if(NCCL_URL) FetchContent_Populate(${NCCL_NAME}) endif() - set(NCCL_FOLDER_PATH ${${NCCL_NAME}_SOURCE_DIR}/deps/${NCCL_NAME}) + set(NCCL_FOLDER_PATH ${${NCCL_NAME}_SOURCE_DIR}) set(NCCL_INCLUDE_DIR ${NCCL_FOLDER_PATH}/include) set(NCCL_LIB_DIR ${NCCL_FOLDER_PATH}/lib) message(STATUS "NCCL library path: ${NCCL_FOLDER_PATH}") diff --git a/deps/legion b/deps/legion index 15b23cf0d9..7f8df4ee66 160000 --- a/deps/legion +++ b/deps/legion @@ -1 +1 @@ -Subproject commit 15b23cf0d95f186297f05c76611ddd3e2cbbe9f9 +Subproject commit 7f8df4ee66896acf1c1f5ac8f43808596046f54b