From 288d4ff80822cbcfee0bc1ef0f010058dc5471fd Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Fri, 27 Jan 2023 01:19:11 -0500 Subject: [PATCH 1/7] update link paths --- cmake/legion.cmake | 48 ++++------------------------------------------ cmake/nccl.cmake | 42 ++-------------------------------------- 2 files changed, 6 insertions(+), 84 deletions(-) diff --git a/cmake/legion.cmake b/cmake/legion.cmake index 8e7bafb0a8..5eb7c12541 100644 --- a/cmake/legion.cmake +++ b/cmake/legion.cmake @@ -25,53 +25,13 @@ else() find_package(PythonInterp) find_package(PythonLibs) if(PYTHON_LIBRARIES STREQUAL "/opt/conda/lib/libpython3.10.so") - if(LINUX_VERSION MATCHES "20.04") + if(LINUX_VERSION MATCHES "20.04" OR LINUX_VERSION MATCHES "18.04") if (FF_GPU_BACKEND STREQUAL "cuda") - if (CUDA_VERSION VERSION_EQUAL "11.0") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.0.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.1") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.1.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.2") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.2.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.3") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.3.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.4") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.4.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.5") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.5.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.6") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.6.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.7") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_11.7.0.tar.gz") - endif() - elseif(FF_GPU_BACKEND STREQUAL "hip_rocm") + set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-${LINUX_VERSION}_cuda-${CUDA_VERSION}.tar.gz") + elseif (LINUX_VERSION MATCHES "20.04" AND FF_GPU_BACKEND STREQUAL "hip_rocm") set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_hip_rocm.tar.gz") endif() - elseif(LINUX_VERSION MATCHES "18.04") - if (FF_GPU_BACKEND STREQUAL "cuda") - if (CUDA_VERSION VERSION_EQUAL "10.1") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_10.1.243.tar.gz") - elseif (CUDA_VERSION VERSION_EQUAL "10.2") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_10.2.89.tar.gz") - elseif (CUDA_VERSION VERSION_EQUAL "11.0") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.0.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.1") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.1.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.2") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.2.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.3") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.3.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.4") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.4.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.5") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.5.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.6") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.6.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.7") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-18.04_11.7.0.tar.gz") - endif() - endif() - endif() + endif() endif() endif() diff --git a/cmake/nccl.cmake b/cmake/nccl.cmake index e1da2933e7..6581487b85 100644 --- a/cmake/nccl.cmake +++ b/cmake/nccl.cmake @@ -4,46 +4,8 @@ set(NCCL_NAME nccl) set(NCCL_URL "") if((FF_USE_PREBUILT_NCCL OR FF_USE_ALL_PREBUILT_LIBRARIES) AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64") - if(LINUX_VERSION MATCHES "20.04") - if (CUDA_VERSION VERSION_EQUAL "11.0") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.0.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.1") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.1.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.2") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.2.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.3") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.3.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.4") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.4.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.5") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.5.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.6") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.6.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.7") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.7.0.tar.gz") - endif() - elseif(LINUX_VERSION MATCHES "18.04") - if (CUDA_VERSION VERSION_EQUAL "10.1") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_10.1.243.tar.gz") - elseif (CUDA_VERSION VERSION_EQUAL "10.2") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_10.2.89.tar.gz") - elseif (CUDA_VERSION VERSION_EQUAL "11.0") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.0.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.1") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.1.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.2") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.2.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.3") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.3.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.4") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.4.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.5") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.5.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.6") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.6.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.7") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.7.0.tar.gz") - endif() + if(LINUX_VERSION MATCHES "20.04" OR LINUX_VERSION MATCHES "18.04") + set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-${LINUX_VERSION}_cuda-${CUDA_VERSION}.tar.gz") endif() endif() From 566c73bd0e77a7ec2f5ca53d7d3337a6035bbfba Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Fri, 27 Jan 2023 01:19:44 -0500 Subject: [PATCH 2/7] update flexflow --- deps/legion | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/legion b/deps/legion index 15b23cf0d9..7f8df4ee66 160000 --- a/deps/legion +++ b/deps/legion @@ -1 +1 @@ -Subproject commit 15b23cf0d95f186297f05c76611ddd3e2cbbe9f9 +Subproject commit 7f8df4ee66896acf1c1f5ac8f43808596046f54b From 03a46224492705730ae63c98b85c51a737bc3d00 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Fri, 27 Jan 2023 01:22:22 -0500 Subject: [PATCH 3/7] remove check for python path --- cmake/legion.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/legion.cmake b/cmake/legion.cmake index 5eb7c12541..439b1d9674 100644 --- a/cmake/legion.cmake +++ b/cmake/legion.cmake @@ -24,7 +24,9 @@ else() # is stored at the path `/opt/conda/lib/libpython3.10.so`. Here, we check if this is the case. find_package(PythonInterp) find_package(PythonLibs) - if(PYTHON_LIBRARIES STREQUAL "/opt/conda/lib/libpython3.10.so") + # move python version into url when pre-building for multiple linux versions + get_filename_component(PYTHON_LIBNAME "${PYTHON_LIBRARIES}" NAME) + if(PYTHON_LIBNAME STREQUAL "libpython3.10.so") if(LINUX_VERSION MATCHES "20.04" OR LINUX_VERSION MATCHES "18.04") if (FF_GPU_BACKEND STREQUAL "cuda") set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-${LINUX_VERSION}_cuda-${CUDA_VERSION}.tar.gz") From b3aa6e4629e53fda05fe9cac8c2ae37bc4228637 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 11:53:15 -0500 Subject: [PATCH 4/7] fix multinode link for slack message --- .github/workflows/multinode-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/multinode-test.yml b/.github/workflows/multinode-test.yml index 79a9f52c09..ba91067af1 100644 --- a/.github/workflows/multinode-test.yml +++ b/.github/workflows/multinode-test.yml @@ -83,4 +83,4 @@ jobs: env: SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} run: | - curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"Weekly multinode GPU test failed! :x: \"}" $SLACK_WEBHOOK + curl -X POST -H 'Content-type: application/json' --data "{\"text\":\"Weekly multinode GPU test failed! :x: \"}" $SLACK_WEBHOOK From 177997add99df88ee709ce2ab1f7b230c7807b22 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 16:14:45 -0500 Subject: [PATCH 5/7] fix --- cmake/legion.cmake | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/cmake/legion.cmake b/cmake/legion.cmake index 439b1d9674..3be86e7b61 100644 --- a/cmake/legion.cmake +++ b/cmake/legion.cmake @@ -20,18 +20,15 @@ else() set(LEGION_URL "") if((FF_USE_PREBUILT_LEGION OR FF_USE_ALL_PREBUILT_LIBRARIES) AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64" AND FF_USE_PYTHON AND NOT FF_USE_GASNET AND FF_MAX_DIM EQUAL 5) - # For now, reusing pre-compiled Legion library only works when the Python library on the target machine - # is stored at the path `/opt/conda/lib/libpython3.10.so`. Here, we check if this is the case. - find_package(PythonInterp) - find_package(PythonLibs) - # move python version into url when pre-building for multiple linux versions - get_filename_component(PYTHON_LIBNAME "${PYTHON_LIBRARIES}" NAME) - if(PYTHON_LIBNAME STREQUAL "libpython3.10.so") - if(LINUX_VERSION MATCHES "20.04" OR LINUX_VERSION MATCHES "18.04") + find_package (Python COMPONENTS Interpreter Development) + message(STATUS "Python version: ${Python_VERSION}") + if(LINUX_VERSION MATCHES "20.04" OR LINUX_VERSION MATCHES "18.04") + # Precompiled Legion is currently only available for Python 3.7, 3.8, 3.9, 3.10 + if (Python_VERSION VERSION_GREATER_EQUAL "3.7" AND Python_VERSION VERSION_LESS_EQUAL "3.10") if (FF_GPU_BACKEND STREQUAL "cuda") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-${LINUX_VERSION}_cuda-${CUDA_VERSION}.tar.gz") + set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-${LINUX_VERSION}_cuda-${CUDA_VERSION}_python${Python_VERSION}.tar.gz") elseif (LINUX_VERSION MATCHES "20.04" AND FF_GPU_BACKEND STREQUAL "hip_rocm") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_hip_rocm.tar.gz") + set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_hip_rocm_python${Python_VERSION}.tar.gz") endif() endif() endif() From 52806d1036fd7a4b4e06327f5f19adee20ae6b40 Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sun, 29 Jan 2023 03:33:05 +0000 Subject: [PATCH 6/7] fix paths and links --- cmake/legion.cmake | 9 +++++---- cmake/nccl.cmake | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cmake/legion.cmake b/cmake/legion.cmake index 3be86e7b61..d75e16ee03 100644 --- a/cmake/legion.cmake +++ b/cmake/legion.cmake @@ -21,14 +21,15 @@ else() if((FF_USE_PREBUILT_LEGION OR FF_USE_ALL_PREBUILT_LIBRARIES) AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64" AND FF_USE_PYTHON AND NOT FF_USE_GASNET AND FF_MAX_DIM EQUAL 5) find_package (Python COMPONENTS Interpreter Development) + set(PY_VERSION "${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}") message(STATUS "Python version: ${Python_VERSION}") if(LINUX_VERSION MATCHES "20.04" OR LINUX_VERSION MATCHES "18.04") # Precompiled Legion is currently only available for Python 3.7, 3.8, 3.9, 3.10 - if (Python_VERSION VERSION_GREATER_EQUAL "3.7" AND Python_VERSION VERSION_LESS_EQUAL "3.10") + if (PY_VERSION VERSION_GREATER_EQUAL "3.7" AND PY_VERSION VERSION_LESS_EQUAL "3.10") if (FF_GPU_BACKEND STREQUAL "cuda") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-${LINUX_VERSION}_cuda-${CUDA_VERSION}_python${Python_VERSION}.tar.gz") + set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-${LINUX_VERSION}_cuda-${CUDA_VERSION}_python${PY_VERSION}.tar.gz") elseif (LINUX_VERSION MATCHES "20.04" AND FF_GPU_BACKEND STREQUAL "hip_rocm") - set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_hip_rocm_python${Python_VERSION}.tar.gz") + set(LEGION_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/legion_ubuntu-20.04_hip_rocm_python${PY_VERSION}.tar.gz") endif() endif() endif() @@ -53,7 +54,7 @@ else() FetchContent_Populate(${LEGION_NAME}) endif() - set(LEGION_FOLDER_PATH ${${LEGION_NAME}_SOURCE_DIR}/export/${LEGION_NAME}) + set(LEGION_FOLDER_PATH ${${LEGION_NAME}_SOURCE_DIR}) SET(LEGION_INCLUDE_DIR ${LEGION_FOLDER_PATH}/include) SET(LEGION_DEF_DIR ${LEGION_INCLUDE_DIR}) SET(LEGION_BIN_DIR ${LEGION_FOLDER_PATH}/bin/) diff --git a/cmake/nccl.cmake b/cmake/nccl.cmake index 6581487b85..bb0e0968e9 100644 --- a/cmake/nccl.cmake +++ b/cmake/nccl.cmake @@ -25,7 +25,7 @@ if(NCCL_URL) FetchContent_Populate(${NCCL_NAME}) endif() - set(NCCL_FOLDER_PATH ${${NCCL_NAME}_SOURCE_DIR}/deps/${NCCL_NAME}) + set(NCCL_FOLDER_PATH ${${NCCL_NAME}_SOURCE_DIR}) set(NCCL_INCLUDE_DIR ${NCCL_FOLDER_PATH}/include) set(NCCL_LIB_DIR ${NCCL_FOLDER_PATH}/lib) message(STATUS "NCCL library path: ${NCCL_FOLDER_PATH}") From d3de1a2a2127b9546f33ba6a2c4d852c61068a2e Mon Sep 17 00:00:00 2001 From: Gabriele Oliaro Date: Sat, 28 Jan 2023 22:38:39 -0500 Subject: [PATCH 7/7] fix cudnn install script --- .github/workflows/helpers/install_cudnn.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/helpers/install_cudnn.sh b/.github/workflows/helpers/install_cudnn.sh index d77745451b..4f1e6fc769 100755 --- a/.github/workflows/helpers/install_cudnn.sh +++ b/.github/workflows/helpers/install_cudnn.sh @@ -46,8 +46,8 @@ wget -c -q $CUDNN_LINK if [[ "$cuda_version" == "11.6" || "$cuda_version" == "11.7" ]]; then tar -xf $CUDNN_TARBALL_NAME -C ./ CUDNN_EXTRACTED_TARBALL_NAME="${CUDNN_TARBALL_NAME::-7}" - sudo cp -r "$CUDNN_EXTRACTED_TARBALL_NAME/include/*" "/usr/local/include" - sudo cp -r "$CUDNN_EXTRACTED_TARBALL_NAME/lib/*" "/usr/local/lib" + sudo cp -r "$CUDNN_EXTRACTED_TARBALL_NAME"/include/* "/usr/local/include" + sudo cp -r "$CUDNN_EXTRACTED_TARBALL_NAME"/lib/* "/usr/local/lib" rm -rf "$CUDNN_EXTRACTED_TARBALL_NAME" else sudo tar -xzf $CUDNN_TARBALL_NAME -C /usr/local