From 78f62763bed8bafcf0092438dd74fcf21b030a31 Mon Sep 17 00:00:00 2001 From: Mergen Nachin Date: Mon, 27 Apr 2026 14:10:57 -0400 Subject: [PATCH] =?UTF-8?q?Revert=20"Move=20torch=20pin=20from=20the=202.1?= =?UTF-8?q?1=20to=20the=202026-04-09=20nightly,=20and=20drop=20depr?= =?UTF-8?q?=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit d7f87188c9c19c3a603f54a34a0898a9712320ab. --- .ci/docker/build.sh | 2 +- .ci/docker/ci_commit_pins/pytorch.txt | 2 +- .../install_cuda_windows_cross_compile.sh | 10 +-- .ci/docker/common/install_pytorch.sh | 9 +-- .ci/scripts/test_model_e2e.sh | 2 +- .ci/scripts/test_wheel_package_qnn.sh | 22 +++--- .ci/scripts/utils.sh | 6 +- .github/workflows/cuda-windows.yml | 8 +- .github/workflows/cuda.yml | 6 +- .github/workflows/docker-builds.yml | 7 +- .../models/moshi/mimi/install_requirements.sh | 2 +- install_requirements.py | 26 ++---- .../c10/torch/headeronly/macros/Macros.h | 79 ++++--------------- .../c10/torch/headeronly/util/BFloat16.h | 13 ++- torch_pin.py | 4 +- 15 files changed, 63 insertions(+), 135 deletions(-) diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh index 5d73835ea15..7c4a80044e4 100755 --- a/.ci/docker/build.sh +++ b/.ci/docker/build.sh @@ -81,7 +81,7 @@ case "${IMAGE_NAME}" in LINTRUNNER="" GCC_VERSION=11 CUDA_WINDOWS_CROSS_COMPILE=yes - CUDA_VERSION=12.6 + CUDA_VERSION=12.8 SKIP_PYTORCH=yes ;; *) diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt index 25963674d4f..f6e39a63b92 100644 --- a/.ci/docker/ci_commit_pins/pytorch.txt +++ b/.ci/docker/ci_commit_pins/pytorch.txt @@ -1 +1 @@ -358117c166b75167a09bca81ac9925940feda339 +release/2.11 \ No newline at end of file diff --git a/.ci/docker/common/install_cuda_windows_cross_compile.sh b/.ci/docker/common/install_cuda_windows_cross_compile.sh index 7f6826a7260..e3529751221 100644 --- a/.ci/docker/common/install_cuda_windows_cross_compile.sh +++ b/.ci/docker/common/install_cuda_windows_cross_compile.sh @@ -11,13 +11,12 @@ set -ex INSTALL_DIR="${WINDOWS_CUDA_INSTALL_DIR:-/opt/cuda-windows}" -# Mapping of CUDA versions to their corresponding driver versions for Windows installers. +# Mapping of CUDA versions to their corresponding driver versions for Windows installers # Source: https://developer.nvidia.com/cuda-toolkit-archive -# Format: "PATCH_VERSION:DRIVER_VERSION". Starting with CUDA 13.0, NVIDIA dropped the -# driver suffix from the Windows installer filename, so the driver field is empty. declare -A CUDA_DRIVER_MAP=( ["12.6"]="12.6.3:561.17" - ["13.0"]="13.0.3:" + ["12.8"]="12.8.1:572.61" + ["12.9"]="12.9.1:576.57" ) install_mingw() { @@ -84,8 +83,7 @@ install_windows_cuda() { mkdir -p "${INSTALL_DIR}" cd "${INSTALL_DIR}" - # CUDA 13.0+ installers no longer include the driver version in the filename. - CUDA_INSTALLER="cuda_${CUDA_VERSION}${CUDA_DRIVER_VERSION:+_${CUDA_DRIVER_VERSION}}_windows.exe" + CUDA_INSTALLER="cuda_${CUDA_VERSION}_${CUDA_DRIVER_VERSION}_windows.exe" CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_VERSION}/local_installers/${CUDA_INSTALLER}" # Check if already downloaded and extracted diff --git a/.ci/docker/common/install_pytorch.sh b/.ci/docker/common/install_pytorch.sh index 0d79671c827..548a24f885d 100755 --- a/.ci/docker/common/install_pytorch.sh +++ b/.ci/docker/common/install_pytorch.sh @@ -27,19 +27,14 @@ install_pytorch_and_domains() { chown -R ci-user . export _GLIBCXX_USE_CXX11_ABI=1 - # PyTorch's FindARM.cmake hard-fails when the SVE+BF16 compile probe - # doesn't pass — gcc-11 in this image is too old to accept the combined - # NEON/SVE/bfloat16 intrinsics the probe exercises. Executorch's aarch64 - # runtime targets (phones, embedded) don't use SVE, so bypass the check. - export BUILD_IGNORE_SVE_UNAVAILABLE=1 # Then build and install PyTorch conda_run python setup.py bdist_wheel pip_install "$(echo dist/*.whl)" # Grab the pinned audio and vision commits from PyTorch - TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt) + TORCHAUDIO_VERSION=release/2.11 export TORCHAUDIO_VERSION - TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt) + TORCHVISION_VERSION=release/0.26 export TORCHVISION_VERSION install_domains diff --git a/.ci/scripts/test_model_e2e.sh b/.ci/scripts/test_model_e2e.sh index f050538a283..8b8783d0db8 100755 --- a/.ci/scripts/test_model_e2e.sh +++ b/.ci/scripts/test_model_e2e.sh @@ -260,7 +260,7 @@ if [ "$AUDIO_URL" != "" ]; then elif [[ "$MODEL_NAME" == *whisper* ]] || [ "$MODEL_NAME" = "voxtral_realtime" ]; then conda install -y -c conda-forge "ffmpeg<8" pip install datasets soundfile - pip install torchcodec==0.12.0.dev20260409 --extra-index-url https://download.pytorch.org/whl/nightly/cpu + pip install torchcodec==0.11.0 --extra-index-url https://download.pytorch.org/whl/test/cpu python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])" fi diff --git a/.ci/scripts/test_wheel_package_qnn.sh b/.ci/scripts/test_wheel_package_qnn.sh index 43be46d1941..763bd8733c1 100644 --- a/.ci/scripts/test_wheel_package_qnn.sh +++ b/.ci/scripts/test_wheel_package_qnn.sh @@ -158,17 +158,17 @@ print(module_vars["TORCH_VERSION"]) PY ) - NIGHTLY_VERSION=$( - "$PYBIN" - <<'PY' -import runpy -module_vars = runpy.run_path("torch_pin.py") -print(module_vars["NIGHTLY_VERSION"]) -PY -) - echo "=== [$LABEL] Install torch==${TORCH_VERSION}.${NIGHTLY_VERSION} ===" - - # Install torchao based on the pinned PyTorch version - "$PIPBIN" install torch=="${TORCH_VERSION}.${NIGHTLY_VERSION}" --index-url "https://download.pytorch.org/whl/nightly/cpu" +# NIGHTLY_VERSION=$( +# "$PYBIN" - <<'PY' +# import runpy +# module_vars = runpy.run_path("torch_pin.py") +# print(module_vars["NIGHTLY_VERSION"]) +# PY +# ) + echo "=== [$LABEL] Install torch==${TORCH_VERSION} ===" + + # Install torch based on the pinned PyTorch version, preferring the PyTorch test index + "$PIPBIN" install torch=="${TORCH_VERSION}" --extra-index-url "https://download.pytorch.org/whl/test" "$PIPBIN" install wheel # Install torchao based on the pinned commit from third-party/ao submodule diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh index 18038e36831..86e54b478ef 100644 --- a/.ci/scripts/utils.sh +++ b/.ci/scripts/utils.sh @@ -53,7 +53,7 @@ dedupe_macos_loader_path_rpaths() { pushd .. torch_lib_dir=$(python -c "import importlib.util; print(importlib.util.find_spec('torch').submodule_search_locations[0])")/lib popd - + if [[ -z "${torch_lib_dir}" || ! -d "${torch_lib_dir}" ]]; then return fi @@ -141,9 +141,9 @@ install_pytorch_and_domains() { dedupe_macos_loader_path_rpaths # Grab the pinned audio and vision commits from PyTorch - TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt) + TORCHAUDIO_VERSION=release/2.11 export TORCHAUDIO_VERSION - TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt) + TORCHVISION_VERSION=release/0.26 export TORCHVISION_VERSION install_domains diff --git a/.github/workflows/cuda-windows.yml b/.github/workflows/cuda-windows.yml index 105055c669c..265b7e3069d 100644 --- a/.github/workflows/cuda-windows.yml +++ b/.github/workflows/cuda-windows.yml @@ -64,7 +64,7 @@ jobs: secrets-env: EXECUTORCH_HF_TOKEN runner: linux.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: 12.6 + gpu-arch-version: 12.8 docker-image: ci-image:executorch-ubuntu-22.04-cuda-windows submodules: recursive upload-artifact: ${{ matrix.model_repo }}-${{ matrix.model_name }}-cuda-windows-${{ matrix.quant }} @@ -146,7 +146,7 @@ jobs: timeout: 240 runner: windows.g5.4xlarge.nvidia.gpu gpu-arch-type: cuda - gpu-arch-version: 12.6 + gpu-arch-version: 12.8 download-artifact: ${{ matrix.model_repo }}-${{ matrix.model_name }}-cuda-windows-${{ matrix.quant }} ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} script: | @@ -158,7 +158,7 @@ jobs: \$ErrorActionPreference = 'Stop' \$PSNativeCommandUseErrorActionPreference = \$true - \$env:CUDA_HOME = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6' + \$env:CUDA_HOME = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8' \$env:CUDA_PATH = \$env:CUDA_HOME \$env:PATH = \"\$env:CUDA_HOME\bin;\$env:PATH\" nvcc --version @@ -169,5 +169,5 @@ jobs: throw 'RUNNER_ARTIFACT_DIR is empty. Ensure download-artifact is configured for windows_job.yml.' } - .ci/scripts/test_model_e2e_windows.ps1 -Device cuda-windows -HfModel '${{ matrix.model_repo }}/${{ matrix.model_name }}' -QuantName '${{ matrix.quant }}' -ModelDir \$artifactDir -ExpectedCudaVersion '12.6' + .ci/scripts/test_model_e2e_windows.ps1 -Device cuda-windows -HfModel '${{ matrix.model_repo }}/${{ matrix.model_name }}' -QuantName '${{ matrix.quant }}' -ModelDir \$artifactDir -ExpectedCudaVersion '12.8' }" diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 077f48ff0c9..c3b7c058ee6 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -1,6 +1,6 @@ # Test ExecuTorch CUDA Build Compatibility # This workflow tests whether ExecuTorch can be successfully built with CUDA support -# across different CUDA versions (12.6, 13.0) using the command: +# across different CUDA versions (12.6, 12.8, 12.9, 13.0) using the command: # ./install_executorch.sh # # Note: ExecuTorch automatically detects the system CUDA version using nvcc and @@ -31,7 +31,7 @@ jobs: strategy: fail-fast: false matrix: - cuda-version: ["12.6", "13.0"] + cuda-version: ["12.6", "12.8", "12.9", "13.0"] name: test-executorch-cuda-build-${{ matrix.cuda-version }} uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main @@ -66,7 +66,7 @@ jobs: echo "CUDA build results: ${{ needs.test-cuda-builds.result }}" exit 1 else - echo "SUCCESS: All ExecuTorch CUDA builds (12.6, 13.0) completed successfully!" + echo "SUCCESS: All ExecuTorch CUDA builds (12.6, 12.8, 12.9, 13.0) completed successfully!" fi test-models-cuda: diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml index 25234ca387a..0f9778e9e69 100644 --- a/.github/workflows/docker-builds.yml +++ b/.github/workflows/docker-builds.yml @@ -33,20 +33,17 @@ jobs: matrix: runner: [linux.4xlarge] docker-image-name: [ + executorch-ubuntu-22.04-gcc11, executorch-ubuntu-22.04-gcc9-nopytorch, executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-linter, executorch-ubuntu-22.04-arm-sdk, + executorch-ubuntu-22.04-zephyr-sdk, executorch-ubuntu-22.04-qnn-sdk, executorch-ubuntu-22.04-mediatek-sdk, executorch-ubuntu-22.04-clang12-android ] include: - # PyTorch is built from source in these images; 4xlarge OOMs mid-build. - - docker-image-name: executorch-ubuntu-22.04-gcc11 - runner: linux.12xlarge - - docker-image-name: executorch-ubuntu-22.04-zephyr-sdk - runner: linux.12xlarge - docker-image-name: executorch-ubuntu-22.04-gcc11-aarch64 runner: linux.arm64.2xlarge - docker-image-name: executorch-ubuntu-22.04-gcc11-aarch64-android diff --git a/examples/models/moshi/mimi/install_requirements.sh b/examples/models/moshi/mimi/install_requirements.sh index 29e9fe10977..9fc12f64bc9 100755 --- a/examples/models/moshi/mimi/install_requirements.sh +++ b/examples/models/moshi/mimi/install_requirements.sh @@ -8,7 +8,7 @@ set -x sudo apt install ffmpeg -y -pip install torchcodec==0.12.0.dev20260409 --extra-index-url https://download.pytorch.org/whl/nightly/cpu +pip install torchcodec==0.11.0 --extra-index-url https://download.pytorch.org/whl/test/cpu pip install moshi==0.2.11 pip install bitsandbytes soundfile einops # Run llama2/install requirements for torchao deps diff --git a/install_requirements.py b/install_requirements.py index 85431bbc8d9..b30068cbdb8 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -12,11 +12,9 @@ from install_utils import determine_torch_url, is_intel_mac_os, python_is_compatible -from torch_pin import NIGHTLY_VERSION, TORCH_VERSION - # The pip repository that hosts nightly torch packages. # This will be dynamically set based on CUDA availability and CUDA backend enabled/disabled. -TORCH_NIGHTLY_URL_BASE = "https://download.pytorch.org/whl/nightly" +TORCH_URL_BASE = "https://download.pytorch.org/whl/test" # Since ExecuTorch often uses main-branch features of pytorch, only the nightly # pip versions will have the required features. @@ -44,18 +42,14 @@ def install_requirements(use_pytorch_nightly): sys.exit(1) # Determine the appropriate PyTorch URL based on CUDA delegate status - torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE) + torch_url = determine_torch_url(TORCH_URL_BASE) # pip packages needed by exir. TORCH_PACKAGE = [ # Setting use_pytorch_nightly to false to test the pinned PyTorch commit. Note # that we don't need to set any version number there because they have already # been installed on CI before this step, so pip won't reinstall them - ( - f"torch=={TORCH_VERSION}.{NIGHTLY_VERSION}" - if use_pytorch_nightly - else "torch" - ), + ("torch==2.11.0" if use_pytorch_nightly else "torch"), ] # Install the requirements for core ExecuTorch package. @@ -114,20 +108,12 @@ def install_requirements(use_pytorch_nightly): def install_optional_example_requirements(use_pytorch_nightly): # Determine the appropriate PyTorch URL based on CUDA delegate status - torch_url = determine_torch_url(TORCH_NIGHTLY_URL_BASE) + torch_url = determine_torch_url(TORCH_URL_BASE) print("Installing torch domain libraries") DOMAIN_LIBRARIES = [ - ( - f"torchvision==0.27.0.{NIGHTLY_VERSION}" - if use_pytorch_nightly - else "torchvision" - ), - ( - f"torchaudio==2.11.0.{NIGHTLY_VERSION}" - if use_pytorch_nightly - else "torchaudio" - ), + ("torchvision==0.26.0" if use_pytorch_nightly else "torchvision"), + ("torchaudio==2.11.0" if use_pytorch_nightly else "torchaudio"), ] # Then install domain libraries subprocess.run( diff --git a/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h b/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h index cef99df3f56..63aa0d20d8e 100644 --- a/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h +++ b/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h @@ -325,88 +325,41 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256; #define C10_HIP_HOST_DEVICE #endif +#if defined(USE_ROCM) // C10_WARP_SIZE is only allowed for device code. -// Host code dynamically-sized launch configs _must_ use at::cuda::warp_size(). -// Host or device statically-sized arrays _must_ use either -// C10_WARP_SIZE_UPPER_BOUND or C10_WARP_SIZE_LOWER_BOUND, as needed. -// +// Host code _must_ use at::cuda::warp_size() // HIP header used to define warpSize as a constexpr that was either 32 or 64 // depending on the target device, and then always set it to 64 for host code. -// For a time, that allowed C10_WARP_SIZE to be defined like so: -// -// #ifdef USE_ROCM -// #define C10_WARP_SIZE warpSize -// #else -// #define C10_WARP_SIZE 32 -// #endif -// -// In ROCm 7, warpSize is no longer constexpr, matching CUDA behavior. -// We can now only use warpSize for C10_WARP_SIZE in device code and this is -// enforced by using __device__ in its definition. In host code where -// C10_WARP_SIZE was previously used as a compile-time constant, this will now -// cause a compile-time error. -// -// If an array was previously expected to be sized at compile-time using -// C10_WARP_SIZE, users must now use either C10_WARP_SIZE_UPPER_BOUND or -// C10_WARP_SIZE_LOWER_BOUND depending on the situation. -// -// If C10_WARP_SIZE was previously used to determine kernel launch sizes, users -// must now use at::cuda::warp_size() for the dynamic runtime query. -// -// Unfortunately, C10_WARP_SIZE has been public and available for both host and -// device since approximately 2019, so forcing it to be device-only would break -// existing code in the wild. -#if defined(USE_ROCM) +// Host pass of HIP compiler needs C10_WARP_SIZE defined to _something_ so we +// set it to something unreasonable to trigger obvious host code errors. + namespace at::cuda { TORCH_CUDA_CPP_API int warp_size(); } -#if defined(__HIPCC__) -static __host__ inline int C10_WARP_SIZE_INTERNAL() { +#ifdef __HIPCC__ +static inline int __host__ C10_WARP_SIZE_INTERNAL() { return at::cuda::warp_size(); } -// NOTE: __device__ C10_WARP_SIZE_INTERNAL -// For __SPIRV__, we must use dynamic warpSize. When not targeting __SPIRV__, -// we can use constexpr. This matches prior behavior. We preserve this for -// backward compatibility instead of forcing old code to use dynamic warpSize -// and losing constexpr. However, compiling for --offload-arch=amdgcnspirv -// could expose where C10_WARP_SIZE was used incorrectly where the dynamic -// warpSize is not allowed. -#if defined(__SPIRV__) -static __device__ inline int C10_WARP_SIZE_INTERNAL() { - return warpSize; -} -#else // __SPIRV__ -static __device__ inline constexpr int C10_WARP_SIZE_INTERNAL() { + +static inline constexpr int __device__ C10_WARP_SIZE_INTERNAL() { #if defined(__GFX9__) return 64; #else // __GFX9__ return 32; #endif // __GFX9__ } -#endif // __SPIRV__ -#if defined(__SPIRV__) -#define C10_WARP_SIZE_LOWER_BOUND 32 -#define C10_WARP_SIZE_UPPER_BOUND 64 -#elif defined(__GFX9__) -#define C10_WARP_SIZE_LOWER_BOUND 64 -#define C10_WARP_SIZE_UPPER_BOUND 64 -#else -#define C10_WARP_SIZE_LOWER_BOUND 32 -#define C10_WARP_SIZE_UPPER_BOUND 32 -#endif -#else // !__HIPCC__ +#else // __HIPCC__ static inline int C10_WARP_SIZE_INTERNAL() { return at::cuda::warp_size(); } -#define C10_WARP_SIZE_LOWER_BOUND 32 -#define C10_WARP_SIZE_UPPER_BOUND 64 #endif // __HIPCC__ + #define C10_WARP_SIZE (C10_WARP_SIZE_INTERNAL()) -#else // !USE_ROCM +#define C10_WARP_SIZE_STATIC 64 + +#else // defined(USE_ROCM) #define C10_WARP_SIZE 32 -#define C10_WARP_SIZE_LOWER_BOUND 32 -#define C10_WARP_SIZE_UPPER_BOUND 32 -#endif // USE_ROCM +#endif #if defined(_MSC_VER) && _MSC_VER <= 1900 #define __func__ __FUNCTION__ @@ -676,7 +629,7 @@ __host__ __device__ // This macro is used to find older C++ compilers // that don't support move optimization for return values. -#if (defined(__GNUC__) && __GNUC__ < 13 && __cplusplus < 202002L) || \ +#if (defined(__GNUC__) && __GNUC__ < 13) || \ (defined(__clang_major__) && __clang_major__ < 13) #define C10_RETURN_MOVE_IF_OLD_COMPILER 1 #else diff --git a/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h b/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h index 9aa08c265bd..64479ba36f1 100644 --- a/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h +++ b/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h @@ -12,7 +12,7 @@ #include #include -#if defined(__CUDACC__) && (!defined(USE_ROCM) || (TORCH_HIP_VERSION >= 702)) +#if defined(__CUDACC__) && !defined(USE_ROCM) #include #endif @@ -46,7 +46,7 @@ struct alignas(2) BFloat16 { /* implicit */ inline C10_HOST_DEVICE BFloat16(float value); inline C10_HOST_DEVICE operator float() const; -#if defined(__CUDACC__) && (!defined(USE_ROCM) || (TORCH_HIP_VERSION >= 702)) +#if defined(__CUDACC__) && !defined(USE_ROCM) inline C10_HOST_DEVICE BFloat16(const __nv_bfloat16& value); explicit inline C10_HOST_DEVICE operator __nv_bfloat16() const; #endif @@ -124,9 +124,8 @@ C10_CLANG_DIAGNOSTIC_IGNORE("-Wimplicit-int-float-conversion") /// Constructors inline C10_HOST_DEVICE BFloat16::BFloat16(float value) : -#if defined(__CUDACC__) && \ - (!defined(USE_ROCM) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 || \ - defined(USE_ROCM) && (TORCH_HIP_VERSION >= 702)) +#if defined(__CUDACC__) && !defined(USE_ROCM) && defined(__CUDA_ARCH__) && \ + __CUDA_ARCH__ >= 800 x(__bfloat16_as_ushort(__float2bfloat16(value))) #elif defined(__SYCL_DEVICE_ONLY__) && \ defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS) @@ -140,7 +139,7 @@ inline C10_HOST_DEVICE BFloat16::BFloat16(float value) /// Implicit conversions inline C10_HOST_DEVICE BFloat16::operator float() const { -#if defined(__CUDACC__) && (!defined(USE_ROCM) || (TORCH_HIP_VERSION >= 702)) +#if defined(__CUDACC__) && !defined(USE_ROCM) return __bfloat162float(*reinterpret_cast(&x)); #elif defined(__SYCL_DEVICE_ONLY__) && \ defined(SYCL_EXT_ONEAPI_BFLOAT16_MATH_FUNCTIONS) @@ -150,7 +149,7 @@ inline C10_HOST_DEVICE BFloat16::operator float() const { #endif } -#if defined(__CUDACC__) && (!defined(USE_ROCM) || (TORCH_HIP_VERSION >= 702)) +#if defined(__CUDACC__) && !defined(USE_ROCM) inline C10_HOST_DEVICE BFloat16::BFloat16(const __nv_bfloat16& value) { x = *reinterpret_cast(&value); } diff --git a/torch_pin.py b/torch_pin.py index 10a015c081c..3575d9a376d 100644 --- a/torch_pin.py +++ b/torch_pin.py @@ -1,2 +1,2 @@ -TORCH_VERSION = "2.12.0" -NIGHTLY_VERSION = "dev20260409" +TORCH_VERSION = "2.11.0" +# NIGHTLY_VERSION = "dev20260318" Temporarily pinning to stable release candidate. Revert https://github.com/pytorch/executorch/pull/18287