diff --git a/.github/workflows/rocm-ci.yml b/.github/workflows/rocm-ci.yml index c8d655330..de8653005 100644 --- a/.github/workflows/rocm-ci.yml +++ b/.github/workflows/rocm-ci.yml @@ -1,4 +1,4 @@ -# Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. # # See LICENSE for license information. @@ -252,9 +252,10 @@ jobs: HIP_VISIBLE_DEVICES=2 ci/jax.sh > /workspace/jax_sgpu.log 2>&1 & jax_pid=$!; echo JAX test pid $! - ci/core.sh > /workspace/core_sgpu.log 2>&1 - core_rc=$? + HIP_VISIBLE_DEVICES=3 ci/core.sh > /workspace/core_sgpu.log 2>&1 & + core_pid=$!; echo Core test pid $! + wait $core_pid; core_rc=$? wait $jax_pid; jax_rc=$? wait $torch_pid; torch_rc=$? diff --git a/ci/core.sh b/ci/core.sh index 24ef245d2..e940b12ff 100755 --- a/ci/core.sh +++ b/ci/core.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. # # See LICENSE for license information. 
@@ -13,7 +13,7 @@ if [ -z "$TEST_SGPU" ]; then exit 0 fi -n_parallel_jobs=8 +n_parallel_jobs=4 configure_omp_threads $n_parallel_jobs diff --git a/ci/pytorch.sh b/ci/pytorch.sh index be150485f..54d81aa14 100755 --- a/ci/pytorch.sh +++ b/ci/pytorch.sh @@ -56,7 +56,6 @@ run_test_config(){ run_default_fa 1 test_fused_router.py run_default_fa 1 test_fusible_ops.py run_default_fa 1 test_gemm_autotune.py - run_default_fa 1 test_gemm_sm_count.py run 1 test_gqa.py run 1 test_jit.py run_default_fa 1 test_multi_tensor.py @@ -88,6 +87,9 @@ run_test_config_mgpu(){ echo ==== Run mGPU with Fused attention backend: $_fus_attn ==== configure_omp_threads 8 run_default_fa 1 test_fused_optimizer.py + # This test is not really mGPU, but it is timing-sensitive, so run it here: sGPU tests + # run in parallel on CI, and that parallel load affects its timing. + run_default_fa 1 test_gemm_sm_count.py run_default_fa 3 test_sanity_import.py run_default_fa 2 distributed/test_fusible_ops.py run_default_fa 2 distributed/test_numerics.py