From d93b9c336b8a2c32057cc1eaffee4e7cdb62469a Mon Sep 17 00:00:00 2001 From: billishyahao Date: Fri, 29 May 2026 05:10:57 +0000 Subject: [PATCH 01/11] [AMD] improve dsr1 fp4 disagg > Co-authored-by: billishyahao > Co-authored-by: Duyi-Wang --- .github/configs/amd-master.yaml | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 3544aad49..2794f037d 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -1862,7 +1862,7 @@ dsr1-fp4-mi355x-sglang-disagg: - "DECODE_MTP_SIZE=0" dsr1-fp4-mi355x-sglang-disagg-mtp: - image: lmsysorg/sglang-rocm:v0.5.12-rocm720-mi35x-20260519 + image: rocm/sgl-dev:v0.5.12.post1-rocm720-mi35x-20260526-sync model: amd/DeepSeek-R1-0528-MXFP4-v2 model-prefix: dsr1 runner: mi355x-disagg @@ -2030,7 +2030,7 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: dp-attn: false additional-settings: - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=2" + - "DECODE_MTP_SIZE=3" # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" @@ -2049,7 +2049,7 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: dp-attn: true additional-settings: - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=1" + - "DECODE_MTP_SIZE=3" # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" @@ -2068,7 +2068,26 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: dp-attn: true additional-settings: - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=1" + - "DECODE_MTP_SIZE=3" + + # 1*DEP8 + 1*DEP8 + - spec-decoding: "mtp" + conc-list: [ 64, 128 ] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=3" # 2*DEP8 + 1*DEP8 - spec-decoding: "mtp" From 1739443c456ef3490225ad72f378252f9ddb51f3 Mon Sep 17 00:00:00 2001 From: billishyahao Date: Fri, 29 May 2026 05:25:19 +0000 Subject: [PATCH 02/11] add perf changelog --- perf-changelog.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 935cded22..7aaf2cd75 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3200,3 +3200,10 @@ - "Bump image to lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260523, 1P1D TP8/EP1, dp-attn false, conc [8..512]" - "MoRI conn.py overlay (48e459bd) via job.slurm; launcher qwen3.5_fp4_mi355x_sglang-disagg.sh" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1579 + +- config-keys: + - dsr1-fp4-mi355x-sglang-disagg-mtp + description: + - "Bump the image to May 26" + - "Add conc 128/256 new sweep point" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1584 From bfb242ce3134850f753a1b4110169902d871a780 Mon Sep 17 00:00:00 2001 From: billishyahao Date: Sat, 30 May 2026 00:02:10 +0000 Subject: [PATCH 03/11] fix --- .github/configs/amd-master.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 2794f037d..56d6af6e4 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -1862,7 +1862,7 @@ dsr1-fp4-mi355x-sglang-disagg: - "DECODE_MTP_SIZE=0" dsr1-fp4-mi355x-sglang-disagg-mtp: - image: rocm/sgl-dev:v0.5.12.post1-rocm720-mi35x-20260526-sync + image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260529 model: amd/DeepSeek-R1-0528-MXFP4-v2 model-prefix: dsr1 runner: mi355x-disagg From 9a39f19cb81c4a941e051fc1a8f4c1aaef8f80d6 Mon Sep 17 00:00:00 2001 From: billishyahao Date: Sat, 30 May 2026 16:44:33 +0000 Subject: [PATCH 04/11] fix regression --- benchmarks/multi_node/amd_utils/env.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/multi_node/amd_utils/env.sh b/benchmarks/multi_node/amd_utils/env.sh index 8a141eaeb..3047edad0 100755 --- a/benchmarks/multi_node/amd_utils/env.sh +++ b/benchmarks/multi_node/amd_utils/env.sh @@ -126,7 +126,8 @@ else export SGLANG_USE_AITER=1 export SGLANG_MORI_DISPATCH_DTYPE=auto - export SGLANG_MORI_FP8_COMB=true + export MORI_COMBINE_DTYPE_PREFILL=fp8_direct_cast + export MORI_COMBINE_DTYPE_DECODE=fp8 export SGLANG_MORI_QP_PER_TRANSFER=4 export SGLANG_MORI_NUM_WORKERS=4 export MORI_IO_SQ_BACKOFF_TIMEOUT_US=50000 From ae39e6b609fb97552e07f35c54305fd59faff944 Mon Sep 17 00:00:00 2001 From: billishyahao Date: Sat, 30 May 2026 17:03:38 +0000 Subject: [PATCH 05/11] sync config --- .github/configs/amd-master.yaml | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 56d6af6e4..e9f9d8c06 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -2034,7 +2034,7 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 128, 512 ] + conc-list: [ 384, 512 ] prefill: num-worker: 1 tp: 8 @@ -2053,7 +2053,27 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 64, 256 ] + conc-list: [ 192, 256 ] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=3" + + + # 1*DEP8 + 1*DEP8 + - spec-decoding: "mtp" + conc-list: [ 96, 128 ] prefill: num-worker: 1 tp: 8 @@ -2072,7 +2092,7 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 64, 128 ] + conc-list: [ 48, 64 ] prefill: num-worker: 1 tp: 8 From 7a72e6dd59074c6a497254c1fec2dd1d970916b4 Mon Sep 17 00:00:00 2001 From: billishyahao Date: Sun, 31 May 2026 02:11:01 +0000 Subject: [PATCH 06/11] fix --- .github/configs/amd-master.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index e9f9d8c06..a7c053ddf 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -2034,7 +2034,7 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 384, 512 ] + conc-list: [ 640, 512 ] prefill: num-worker: 1 tp: 8 @@ -2053,7 +2053,7 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 192, 256 ] + conc-list: [ 288, 256 ] prefill: num-worker: 1 tp: 8 @@ -2073,7 +2073,7 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 96, 128 ] + conc-list: [ 144, 128 ] prefill: num-worker: 1 tp: 8 @@ -2092,7 +2092,7 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 48, 64 ] + conc-list: [ 72, 64 ] prefill: num-worker: 1 tp: 8 From 72ea8c3828b8c1cb6501ced961cd4f80ac6d3f4c Mon Sep 17 00:00:00 2001 From: billishyahao Date: Sun, 31 May 2026 02:21:28 +0000 Subject: [PATCH 07/11] fix --- .github/configs/amd-master.yaml | 16 ++++++++++++++-- perf-changelog.yaml | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 8b316b387..da8ad30bc 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -1861,7 +1861,7 @@ dsr1-fp4-mi355x-sglang-disagg: - "DECODE_NODES=1" - "DECODE_MTP_SIZE=0" -dsr1-fp4-mi355x-sglang-disagg-mtp: +dsr1-fp4-mi355x-sglang-disagg-1k1k-mtp: image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260529 model: amd/DeepSeek-R1-0528-MXFP4-v2 model-prefix: dsr1 @@ -1970,7 +1970,19 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: additional-settings: - "DECODE_NODES=1" - "DECODE_MTP_SIZE=1" + +dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp: + image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260529 + model: amd/DeepSeek-R1-0528-MXFP4-v2 + model-prefix: dsr1 + runner: mi355x-disagg + precision: fp4 + framework: sglang-disagg + multinode: true + disagg: true + scenarios: + fixed-seq-len: - isl: 8192 osl: 1024 search-space: @@ -2127,7 +2139,7 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: additional-settings: - "DECODE_NODES=1" - "DECODE_MTP_SIZE=1" - + # DSv4-Pro FP4 on MI355X via SGLang. Uses a rocm720 mi35x image built off the # amd/deepseek_v4 branch in sgl-project/sglang; the SHA is encoded in the diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 305c37509..d043b788b 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3202,7 +3202,7 @@ pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1579 - config-keys: - - dsr1-fp4-mi355x-sglang-disagg-mtp + - dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp description: - "Bump the image to May 26" - "Add conc 128/256 new sweep point" From 12173fa51a55c1bd0c6bc68189e59fc9bc706ad7 Mon Sep 17 00:00:00 2001 From: billishyahao Date: Sun, 31 May 2026 03:17:46 +0000 Subject: [PATCH 08/11] suppress aiter log --- benchmarks/multi_node/amd_utils/env.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/multi_node/amd_utils/env.sh b/benchmarks/multi_node/amd_utils/env.sh index 3047edad0..71d2653bd 100755 --- a/benchmarks/multi_node/amd_utils/env.sh +++ b/benchmarks/multi_node/amd_utils/env.sh @@ -124,6 +124,7 @@ else # ========================================================================= export SGLANG_USE_AITER=1 + export AITER_LOG_LEVEL=ERROR export SGLANG_MORI_DISPATCH_DTYPE=auto export MORI_COMBINE_DTYPE_PREFILL=fp8_direct_cast From 44c137684b9474045887cc4d6f7c37ada0690707 Mon Sep 17 00:00:00 2001 From: billishyahao Date: Sun, 31 May 2026 07:04:49 +0000 Subject: [PATCH 09/11] fix --- .github/configs/amd-master.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 98c0d4bc2..0b1cef8dd 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -2027,7 +2027,7 @@ dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp: # 1P2D TP8 - spec-decoding: "mtp" - conc-list: [ 64, 128, 256 ] + conc-list: [ 72, 64 ] prefill: num-worker: 1 tp: 8 From a0a1ddc2cbf2ad8c65b15c3aada85cde3a2eb0ba Mon Sep 17 00:00:00 2001 From: billishyahao Date: Sun, 31 May 2026 08:10:15 +0000 Subject: [PATCH 10/11] fix --- .github/configs/amd-master.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 0b1cef8dd..7a4ddd1cc 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -2027,7 +2027,7 @@ dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp: # 1P2D TP8 - spec-decoding: "mtp" - conc-list: [ 72, 64 ] + conc-list: [ 32, 64 ] prefill: num-worker: 1 tp: 8 @@ -2046,7 +2046,7 @@ dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp: # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 640, 512 ] + conc-list: [ 512 ] prefill: num-worker: 1 tp: 8 @@ -2065,7 +2065,7 @@ dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp: # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 288, 256 ] + conc-list: [ 256 ] prefill: num-worker: 1 tp: 8 @@ -2085,7 +2085,7 @@ dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp: # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 144, 128 ] + conc-list: [ 128 ] prefill: num-worker: 1 tp: 8 @@ -2104,7 +2104,7 @@ dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp: # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 72, 64 ] + conc-list: [ 64 ] prefill: num-worker: 1 tp: 8 @@ -2123,7 +2123,7 @@ dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp: # 2*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 1024, 2048, 4096 ] + conc-list: [ 2048, 4096 ] prefill: num-worker: 2 tp: 8 From cbc716887bdff3369d82a38aa9cf4fbc35e5d2e1 Mon Sep 17 00:00:00 2001 From: billishyahao Date: Sun, 31 May 2026 13:32:16 +0000 Subject: [PATCH 11/11] fix --- .github/configs/amd-master.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index 7a4ddd1cc..0b7336fb7 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -2046,7 +2046,7 @@ dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp: # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 512 ] + conc-list: [ 640, 512 ] prefill: num-worker: 1 tp: 8 @@ -2123,7 +2123,7 @@ dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp: # 2*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 2048, 4096 ] + conc-list: [ 1024, 2048, 4096 ] prefill: num-worker: 2 tp: 8