Skip to content
71 changes: 61 additions & 10 deletions .github/configs/amd-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1861,8 +1861,8 @@ dsr1-fp4-mi355x-sglang-disagg:
- "DECODE_NODES=1"
- "DECODE_MTP_SIZE=0"

dsr1-fp4-mi355x-sglang-disagg-mtp:
image: lmsysorg/sglang-rocm:v0.5.12-rocm720-mi35x-20260519
dsr1-fp4-mi355x-sglang-disagg-1k1k-mtp:
image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260529
model: amd/DeepSeek-R1-0528-MXFP4-v2
model-prefix: dsr1
runner: mi355x-disagg
Expand Down Expand Up @@ -1970,7 +1970,19 @@ dsr1-fp4-mi355x-sglang-disagg-mtp:
additional-settings:
- "DECODE_NODES=1"
- "DECODE_MTP_SIZE=1"


dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp:
image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260529
model: amd/DeepSeek-R1-0528-MXFP4-v2
model-prefix: dsr1
runner: mi355x-disagg
precision: fp4
framework: sglang-disagg
multinode: true
disagg: true
scenarios:
fixed-seq-len:
- isl: 8192
osl: 1024
search-space:
Expand Down Expand Up @@ -2015,7 +2027,7 @@ dsr1-fp4-mi355x-sglang-disagg-mtp:

# 1P2D TP8
- spec-decoding: "mtp"
conc-list: [ 64, 128, 256 ]
conc-list: [ 32, 64 ]
prefill:
num-worker: 1
tp: 8
Expand All @@ -2030,11 +2042,11 @@ dsr1-fp4-mi355x-sglang-disagg-mtp:
dp-attn: false
additional-settings:
- "DECODE_NODES=2"
- "DECODE_MTP_SIZE=2"
- "DECODE_MTP_SIZE=3"

# 1*DEP8 + 1*DEP8
- spec-decoding: "mtp"
conc-list: [ 128, 512 ]
conc-list: [ 512 ]
prefill:
num-worker: 1
tp: 8
Expand All @@ -2049,11 +2061,11 @@ dsr1-fp4-mi355x-sglang-disagg-mtp:
dp-attn: true
additional-settings:
- "DECODE_NODES=1"
- "DECODE_MTP_SIZE=1"
- "DECODE_MTP_SIZE=3"

# 1*DEP8 + 1*DEP8
- spec-decoding: "mtp"
conc-list: [ 64, 256 ]
conc-list: [ 256 ]
prefill:
num-worker: 1
tp: 8
Expand All @@ -2068,11 +2080,50 @@ dsr1-fp4-mi355x-sglang-disagg-mtp:
dp-attn: true
additional-settings:
- "DECODE_NODES=1"
- "DECODE_MTP_SIZE=1"
- "DECODE_MTP_SIZE=3"


# 1*DEP8 + 1*DEP8
- spec-decoding: "mtp"
conc-list: [ 128 ]
prefill:
num-worker: 1
tp: 8
ep: 8
dp-attn: true
additional-settings:
- "PREFILL_NODES=1"
decode:
num-worker: 1
tp: 8
ep: 8
dp-attn: true
additional-settings:
- "DECODE_NODES=1"
- "DECODE_MTP_SIZE=3"

# 1*DEP8 + 1*DEP8
- spec-decoding: "mtp"
conc-list: [ 64 ]
prefill:
num-worker: 1
tp: 8
ep: 8
dp-attn: true
additional-settings:
- "PREFILL_NODES=1"
decode:
num-worker: 1
tp: 8
ep: 8
dp-attn: true
additional-settings:
- "DECODE_NODES=1"
- "DECODE_MTP_SIZE=3"
Comment thread
Oseltamivir marked this conversation as resolved.

# 2*DEP8 + 1*DEP8
- spec-decoding: "mtp"
conc-list: [ 1024, 2048, 4096 ]
conc-list: [ 2048, 4096 ]
prefill:
num-worker: 2
tp: 8
Expand All @@ -2088,7 +2139,7 @@ dsr1-fp4-mi355x-sglang-disagg-mtp:
additional-settings:
- "DECODE_NODES=1"
- "DECODE_MTP_SIZE=1"


# DSv4-Pro FP4 on MI355X via SGLang. Uses a rocm720 mi35x image built off the
# amd/deepseek_v4 branch in sgl-project/sglang; the SHA is encoded in the
Expand Down
4 changes: 3 additions & 1 deletion benchmarks/multi_node/amd_utils/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,11 @@ else
# =========================================================================

export SGLANG_USE_AITER=1
export AITER_LOG_LEVEL=ERROR

export SGLANG_MORI_DISPATCH_DTYPE=auto
export SGLANG_MORI_FP8_COMB=true
export MORI_COMBINE_DTYPE_PREFILL=fp8_direct_cast
export MORI_COMBINE_DTYPE_DECODE=fp8
export SGLANG_MORI_QP_PER_TRANSFER=4
export SGLANG_MORI_NUM_WORKERS=4
export MORI_IO_SQ_BACKOFF_TIMEOUT_US=50000
Expand Down
7 changes: 7 additions & 0 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3201,6 +3201,13 @@
- "MoRI conn.py overlay (48e459bd) via job.slurm; launcher qwen3.5_fp4_mi355x_sglang-disagg.sh"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1579

- config-keys:
- dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp
description:
- "Bump the image to the May 29 build (v0.5.12.post1-rocm720-mi35x-20260529)"
- "Add new sweep points at conc 128 and 256"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1584

- config-keys:
- glm5-fp8-gb300-dynamo-sglang
description:
Expand Down