From c0837970b90569fe4c45c2e34c10a08410122c98 Mon Sep 17 00:00:00 2001 From: xuanyuanminzheng Date: Wed, 10 Jun 2026 15:59:35 +0800 Subject: [PATCH] fp8 triton moe config. --- .../layers/moe/fused_moe_triton_backend.py | 35 ++++++++++++++----- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_triton_backend.py b/fastdeploy/model_executor/layers/moe/fused_moe_triton_backend.py index e3e9c5a0e95..f3867ce5c3c 100644 --- a/fastdeploy/model_executor/layers/moe/fused_moe_triton_backend.py +++ b/fastdeploy/model_executor/layers/moe/fused_moe_triton_backend.py @@ -747,14 +747,33 @@ def apply( "num_stages": 4, } if token_num <= E: - config = { - "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 4, - "num_stages": 4, - } + if token_num <= 16: + config = { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4, + } + elif token_num <= 32: + config = { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3, + } + else: + config = { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3, + } sorted_token_ids, expert_ids, num_tokens_post_padded = tritonmoe_preprocess_func( topk_ids, num_local_experts, config["BLOCK_SIZE_M"]