From b6e44712f4c84f18ae77767093d3791559f5632d Mon Sep 17 00:00:00 2001 From: chang-wenbin Date: Tue, 24 Mar 2026 17:33:03 +0800 Subject: [PATCH 1/2] supportFD use PFCC DeepGemm --- fastdeploy/model_executor/layers/quantization/fp8_utils.py | 6 ++++-- requirements.txt | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fastdeploy/model_executor/layers/quantization/fp8_utils.py b/fastdeploy/model_executor/layers/quantization/fp8_utils.py index 65d30d4004d..52cc1665386 100644 --- a/fastdeploy/model_executor/layers/quantization/fp8_utils.py +++ b/fastdeploy/model_executor/layers/quantization/fp8_utils.py @@ -80,8 +80,10 @@ def load_deep_gemm(): logger.info("Detected sm100, use PFCC DeepGEMM") else: - logger.info("use FastDeploy DeepGEMM") - import fastdeploy.model_executor.ops.gpu.deep_gemm as deep_gemm + paddle.enable_compat(scope={"deep_gemm"}) + import deep_gemm as deep_gemm + + logger.info("Use PFCC DeepGEMM") else: deep_gemm = None return deep_gemm diff --git a/requirements.txt b/requirements.txt index 66ed714045b..15784ab3cf8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -49,4 +49,5 @@ py-cpuinfo flashinfer-python-paddle flash_mask @ https://paddle-qa.bj.bcebos.com/ernie/flash_mask-4.0.post20260128-py3-none-any.whl arctic_inference @ https://paddle-qa.bj.bcebos.com/ernie/arctic_inference-0.1.3-cp310-cp310-linux_x86_64.whl +deep-gemm-cpp @ https://paddle-qa.bj.bcebos.com/ernie/deep_gemm_cpp-2.2.0+local-cp310-cp310-linux_x86_64.whl transformers>=4.55.1,<5.0.0 From b62c707d22f58f82eb02096ae14de00df0a69535 Mon Sep 17 00:00:00 2001 From: chang-wenbin Date: Wed, 25 Mar 2026 15:29:23 +0800 Subject: [PATCH 2/2] update deepgemm_whl --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 15784ab3cf8..ff1ae91aceb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -49,5 +49,5 @@ py-cpuinfo flashinfer-python-paddle flash_mask @ https://paddle-qa.bj.bcebos.com/ernie/flash_mask-4.0.post20260128-py3-none-any.whl arctic_inference @ https://paddle-qa.bj.bcebos.com/ernie/arctic_inference-0.1.3-cp310-cp310-linux_x86_64.whl -deep-gemm-cpp @ https://paddle-qa.bj.bcebos.com/ernie/deep_gemm_cpp-2.2.0+local-cp310-cp310-linux_x86_64.whl +deep-gemm-cpp @ https://paddle-qa.bj.bcebos.com/ernie/deep_gemm_cpp-2.2.0%2Blocal-cp310-cp310-linux_x86_64.whl transformers>=4.55.1,<5.0.0