From b24803e9f3bba49e9137e0cf9a405ca73d13a498 Mon Sep 17 00:00:00 2001
From: weimingc <17592131+meenchen@users.noreply.github.com>
Date: Tue, 23 Dec 2025 23:40:10 +0000
Subject: [PATCH] Fix dynamic input quant for AWQ

Signed-off-by: weimingc <17592131+meenchen@users.noreply.github.com>
---
 modelopt/torch/quantization/model_calib.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/modelopt/torch/quantization/model_calib.py b/modelopt/torch/quantization/model_calib.py
index d4cf249fe..d875d9c5b 100644
--- a/modelopt/torch/quantization/model_calib.py
+++ b/modelopt/torch/quantization/model_calib.py
@@ -751,14 +751,18 @@ def postprocess(module, name):
             delattr(module.weight_quantizer, "_pre_quant_scale")
         if hasattr(module.input_quantizer, "_pre_quant_scale"):
             delattr(module.input_quantizer, "_pre_quant_scale")
-        if module.awq_lite.is_input_quantized and module.input_quantizer.amax is not None:
-            act_amax = module.input_quantizer.amax
-            # TODO: make this a buffer after we support only heterogeneous checkpointing for MCore
-            module.input_quantizer._amax_for_smoothing = act_amax.cpu()
-            module.input_quantizer.reset_amax()
-            module.input_quantizer.axis = None
-            module.input_quantizer.amax = act_amax.amax()
-            module.input_quantizer.enable()
+        if module.awq_lite.is_input_quantized:
+            if module.input_quantizer.amax is not None:
+                act_amax = module.input_quantizer.amax
+                # TODO: make this a buffer after we support only heterogeneous checkpointing for MCore
+                module.input_quantizer._amax_for_smoothing = act_amax.cpu()
+                module.input_quantizer.reset_amax()
+                module.input_quantizer.axis = None
+                module.input_quantizer.amax = act_amax.amax()
+                module.input_quantizer.enable()
+            # for dynamic quantization, there is no amax, so we just enable the quantizer
+            else:
+                module.input_quantizer.enable()
 
     if module.awq_lite.is_enabled:
         apply_pre_quant_scale_and_smooth(module, 1.0 / module.awq_lite.best_scale)
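
For context, a minimal sketch (not part of the patch) of the calibration path this change targets: an AWQ-lite run whose input quantizer is configured for dynamic quantization and therefore records no amax, so postprocess() takes the new else branch. The toy model, the "type": "dynamic" attribute override on INT4_AWQ_CFG, and the is_enabled check are assumptions about the modelopt API and may differ between versions.

# Sketch only, under the assumptions stated above.
import copy

import torch
import torch.nn as nn

import modelopt.torch.quantization as mtq

model = nn.Sequential(nn.Linear(256, 256), nn.ReLU(), nn.Linear(256, 256))

# Start from the stock AWQ-lite weight-only config and switch on a dynamic
# (amax-less) input quantizer so the fixed code path is exercised.
cfg = copy.deepcopy(mtq.INT4_AWQ_CFG)
cfg["quant_cfg"]["*input_quantizer"] = {"num_bits": 8, "type": "dynamic", "enable": True}

def forward_loop(m):
    # Small calibration loop; AWQ-lite searches pre-quant scales from these activations.
    for _ in range(4):
        m(torch.randn(2, 256))

model = mtq.quantize(model, cfg, forward_loop)

# With the fix, dynamic input quantizers stay enabled after postprocessing
# even though no amax was collected during calibration.
for name, module in model.named_modules():
    if hasattr(module, "input_quantizer"):
        print(name, module.input_quantizer.is_enabled)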