@@ -1090,7 +1090,7 @@ def _process_weight_scale(name: str, weight_scales: list[paddle.Tensor], process
10901090 "down_proj_in_scale" : weight_key_map .get ("down_proj_expert_in_scale_key" , None ),
10911091 }
10921092 for name , value in scale_key_map .items ():
1093- if value is None :
1093+ if hasattr ( layer , name ) and value is None :
10941094 raise ValueError (f"scale { name } should not be none in w4a8 mode." )
10951095
10961096 # 2. Extract scale tensor from state dict
@@ -1111,8 +1111,9 @@ def _process_weight_scale(name: str, weight_scales: list[paddle.Tensor], process

         for expert_idx in logical_expert_ids:
             for name, scale_key_template in scale_key_map.items():
-                scale_tensor = _extract_scale_tensor(layer, state_dict, scale_key_template, expert_idx)
-                scale_weight_map[name].append(scale_tensor)
+                if hasattr(layer, name):
+                    scale_tensor = _extract_scale_tensor(layer, state_dict, scale_key_template, expert_idx)
+                    scale_weight_map[name].append(scale_tensor)

         for i, weight_scale_name in enumerate(["up_gate_proj_weight_scale", "down_proj_weight_scale"]):
             in_scale_name = weight_scale_name.replace("_weight_scale", "_in_scale")
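
The change guards both the scale-key validation and the per-expert scale extraction with hasattr, so scales are only required and collected for projections the layer actually defines. Below is a minimal standalone sketch of that pattern; every name in it (collect_scales, the SimpleNamespace layer, the key templates) is a hypothetical illustration, not the repository's actual API.

# Minimal standalone sketch of the hasattr-guarded scale handling shown in the
# diff above. All names are hypothetical stand-ins, not the repository's real API.
from types import SimpleNamespace

def collect_scales(layer, state_dict, scale_key_map, expert_ids):
    # 1. Require a scale key only for attributes the layer actually defines;
    #    projections the layer lacks are skipped instead of raising.
    for name, key_template in scale_key_map.items():
        if hasattr(layer, name) and key_template is None:
            raise ValueError(f"scale {name} should not be none in w4a8 mode.")

    # 2. Collect per-expert scale tensors, again only for attributes that
    #    exist on the layer, so absent scales never yield dangling entries.
    scales = {name: [] for name in scale_key_map}
    for expert_idx in expert_ids:
        for name, key_template in scale_key_map.items():
            if hasattr(layer, name):
                scales[name].append(state_dict.pop(key_template.format(expert_idx)))
    return scales

# Usage: a layer that only defines the two weight-scale attributes.
layer = SimpleNamespace(up_gate_proj_weight_scale=None, down_proj_weight_scale=None)
state_dict = {f"expert.{i}.up_gate.weight_scale": float(i) for i in range(2)}
state_dict.update({f"expert.{i}.down.weight_scale": 10.0 + i for i in range(2)})
scale_key_map = {
    "up_gate_proj_weight_scale": "expert.{}.up_gate.weight_scale",
    "down_proj_weight_scale": "expert.{}.down.weight_scale",
    "up_gate_proj_in_scale": None,  # tolerated: the layer has no such attribute
}
print(collect_scales(layer, state_dict, scale_key_map, [0, 1]))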