Move the fitting-net input-stat computation for "set-by-statistic" out of
change_out_bias (make_model.py) into model_change_out_bias (training.py) for
both the pd and pt backends, and add a test comparing the two code paths.

diff --git a/deepmd/pd/model/model/make_model.py b/deepmd/pd/model/model/make_model.py
index 321c939061..a127c9c367 100644
--- a/deepmd/pd/model/model/make_model.py
+++ b/deepmd/pd/model/model/make_model.py
@@ -231,8 +231,6 @@ def change_out_bias(
                 merged,
                 bias_adjust_mode=bias_adjust_mode,
             )
-            if bias_adjust_mode == "set-by-statistic":
-                self.atomic_model.compute_fitting_input_stat(merged)
 
         def forward_common_lower(
             self,
diff --git a/deepmd/pd/train/training.py b/deepmd/pd/train/training.py
index 7ba0255494..3b7be6263e 100644
--- a/deepmd/pd/train/training.py
+++ b/deepmd/pd/train/training.py
@@ -1348,6 +1348,13 @@ def model_change_out_bias(
     )
     new_bias = deepcopy(_model.get_out_bias())
 
+    from deepmd.pd.model.model.dp_model import (
+        DPModelCommon,
+    )
+
+    if isinstance(_model, DPModelCommon) and _bias_adjust_mode == "set-by-statistic":
+        _model.get_fitting_net().compute_input_stats(_sample_func)
+
     model_type_map = _model.get_type_map()
     log.info(
         f"Change output bias of {model_type_map!s} "
diff --git a/deepmd/pt/model/model/make_model.py b/deepmd/pt/model/model/make_model.py
index 87a1d6b9c5..ad4c35bcc9 100644
--- a/deepmd/pt/model/model/make_model.py
+++ b/deepmd/pt/model/model/make_model.py
@@ -233,8 +233,6 @@ def change_out_bias(
                 merged,
                 bias_adjust_mode=bias_adjust_mode,
             )
-            if bias_adjust_mode == "set-by-statistic":
-                self.atomic_model.compute_fitting_input_stat(merged)
 
         def forward_common_lower(
             self,
diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py
index be480fda3f..234bc9b51b 100644
--- a/deepmd/pt/train/training.py
+++ b/deepmd/pt/train/training.py
@@ -1755,6 +1755,13 @@ def model_change_out_bias(
     )
     new_bias = deepcopy(_model.get_out_bias())
 
+    from deepmd.pt.model.model.dp_model import (
+        DPModelCommon,
+    )
+
+    if isinstance(_model, DPModelCommon) and _bias_adjust_mode == "set-by-statistic":
+        _model.get_fitting_net().compute_input_stats(_sample_func)
+
     model_type_map = _model.get_type_map()
     log.info(
         f"Change output bias of {model_type_map!s} from {to_numpy_array(old_bias).reshape(-1)!s} to {to_numpy_array(new_bias).reshape(-1)!s}."
diff --git a/source/tests/pd/test_training.py b/source/tests/pd/test_training.py
index 692a8fb32f..625d1996de 100644
--- a/source/tests/pd/test_training.py
+++ b/source/tests/pd/test_training.py
@@ -236,5 +236,122 @@ def tearDown(self) -> None:
         DPTrainTest.tearDown(self)
 
 
+class TestModelChangeOutBiasFittingStat(unittest.TestCase):
+    """Verify model_change_out_bias produces the same fitting stat as the old code path.
+
+    The old code called compute_fitting_input_stat inside change_out_bias (make_model.py).
+    The new code calls get_fitting_net().compute_input_stats() separately in
+    model_change_out_bias (training.py). This test verifies they produce identical
+    out_bias, fparam_avg, and fparam_inv_std.
+    """
+
+    def test_fitting_stat_consistency(self) -> None:
+        from deepmd.pd.model.model import get_model as get_model_pd
+        from deepmd.pd.model.model.ener_model import EnergyModel as EnergyModelPD
+        from deepmd.pd.train.training import (
+            model_change_out_bias,
+        )
+        from deepmd.pd.utils.utils import to_numpy_array as paddle_to_numpy
+        from deepmd.pd.utils.utils import to_paddle_tensor as numpy_to_paddle
+        from deepmd.utils.argcheck import model_args as model_args_fn
+
+        # Build a model with numb_fparam=2 so fitting stat is non-trivial
+        model_params = model_args_fn().normalize_value(
+            {
+                "type_map": ["O", "H"],
+                "descriptor": {
+                    "type": "se_e2_a",
+                    "sel": [20, 20],
+                    "rcut_smth": 0.50,
+                    "rcut": 6.00,
+                    "neuron": [3, 6],
+                    "resnet_dt": False,
+                    "axis_neuron": 2,
+                    "precision": "float64",
+                    "type_one_side": True,
+                    "seed": 1,
+                },
+                "fitting_net": {
+                    "neuron": [5, 5],
+                    "resnet_dt": True,
+                    "precision": "float64",
+                    "seed": 1,
+                    "numb_fparam": 2,
+                },
+            },
+            trim_pattern="_*",
+        )
+
+        # Create two identical models via serialize/deserialize
+        model_orig = get_model_pd(model_params)
+        serialized = model_orig.serialize()
+        model_a = EnergyModelPD.deserialize(deepcopy(serialized))
+        model_b = EnergyModelPD.deserialize(deepcopy(serialized))
+
+        # Build mock stat data with fparam
+        nframes = 4
+        natoms = 6
+        coords = np.random.default_rng(42).random((nframes, natoms, 3)) * 13.0
+        atype = np.array([[0, 0, 1, 1, 1, 1]] * nframes, dtype=np.int32)
+        box = np.tile(
+            np.eye(3, dtype=np.float64).reshape(1, 3, 3) * 13.0, (nframes, 1, 1)
+        )
+        natoms_data = np.array([[6, 6, 2, 4]] * nframes, dtype=np.int32)
+        energy = np.array([10.0, 20.0, 15.0, 25.0]).reshape(nframes, 1)
+        # fparam with varying values so mean != 0 and std != 0
+        fparam = np.array(
+            [[1.0, 3.0], [5.0, 7.0], [2.0, 8.0], [6.0, 4.0]], dtype=np.float64
+        )
+
+        merged = [
+            {
+                "coord": numpy_to_paddle(coords),
+                "atype": numpy_to_paddle(atype),
+                "atype_ext": numpy_to_paddle(atype),
+                "box": numpy_to_paddle(box),
+                "natoms": numpy_to_paddle(natoms_data),
+                "energy": numpy_to_paddle(energy),
+                "find_energy": np.float32(1.0),
+                "fparam": numpy_to_paddle(fparam),
+                "find_fparam": np.float32(1.0),
+            }
+        ]
+
+        # Model A: simulate the OLD code path
+        # old change_out_bias called both bias adjustment + compute_fitting_input_stat
+        model_a.change_out_bias(merged, bias_adjust_mode="set-by-statistic")
+        model_a.atomic_model.compute_fitting_input_stat(merged)
+
+        # Model B: use the NEW code path via model_change_out_bias
+        sample_func = lambda: merged  # noqa: E731
+        model_change_out_bias(model_b, sample_func, "set-by-statistic")
+
+        # Compare out_bias
+        bias_a = paddle_to_numpy(model_a.get_out_bias())
+        bias_b = paddle_to_numpy(model_b.get_out_bias())
+        np.testing.assert_allclose(bias_a, bias_b, rtol=1e-10, atol=1e-10)
+
+        # Compare fparam_avg and fparam_inv_std
+        fit_a = model_a.get_fitting_net()
+        fit_b = model_b.get_fitting_net()
+        fparam_avg_a = paddle_to_numpy(fit_a.fparam_avg)
+        fparam_avg_b = paddle_to_numpy(fit_b.fparam_avg)
+        fparam_inv_std_a = paddle_to_numpy(fit_a.fparam_inv_std)
+        fparam_inv_std_b = paddle_to_numpy(fit_b.fparam_inv_std)
+
+        np.testing.assert_allclose(fparam_avg_a, fparam_avg_b, rtol=1e-10, atol=1e-10)
+        np.testing.assert_allclose(
+            fparam_inv_std_a, fparam_inv_std_b, rtol=1e-10, atol=1e-10
+        )
+
+        # Verify non-trivial: avg should not be zeros, inv_std should not be ones
+        assert not np.allclose(fparam_avg_a, 0.0), (
+            "fparam_avg is still zero — stat was not computed"
+        )
+        assert not np.allclose(fparam_inv_std_a, 1.0), (
+            "fparam_inv_std is still ones — stat was not computed"
+        )
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py
index ff4f00f912..7ab37253bd 100644
--- a/source/tests/pt/test_training.py
+++ b/source/tests/pt/test_training.py
@@ -10,6 +10,7 @@
     Path,
 )
 
+import numpy as np
 import torch
 
 from deepmd.pt.entrypoints.main import (
@@ -608,5 +609,122 @@ def tearDown(self) -> None:
         DPTrainTest.tearDown(self)
 
 
+class TestModelChangeOutBiasFittingStat(unittest.TestCase):
+    """Verify model_change_out_bias produces the same fitting stat as the old code path.
+
+    The old code called compute_fitting_input_stat inside change_out_bias (make_model.py).
+    The new code calls get_fitting_net().compute_input_stats() separately in
+    model_change_out_bias (training.py). This test verifies they produce identical
+    out_bias, fparam_avg, and fparam_inv_std.
+    """
+
+    def test_fitting_stat_consistency(self) -> None:
+        from deepmd.pt.model.model import get_model as get_model_pt
+        from deepmd.pt.model.model.ener_model import EnergyModel as EnergyModelPT
+        from deepmd.pt.train.training import (
+            model_change_out_bias,
+        )
+        from deepmd.pt.utils.utils import to_numpy_array as torch_to_numpy
+        from deepmd.pt.utils.utils import to_torch_tensor as numpy_to_torch
+        from deepmd.utils.argcheck import model_args as model_args_fn
+
+        # Build a model with numb_fparam=2 so fitting stat is non-trivial
+        model_params = model_args_fn().normalize_value(
+            {
+                "type_map": ["O", "H"],
+                "descriptor": {
+                    "type": "se_e2_a",
+                    "sel": [20, 20],
+                    "rcut_smth": 0.50,
+                    "rcut": 6.00,
+                    "neuron": [3, 6],
+                    "resnet_dt": False,
+                    "axis_neuron": 2,
+                    "precision": "float64",
+                    "type_one_side": True,
+                    "seed": 1,
+                },
+                "fitting_net": {
+                    "neuron": [5, 5],
+                    "resnet_dt": True,
+                    "precision": "float64",
+                    "seed": 1,
+                    "numb_fparam": 2,
+                },
+            },
+            trim_pattern="_*",
+        )
+
+        # Create two identical models via serialize/deserialize
+        model_orig = get_model_pt(model_params)
+        serialized = model_orig.serialize()
+        model_a = EnergyModelPT.deserialize(deepcopy(serialized))
+        model_b = EnergyModelPT.deserialize(deepcopy(serialized))
+
+        # Build mock stat data with fparam
+        nframes = 4
+        natoms = 6
+        coords = np.random.default_rng(42).random((nframes, natoms, 3)) * 13.0
+        atype = np.array([[0, 0, 1, 1, 1, 1]] * nframes, dtype=np.int32)
+        box = np.tile(
+            np.eye(3, dtype=np.float64).reshape(1, 3, 3) * 13.0, (nframes, 1, 1)
+        )
+        natoms_data = np.array([[6, 6, 2, 4]] * nframes, dtype=np.int32)
+        energy = np.array([10.0, 20.0, 15.0, 25.0]).reshape(nframes, 1)
+        # fparam with varying values so mean != 0 and std != 0
+        fparam = np.array(
+            [[1.0, 3.0], [5.0, 7.0], [2.0, 8.0], [6.0, 4.0]], dtype=np.float64
+        )
+
+        merged = [
+            {
+                "coord": numpy_to_torch(coords),
+                "atype": numpy_to_torch(atype),
+                "atype_ext": numpy_to_torch(atype),
+                "box": numpy_to_torch(box),
+                "natoms": numpy_to_torch(natoms_data),
+                "energy": numpy_to_torch(energy),
+                "find_energy": np.float32(1.0),
+                "fparam": numpy_to_torch(fparam),
+                "find_fparam": np.float32(1.0),
+            }
+        ]
+
+        # Model A: simulate the OLD code path
+        # old change_out_bias called both bias adjustment + compute_fitting_input_stat
+        model_a.change_out_bias(merged, bias_adjust_mode="set-by-statistic")
+        model_a.atomic_model.compute_fitting_input_stat(merged)
+
+        # Model B: use the NEW code path via model_change_out_bias
+        sample_func = lambda: merged  # noqa: E731
+        model_change_out_bias(model_b, sample_func, "set-by-statistic")
+
+        # Compare out_bias
+        bias_a = torch_to_numpy(model_a.get_out_bias())
+        bias_b = torch_to_numpy(model_b.get_out_bias())
+        np.testing.assert_allclose(bias_a, bias_b, rtol=1e-10, atol=1e-10)
+
+        # Compare fparam_avg and fparam_inv_std
+        fit_a = model_a.get_fitting_net()
+        fit_b = model_b.get_fitting_net()
+        fparam_avg_a = torch_to_numpy(fit_a.fparam_avg)
+        fparam_avg_b = torch_to_numpy(fit_b.fparam_avg)
+        fparam_inv_std_a = torch_to_numpy(fit_a.fparam_inv_std)
+        fparam_inv_std_b = torch_to_numpy(fit_b.fparam_inv_std)
+
+        np.testing.assert_allclose(fparam_avg_a, fparam_avg_b, rtol=1e-10, atol=1e-10)
+        np.testing.assert_allclose(
+            fparam_inv_std_a, fparam_inv_std_b, rtol=1e-10, atol=1e-10
+        )
+
+        # Verify non-trivial: avg should not be zeros, inv_std should not be ones
+        assert not np.allclose(fparam_avg_a, 0.0), (
+            "fparam_avg is still zero — stat was not computed"
+        )
+        assert not np.allclose(fparam_inv_std_a, 1.0), (
+            "fparam_inv_std is still ones — stat was not computed"
+        )
+
+
 if __name__ == "__main__":
     unittest.main()