From b78ecefc16499eaf7cbc5d10fb9d270369823898 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Wed, 25 Feb 2026 13:28:06 +0800 Subject: [PATCH 1/7] compute_output_stats_global now applies atom_exclude_types mask to natoms before computing output bias --- deepmd/dpmodel/utils/stat.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/deepmd/dpmodel/utils/stat.py b/deepmd/dpmodel/utils/stat.py index 1cbaad0275..29ea128a91 100644 --- a/deepmd/dpmodel/utils/stat.py +++ b/deepmd/dpmodel/utils/stat.py @@ -14,6 +14,9 @@ from deepmd.dpmodel.common import ( to_numpy_array, ) +from deepmd.dpmodel.utils.exclude_mask import ( + AtomExcludeMask, +) from deepmd.utils.out_stat import ( compute_stats_do_not_distinguish_types, compute_stats_from_atomic, @@ -360,6 +363,12 @@ def compute_output_stats_global( } natoms_key = "natoms" + for system in sampled: + if "atom_exclude_types" in system: + type_mask = AtomExcludeMask( + ntypes, system["atom_exclude_types"] + ).get_type_mask() + system[natoms_key][:, 2:] *= type_mask.reshape(1, -1) input_natoms = { kk: [ to_numpy_array(sampled[idx][natoms_key]) From 4390609103d66a3d80c1fb9a70741bb51ca29631 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Wed, 25 Feb 2026 13:57:36 +0800 Subject: [PATCH 2/7] fix stat: inconsistency in global sampled index between dp and pt backends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. elif → if in compute_output_stats: Systems with both find_atom_ and find_ now go into both atomic_sampled_idx and global_sampled_idx 2. pt and pd: compute_output_stats_global updated to accept global_sampled_idx parameter (matching dpmodel's signature) and use precomputed indices instead of inline filtering. Also converts to numpy early via to_numpy_array during gathering, then uses np.concatenate instead of torch.cat/paddle.concat + to_numpy_array. 3. pt and pd: compute_output_stats_atomic updated to accept atomic_sampled_idx parameter (matching dpmodel's signature) with early return for empty indices, and same numpy-first gathering pattern. --- deepmd/dpmodel/utils/stat.py | 4 +-- deepmd/pd/utils/stat.py | 68 +++++++++++++++++------------------- deepmd/pt/utils/stat.py | 60 +++++++++++++++---------------- 3 files changed, 62 insertions(+), 70 deletions(-) diff --git a/deepmd/dpmodel/utils/stat.py b/deepmd/dpmodel/utils/stat.py index 29ea128a91..870d420008 100644 --- a/deepmd/dpmodel/utils/stat.py +++ b/deepmd/dpmodel/utils/stat.py @@ -248,10 +248,8 @@ def compute_output_stats( system["find_atom_" + kk] > 0.0 ): atomic_sampled_idx[kk].append(idx) - elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0): + if (("find_" + kk) in system) and (system["find_" + kk] > 0.0): global_sampled_idx[kk].append(idx) - else: - continue # use index to gather model predictions for the corresponding systems. model_pred_g = ( diff --git a/deepmd/pd/utils/stat.py b/deepmd/pd/utils/stat.py index 23c6c508a1..f3854e91ad 100644 --- a/deepmd/pd/utils/stat.py +++ b/deepmd/pd/utils/stat.py @@ -324,12 +324,9 @@ def compute_output_stats( system["find_atom_" + kk] > 0.0 ): atomic_sampled_idx[kk].append(idx) - elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0): + if (("find_" + kk) in system) and (system["find_" + kk] > 0.0): global_sampled_idx[kk].append(idx) - else: - continue - # use index to gather model predictions for the corresponding systems. model_pred_g = ( @@ -378,14 +375,16 @@ def compute_output_stats( keys, rcond, preset_bias, - model_pred_g, + global_sampled_idx, stats_distinguish_types, intensive, + model_pred_g, ) bias_atom_a, std_atom_a = compute_output_stats_atomic( sampled, ntypes, keys, + atomic_sampled_idx, model_pred_a, ) @@ -422,22 +421,21 @@ def compute_output_stats_global( keys: list[str], rcond: float | None = None, preset_bias: dict[str, list[paddle.Tensor | None]] | None = None, - model_pred: dict[str, np.ndarray] | None = None, + global_sampled_idx: dict | None = None, stats_distinguish_types: bool = True, intensive: bool = False, + model_pred: dict[str, np.ndarray] | None = None, ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]: """This function only handle stat computation from reduced global labels.""" - # return directly if model predict is empty for global - if model_pred == {}: + # return directly if no global samples + if global_sampled_idx is None or all( + len(v) == 0 for v in global_sampled_idx.values() + ): return {}, {} # get label dict from sample; for each key, only picking the system with global labels. outputs = { - kk: [ - system[kk] - for system in sampled - if kk in system and system.get(f"find_{kk}", 0) > 0 - ] + kk: [to_numpy_array(sampled[idx][kk]) for idx in global_sampled_idx.get(kk, [])] for kk in keys } @@ -452,22 +450,18 @@ def compute_output_stats_global( input_natoms = { kk: [ - item[natoms_key] - for item in sampled - if kk in item and item.get(f"find_{kk}", 0) > 0 + to_numpy_array(sampled[idx][natoms_key]) + for idx in global_sampled_idx.get(kk, []) ] for kk in keys } # shape: (nframes, ndim) merged_output = { - kk: to_numpy_array(paddle.concat(outputs[kk])) - for kk in keys - if len(outputs[kk]) > 0 + kk: np.concatenate(outputs[kk]) for kk in keys if len(outputs[kk]) > 0 } # shape: (nframes, ntypes) - merged_natoms = { - kk: to_numpy_array(paddle.concat(input_natoms[kk])[:, 2:]) + kk: np.concatenate(input_natoms[kk])[:, 2:] for kk in keys if len(input_natoms[kk]) > 0 } @@ -554,49 +548,51 @@ def compute_output_stats_atomic( sampled: list[dict], ntypes: int, keys: list[str], + atomic_sampled_idx: dict | None = None, model_pred: dict[str, np.ndarray] | None = None, ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]: + """Compute output statistics from atomic labels.""" + # return directly if no atomic samples + if atomic_sampled_idx is None or all( + len(v) == 0 for v in atomic_sampled_idx.values() + ): + return {}, {} + # get label dict from sample; for each key, only picking the system with atomic labels. outputs = { kk: [ - system["atom_" + kk] - for system in sampled - if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0 + to_numpy_array(sampled[idx]["atom_" + kk]) + for idx in atomic_sampled_idx.get(kk, []) ] for kk in keys } natoms = { kk: [ - system["atype"] - for system in sampled - if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0 + to_numpy_array(sampled[idx]["atype"]) + for idx in atomic_sampled_idx.get(kk, []) ] for kk in keys } # reshape outputs [nframes, nloc * ndim] --> reshape to [nframes * nloc, 1, ndim] for concatenation # reshape natoms [nframes, nloc] --> reshape to [nframes * nolc, 1] for concatenation - natoms = {k: [sys_v.reshape([-1, 1]) for sys_v in v] for k, v in natoms.items()} + natoms = {k: [sys_v.reshape(-1, 1) for sys_v in v] for k, v in natoms.items()} outputs = { k: [ - sys.reshape([natoms[k][sys_idx].shape[0], 1, -1]) + sys.reshape(natoms[k][sys_idx].shape[0], 1, -1) for sys_idx, sys in enumerate(v) ] for k, v in outputs.items() } merged_output = { - kk: to_numpy_array(paddle.concat(outputs[kk])) - for kk in keys - if len(outputs[kk]) > 0 + kk: np.concatenate(outputs[kk]) for kk in keys if len(outputs[kk]) > 0 } merged_natoms = { - kk: to_numpy_array(paddle.concat(natoms[kk])) - for kk in keys - if len(natoms[kk]) > 0 + kk: np.concatenate(natoms[kk]) for kk in keys if len(natoms[kk]) > 0 } # reshape merged data to [nf, nloc, ndim] merged_output = { - kk: merged_output[kk].reshape([*merged_natoms[kk].shape, -1]) + kk: merged_output[kk].reshape((*merged_natoms[kk].shape, -1)) for kk in merged_output } diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index cf82461a7e..657cfa1f17 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -324,12 +324,9 @@ def compute_output_stats( system["find_atom_" + kk] > 0.0 ): atomic_sampled_idx[kk].append(idx) - elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0): + if (("find_" + kk) in system) and (system["find_" + kk] > 0.0): global_sampled_idx[kk].append(idx) - else: - continue - # use index to gather model predictions for the corresponding systems. model_pred_g = ( @@ -378,14 +375,16 @@ def compute_output_stats( keys, rcond, preset_bias, - model_pred_g, + global_sampled_idx, stats_distinguish_types, intensive, + model_pred_g, ) bias_atom_a, std_atom_a = compute_output_stats_atomic( sampled, ntypes, keys, + atomic_sampled_idx, model_pred_a, ) @@ -422,22 +421,21 @@ def compute_output_stats_global( keys: list[str], rcond: float | None = None, preset_bias: dict[str, list[np.ndarray | None]] | None = None, - model_pred: dict[str, np.ndarray] | None = None, + global_sampled_idx: dict | None = None, stats_distinguish_types: bool = True, intensive: bool = False, + model_pred: dict[str, np.ndarray] | None = None, ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]: """This function only handle stat computation from reduced global labels.""" - # return directly if model predict is empty for global - if model_pred == {}: + # return directly if no global samples + if global_sampled_idx is None or all( + len(v) == 0 for v in global_sampled_idx.values() + ): return {}, {} # get label dict from sample; for each key, only picking the system with global labels. outputs = { - kk: [ - system[kk] - for system in sampled - if kk in system and system.get(f"find_{kk}", 0) > 0 - ] + kk: [to_numpy_array(sampled[idx][kk]) for idx in global_sampled_idx.get(kk, [])] for kk in keys } @@ -452,22 +450,18 @@ def compute_output_stats_global( input_natoms = { kk: [ - item[natoms_key] - for item in sampled - if kk in item and item.get(f"find_{kk}", 0) > 0 + to_numpy_array(sampled[idx][natoms_key]) + for idx in global_sampled_idx.get(kk, []) ] for kk in keys } # shape: (nframes, ndim) merged_output = { - kk: to_numpy_array(torch.cat(outputs[kk])) - for kk in keys - if len(outputs[kk]) > 0 + kk: np.concatenate(outputs[kk]) for kk in keys if len(outputs[kk]) > 0 } # shape: (nframes, ntypes) - merged_natoms = { - kk: to_numpy_array(torch.cat(input_natoms[kk])[:, 2:]) + kk: np.concatenate(input_natoms[kk])[:, 2:] for kk in keys if len(input_natoms[kk]) > 0 } @@ -551,22 +545,28 @@ def compute_output_stats_atomic( sampled: list[dict], ntypes: int, keys: list[str], + atomic_sampled_idx: dict | None = None, model_pred: dict[str, np.ndarray] | None = None, ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]: + """Compute output statistics from atomic labels.""" + # return directly if no atomic samples + if atomic_sampled_idx is None or all( + len(v) == 0 for v in atomic_sampled_idx.values() + ): + return {}, {} + # get label dict from sample; for each key, only picking the system with atomic labels. outputs = { kk: [ - system["atom_" + kk] - for system in sampled - if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0 + to_numpy_array(sampled[idx]["atom_" + kk]) + for idx in atomic_sampled_idx.get(kk, []) ] for kk in keys } natoms = { kk: [ - system["atype"] - for system in sampled - if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0 + to_numpy_array(sampled[idx]["atype"]) + for idx in atomic_sampled_idx.get(kk, []) ] for kk in keys } @@ -582,12 +582,10 @@ def compute_output_stats_atomic( } merged_output = { - kk: to_numpy_array(torch.cat(outputs[kk])) - for kk in keys - if len(outputs[kk]) > 0 + kk: np.concatenate(outputs[kk]) for kk in keys if len(outputs[kk]) > 0 } merged_natoms = { - kk: to_numpy_array(torch.cat(natoms[kk])) for kk in keys if len(natoms[kk]) > 0 + kk: np.concatenate(natoms[kk]) for kk in keys if len(natoms[kk]) > 0 } # reshape merged data to [nf, nloc, ndim] merged_output = { From 1d1ca0988013e7ccdddf22c608447893612b187b Mon Sep 17 00:00:00 2001 From: Han Wang Date: Wed, 25 Feb 2026 15:20:55 +0800 Subject: [PATCH 3/7] support mixed type in dpmodel, add consistency test for backends --- deepmd/dpmodel/utils/stat.py | 3 +- source/tests/consistent/utils/__init__.py | 1 + source/tests/consistent/utils/test_stat.py | 276 +++++++++++++++++++++ 3 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 source/tests/consistent/utils/__init__.py create mode 100644 source/tests/consistent/utils/test_stat.py diff --git a/deepmd/dpmodel/utils/stat.py b/deepmd/dpmodel/utils/stat.py index 870d420008..12085300a8 100644 --- a/deepmd/dpmodel/utils/stat.py +++ b/deepmd/dpmodel/utils/stat.py @@ -360,7 +360,8 @@ def compute_output_stats_global( for kk in keys } - natoms_key = "natoms" + data_mixed_type = "real_natoms_vec" in sampled[0] + natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec" for system in sampled: if "atom_exclude_types" in system: type_mask = AtomExcludeMask( diff --git a/source/tests/consistent/utils/__init__.py b/source/tests/consistent/utils/__init__.py new file mode 100644 index 0000000000..6ceb116d85 --- /dev/null +++ b/source/tests/consistent/utils/__init__.py @@ -0,0 +1 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later diff --git a/source/tests/consistent/utils/test_stat.py b/source/tests/consistent/utils/test_stat.py new file mode 100644 index 0000000000..7acb41cb61 --- /dev/null +++ b/source/tests/consistent/utils/test_stat.py @@ -0,0 +1,276 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +"""Cross-backend consistency tests for compute_output_stats.""" + +import copy +from collections import ( + defaultdict, +) + +import numpy as np +import pytest + +from deepmd.dpmodel.utils.stat import compute_output_stats as compute_output_stats_dp +from deepmd.dpmodel.utils.stat import ( + compute_output_stats_atomic as compute_output_stats_atomic_dp, +) +from deepmd.dpmodel.utils.stat import ( + compute_output_stats_global as compute_output_stats_global_dp, +) + +from ..common import ( + INSTALLED_PT, +) + +if INSTALLED_PT: + import torch + + from deepmd.pt.utils.stat import compute_output_stats as compute_output_stats_pt + from deepmd.pt.utils.stat import ( + compute_output_stats_atomic as compute_output_stats_atomic_pt, + ) + from deepmd.pt.utils.stat import ( + compute_output_stats_global as compute_output_stats_global_pt, + ) + from deepmd.pt.utils.utils import ( + to_numpy_array, + ) + +NTYPES = 2 +NFRAMES = 2 +NLOC = 4 + + +def _make_data( + has_global: bool, + has_atomic: bool, + mixed_type: bool, + exclude_types: list[int], +) -> tuple[list[dict], list[dict], dict, dict]: + """Build identical stat data for dpmodel (numpy) and pt (torch) backends. + + Returns + ------- + sampled_dp : list[dict] + Data with numpy arrays for the dpmodel backend. + sampled_pt : list[dict] + Data with torch tensors for the pt backend. + global_sampled_idx : dict + Precomputed indices for global labels. + atomic_sampled_idx : dict + Precomputed indices for atomic labels. + """ + rng = np.random.default_rng(42) + + # atype: [nframes, nloc] + atype = np.array([[0, 0, 1, 1], [0, 1, 1, 0]], dtype=np.int64) + + # natoms: [nframes, 2+ntypes] = [nloc_total, nloc_real, count_type0, count_type1] + natoms = np.array([[4, 4, 2, 2], [4, 4, 2, 2]], dtype=np.int64) + + if mixed_type: + # For mixed type, atype may have different counts per frame, + # but natoms is padded uniformly. real_natoms_vec has actual counts. + atype = np.array([[0, 0, 1, 1], [0, 1, 1, 1]], dtype=np.int64) + real_natoms_vec = np.array([[4, 4, 2, 2], [4, 4, 1, 3]], dtype=np.int64) + + # Atomic labels: [nframes, nloc, 1] + atom_energy = rng.normal(size=(NFRAMES, NLOC, 1)) + # Global labels: sum of atom_energy per frame -> [nframes, 1] + energy = atom_energy.sum(axis=1) + + keys = ["energy"] + + # Build a single system dict (both frames in one system) + system: dict = { + "atype": atype, + "natoms": natoms.copy(), + } + if mixed_type: + system["real_natoms_vec"] = real_natoms_vec.copy() + + if has_global: + system["energy"] = energy + system["find_energy"] = np.float32(1.0) + if has_atomic: + system["atom_energy"] = atom_energy + system["find_atom_energy"] = np.float32(1.0) + if exclude_types: + system["atom_exclude_types"] = exclude_types + + sampled_np = [system] + + # Convert to torch tensors for pt backend + def _to_torch(d: dict) -> dict: + out = {} + for k, v in d.items(): + if isinstance(v, np.ndarray): + out[k] = torch.from_numpy(v.copy()) + elif isinstance(v, np.float32): + out[k] = v + else: + out[k] = v + return out + + # Deep-copy before passing to each backend (both may mutate natoms in-place) + sampled_dp = copy.deepcopy(sampled_np) + sampled_pt = [_to_torch(s) for s in copy.deepcopy(sampled_np)] + + # Precompute indices (same logic used by both backends' compute_output_stats) + atomic_sampled_idx: dict = defaultdict(list) + global_sampled_idx: dict = defaultdict(list) + for kk in keys: + for idx, s in enumerate(sampled_np): + if ("find_atom_" + kk) in s and s["find_atom_" + kk] > 0.0: + atomic_sampled_idx[kk].append(idx) + if ("find_" + kk) in s and s["find_" + kk] > 0.0: + global_sampled_idx[kk].append(idx) + + return sampled_dp, sampled_pt, global_sampled_idx, atomic_sampled_idx + + +@pytest.mark.skipif(not INSTALLED_PT, reason="PyTorch is not installed") +class TestComputeOutputStatConsistency: + """Cross-backend consistency tests for compute_output_stats_global/atomic.""" + + @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type + @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types + def test_global(self, mixed_type, exclude_types) -> None: + """compute_output_stats_global dp vs pt.""" + sampled_dp, sampled_pt, global_idx, _ = _make_data( + has_global=True, + has_atomic=False, + mixed_type=mixed_type, + exclude_types=exclude_types, + ) + keys = ["energy"] + + dp_bias, dp_std = compute_output_stats_global_dp( + sampled_dp, NTYPES, keys, global_sampled_idx=global_idx + ) + pt_bias, pt_std = compute_output_stats_global_pt( + sampled_pt, NTYPES, keys, global_sampled_idx=global_idx + ) + + for kk in keys: + assert dp_bias[kk].shape[0] == NTYPES + np.testing.assert_allclose(dp_bias[kk], pt_bias[kk], rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(dp_std[kk], pt_std[kk], rtol=1e-10, atol=1e-10) + + @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type + @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types + def test_atomic(self, mixed_type, exclude_types) -> None: + """compute_output_stats_atomic dp vs pt.""" + sampled_dp, sampled_pt, _, atomic_idx = _make_data( + has_global=False, + has_atomic=True, + mixed_type=mixed_type, + exclude_types=exclude_types, + ) + keys = ["energy"] + + dp_bias, dp_std = compute_output_stats_atomic_dp( + sampled_dp, NTYPES, keys, atomic_sampled_idx=atomic_idx + ) + pt_bias, pt_std = compute_output_stats_atomic_pt( + sampled_pt, NTYPES, keys, atomic_sampled_idx=atomic_idx + ) + + for kk in keys: + assert dp_bias[kk].shape[0] == NTYPES + np.testing.assert_allclose(dp_bias[kk], pt_bias[kk], rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(dp_std[kk], pt_std[kk], rtol=1e-10, atol=1e-10) + + +@pytest.mark.skipif(not INSTALLED_PT, reason="PyTorch is not installed") +class TestComputeOutputStatFullConsistency: + """Cross-backend consistency tests for the top-level compute_output_stats.""" + + @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type + @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types + def test_global_only(self, mixed_type, exclude_types) -> None: + """Global labels only through full compute_output_stats.""" + sampled_dp, sampled_pt, _, _ = _make_data( + has_global=True, + has_atomic=False, + mixed_type=mixed_type, + exclude_types=exclude_types, + ) + keys = ["energy"] + + dp_bias, dp_std = compute_output_stats_dp( + sampled_dp, + NTYPES, + keys, + ) + pt_bias, pt_std = compute_output_stats_pt( + sampled_pt, + NTYPES, + keys, + ) + + for kk in keys: + pt_bias_np = to_numpy_array(pt_bias[kk]) + pt_std_np = to_numpy_array(pt_std[kk]) + assert dp_bias[kk].shape[0] == NTYPES + np.testing.assert_allclose(dp_bias[kk], pt_bias_np, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(dp_std[kk], pt_std_np, rtol=1e-10, atol=1e-10) + + @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type + @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types + def test_atomic_only(self, mixed_type, exclude_types) -> None: + """Atomic labels only through full compute_output_stats.""" + sampled_dp, sampled_pt, _, _ = _make_data( + has_global=False, + has_atomic=True, + mixed_type=mixed_type, + exclude_types=exclude_types, + ) + keys = ["energy"] + + dp_bias, dp_std = compute_output_stats_dp( + sampled_dp, + NTYPES, + keys, + ) + pt_bias, pt_std = compute_output_stats_pt( + sampled_pt, + NTYPES, + keys, + ) + + for kk in keys: + pt_bias_np = to_numpy_array(pt_bias[kk]) + pt_std_np = to_numpy_array(pt_std[kk]) + assert dp_bias[kk].shape[0] == NTYPES + np.testing.assert_allclose(dp_bias[kk], pt_bias_np, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(dp_std[kk], pt_std_np, rtol=1e-10, atol=1e-10) + + @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type + @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types + def test_both_global_and_atomic(self, mixed_type, exclude_types) -> None: + """Both global and atomic labels through full compute_output_stats.""" + sampled_dp, sampled_pt, _, _ = _make_data( + has_global=True, + has_atomic=True, + mixed_type=mixed_type, + exclude_types=exclude_types, + ) + keys = ["energy"] + + dp_bias, dp_std = compute_output_stats_dp( + sampled_dp, + NTYPES, + keys, + ) + pt_bias, pt_std = compute_output_stats_pt( + sampled_pt, + NTYPES, + keys, + ) + + for kk in keys: + pt_bias_np = to_numpy_array(pt_bias[kk]) + pt_std_np = to_numpy_array(pt_std[kk]) + assert dp_bias[kk].shape[0] == NTYPES + np.testing.assert_allclose(dp_bias[kk], pt_bias_np, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(dp_std[kk], pt_std_np, rtol=1e-10, atol=1e-10) From 1fe4507988fd180768c487362601b430a0da9fbd Mon Sep 17 00:00:00 2001 From: Han Wang Date: Wed, 25 Feb 2026 15:40:54 +0800 Subject: [PATCH 4/7] does not change input data --- deepmd/dpmodel/utils/stat.py | 26 +++---- deepmd/pd/utils/stat.py | 27 ++++---- deepmd/pt/utils/stat.py | 27 ++++---- source/tests/consistent/utils/test_stat.py | 80 ++++++++++++++++++---- 4 files changed, 106 insertions(+), 54 deletions(-) diff --git a/deepmd/dpmodel/utils/stat.py b/deepmd/dpmodel/utils/stat.py index 12085300a8..4f3b3b7757 100644 --- a/deepmd/dpmodel/utils/stat.py +++ b/deepmd/dpmodel/utils/stat.py @@ -362,19 +362,19 @@ def compute_output_stats_global( data_mixed_type = "real_natoms_vec" in sampled[0] natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec" - for system in sampled: - if "atom_exclude_types" in system: - type_mask = AtomExcludeMask( - ntypes, system["atom_exclude_types"] - ).get_type_mask() - system[natoms_key][:, 2:] *= type_mask.reshape(1, -1) - input_natoms = { - kk: [ - to_numpy_array(sampled[idx][natoms_key]) - for idx in global_sampled_idx.get(kk, []) - ] - for kk in keys - } + input_natoms = {} + for kk in keys: + kk_natoms = [] + for idx in global_sampled_idx.get(kk, []): + nn = to_numpy_array(sampled[idx][natoms_key]) + if "atom_exclude_types" in sampled[idx]: + nn = nn.copy() + type_mask = AtomExcludeMask( + ntypes, sampled[idx]["atom_exclude_types"] + ).get_type_mask() + nn[:, 2:] *= type_mask.reshape(1, -1) + kk_natoms.append(nn) + input_natoms[kk] = kk_natoms # shape: (nframes, ndim) merged_output = { diff --git a/deepmd/pd/utils/stat.py b/deepmd/pd/utils/stat.py index f3854e91ad..78b1f52c0c 100644 --- a/deepmd/pd/utils/stat.py +++ b/deepmd/pd/utils/stat.py @@ -441,20 +441,19 @@ def compute_output_stats_global( data_mixed_type = "real_natoms_vec" in sampled[0] natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec" - for system in sampled: - if "atom_exclude_types" in system: - type_mask = AtomExcludeMask( - ntypes, system["atom_exclude_types"] - ).get_type_mask() - system[natoms_key][:, 2:] *= type_mask.unsqueeze(0) - - input_natoms = { - kk: [ - to_numpy_array(sampled[idx][natoms_key]) - for idx in global_sampled_idx.get(kk, []) - ] - for kk in keys - } + input_natoms = {} + for kk in keys: + kk_natoms = [] + for idx in global_sampled_idx.get(kk, []): + nn = to_numpy_array(sampled[idx][natoms_key]) + if "atom_exclude_types" in sampled[idx]: + nn = nn.copy() + type_mask = AtomExcludeMask( + ntypes, sampled[idx]["atom_exclude_types"] + ).get_type_mask() + nn[:, 2:] *= to_numpy_array(type_mask).reshape(1, -1) + kk_natoms.append(nn) + input_natoms[kk] = kk_natoms # shape: (nframes, ndim) merged_output = { kk: np.concatenate(outputs[kk]) for kk in keys if len(outputs[kk]) > 0 diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 657cfa1f17..5f1335b42b 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -441,20 +441,19 @@ def compute_output_stats_global( data_mixed_type = "real_natoms_vec" in sampled[0] natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec" - for system in sampled: - if "atom_exclude_types" in system: - type_mask = AtomExcludeMask( - ntypes, system["atom_exclude_types"] - ).get_type_mask() - system[natoms_key][:, 2:] *= type_mask.unsqueeze(0) - - input_natoms = { - kk: [ - to_numpy_array(sampled[idx][natoms_key]) - for idx in global_sampled_idx.get(kk, []) - ] - for kk in keys - } + input_natoms = {} + for kk in keys: + kk_natoms = [] + for idx in global_sampled_idx.get(kk, []): + nn = to_numpy_array(sampled[idx][natoms_key]) + if "atom_exclude_types" in sampled[idx]: + nn = nn.copy() + type_mask = AtomExcludeMask( + ntypes, sampled[idx]["atom_exclude_types"] + ).get_type_mask() + nn[:, 2:] *= to_numpy_array(type_mask).reshape(1, -1) + kk_natoms.append(nn) + input_natoms[kk] = kk_natoms # shape: (nframes, ndim) merged_output = { kk: np.concatenate(outputs[kk]) for kk in keys if len(outputs[kk]) > 0 diff --git a/source/tests/consistent/utils/test_stat.py b/source/tests/consistent/utils/test_stat.py index 7acb41cb61..7f6c38cdf0 100644 --- a/source/tests/consistent/utils/test_stat.py +++ b/source/tests/consistent/utils/test_stat.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later """Cross-backend consistency tests for compute_output_stats.""" -import copy from collections import ( defaultdict, ) @@ -81,23 +80,23 @@ def _make_data( keys = ["energy"] # Build a single system dict (both frames in one system) - system: dict = { + system_np: dict = { "atype": atype, "natoms": natoms.copy(), } if mixed_type: - system["real_natoms_vec"] = real_natoms_vec.copy() + system_np["real_natoms_vec"] = real_natoms_vec.copy() if has_global: - system["energy"] = energy - system["find_energy"] = np.float32(1.0) + system_np["energy"] = energy + system_np["find_energy"] = np.float32(1.0) if has_atomic: - system["atom_energy"] = atom_energy - system["find_atom_energy"] = np.float32(1.0) + system_np["atom_energy"] = atom_energy + system_np["find_atom_energy"] = np.float32(1.0) if exclude_types: - system["atom_exclude_types"] = exclude_types + system_np["atom_exclude_types"] = exclude_types - sampled_np = [system] + sampled_dp = [system_np] # Convert to torch tensors for pt backend def _to_torch(d: dict) -> dict: @@ -111,15 +110,13 @@ def _to_torch(d: dict) -> dict: out[k] = v return out - # Deep-copy before passing to each backend (both may mutate natoms in-place) - sampled_dp = copy.deepcopy(sampled_np) - sampled_pt = [_to_torch(s) for s in copy.deepcopy(sampled_np)] + sampled_pt = [_to_torch(system_np)] # Precompute indices (same logic used by both backends' compute_output_stats) atomic_sampled_idx: dict = defaultdict(list) global_sampled_idx: dict = defaultdict(list) for kk in keys: - for idx, s in enumerate(sampled_np): + for idx, s in enumerate(sampled_dp): if ("find_atom_" + kk) in s and s["find_atom_" + kk] > 0.0: atomic_sampled_idx[kk].append(idx) if ("find_" + kk) in s and s["find_" + kk] > 0.0: @@ -274,3 +271,60 @@ def test_both_global_and_atomic(self, mixed_type, exclude_types) -> None: assert dp_bias[kk].shape[0] == NTYPES np.testing.assert_allclose(dp_bias[kk], pt_bias_np, rtol=1e-10, atol=1e-10) np.testing.assert_allclose(dp_std[kk], pt_std_np, rtol=1e-10, atol=1e-10) + + +@pytest.mark.skipif(not INSTALLED_PT, reason="PyTorch is not installed") +class TestComputeOutputStatNoMutation: + """Verify that stat functions do not mutate input sampled data.""" + + @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type + def test_global_no_mutation(self, mixed_type) -> None: + """compute_output_stats_global must not mutate input with exclude_types.""" + sampled_dp, sampled_pt, global_idx, _ = _make_data( + has_global=True, + has_atomic=False, + mixed_type=mixed_type, + exclude_types=[1], + ) + keys = ["energy"] + natoms_key = "real_natoms_vec" if mixed_type else "natoms" + + # snapshot before + dp_natoms_before = sampled_dp[0][natoms_key].copy() + pt_natoms_before = sampled_pt[0][natoms_key].clone() + + compute_output_stats_global_dp( + sampled_dp, NTYPES, keys, global_sampled_idx=global_idx + ) + compute_output_stats_global_pt( + sampled_pt, NTYPES, keys, global_sampled_idx=global_idx + ) + + # verify no mutation + np.testing.assert_array_equal(sampled_dp[0][natoms_key], dp_natoms_before) + np.testing.assert_array_equal( + sampled_pt[0][natoms_key].numpy(), pt_natoms_before.numpy() + ) + + @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type + def test_full_no_mutation(self, mixed_type) -> None: + """compute_output_stats must not mutate input with exclude_types.""" + sampled_dp, sampled_pt, _, _ = _make_data( + has_global=True, + has_atomic=True, + mixed_type=mixed_type, + exclude_types=[1], + ) + keys = ["energy"] + natoms_key = "real_natoms_vec" if mixed_type else "natoms" + + dp_natoms_before = sampled_dp[0][natoms_key].copy() + pt_natoms_before = sampled_pt[0][natoms_key].clone() + + compute_output_stats_dp(sampled_dp, NTYPES, keys) + compute_output_stats_pt(sampled_pt, NTYPES, keys) + + np.testing.assert_array_equal(sampled_dp[0][natoms_key], dp_natoms_before) + np.testing.assert_array_equal( + sampled_pt[0][natoms_key].numpy(), pt_natoms_before.numpy() + ) From a53d7d5d50f1a07c8fda63acf3fe0001154245d4 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Wed, 25 Feb 2026 15:45:06 +0800 Subject: [PATCH 5/7] add consistency test between dp and pd --- source/tests/consistent/utils/test_stat.py | 359 ++++++++++++++++----- 1 file changed, 276 insertions(+), 83 deletions(-) diff --git a/source/tests/consistent/utils/test_stat.py b/source/tests/consistent/utils/test_stat.py index 7f6c38cdf0..eb6d6745ed 100644 --- a/source/tests/consistent/utils/test_stat.py +++ b/source/tests/consistent/utils/test_stat.py @@ -17,6 +17,7 @@ ) from ..common import ( + INSTALLED_PD, INSTALLED_PT, ) @@ -30,9 +31,18 @@ from deepmd.pt.utils.stat import ( compute_output_stats_global as compute_output_stats_global_pt, ) - from deepmd.pt.utils.utils import ( - to_numpy_array, + from deepmd.pt.utils.utils import to_numpy_array as to_numpy_array_pt +if INSTALLED_PD: + import paddle + + from deepmd.pd.utils.stat import compute_output_stats as compute_output_stats_pd + from deepmd.pd.utils.stat import ( + compute_output_stats_atomic as compute_output_stats_atomic_pd, + ) + from deepmd.pd.utils.stat import ( + compute_output_stats_global as compute_output_stats_global_pd, ) + from deepmd.pd.utils.utils import to_numpy_array as to_numpy_array_pd NTYPES = 2 NFRAMES = 2 @@ -44,15 +54,13 @@ def _make_data( has_atomic: bool, mixed_type: bool, exclude_types: list[int], -) -> tuple[list[dict], list[dict], dict, dict]: - """Build identical stat data for dpmodel (numpy) and pt (torch) backends. +) -> tuple[list[dict], dict, dict]: + """Build stat data with numpy arrays and precomputed indices. Returns ------- - sampled_dp : list[dict] - Data with numpy arrays for the dpmodel backend. - sampled_pt : list[dict] - Data with torch tensors for the pt backend. + sampled : list[dict] + Data with numpy arrays. global_sampled_idx : dict Precomputed indices for global labels. atomic_sampled_idx : dict @@ -96,10 +104,25 @@ def _make_data( if exclude_types: system_np["atom_exclude_types"] = exclude_types - sampled_dp = [system_np] + sampled = [system_np] + + # Precompute indices (same logic used by all backends' compute_output_stats) + atomic_sampled_idx: dict = defaultdict(list) + global_sampled_idx: dict = defaultdict(list) + for kk in keys: + for idx, s in enumerate(sampled): + if ("find_atom_" + kk) in s and s["find_atom_" + kk] > 0.0: + atomic_sampled_idx[kk].append(idx) + if ("find_" + kk) in s and s["find_" + kk] > 0.0: + global_sampled_idx[kk].append(idx) + + return sampled, global_sampled_idx, atomic_sampled_idx + - # Convert to torch tensors for pt backend - def _to_torch(d: dict) -> dict: +def _np_to_torch(sampled: list[dict]) -> list[dict]: + """Convert numpy sampled data to torch tensors.""" + result = [] + for d in sampled: out = {} for k, v in d.items(): if isinstance(v, np.ndarray): @@ -108,41 +131,45 @@ def _to_torch(d: dict) -> dict: out[k] = v else: out[k] = v - return out - - sampled_pt = [_to_torch(system_np)] + result.append(out) + return result - # Precompute indices (same logic used by both backends' compute_output_stats) - atomic_sampled_idx: dict = defaultdict(list) - global_sampled_idx: dict = defaultdict(list) - for kk in keys: - for idx, s in enumerate(sampled_dp): - if ("find_atom_" + kk) in s and s["find_atom_" + kk] > 0.0: - atomic_sampled_idx[kk].append(idx) - if ("find_" + kk) in s and s["find_" + kk] > 0.0: - global_sampled_idx[kk].append(idx) - return sampled_dp, sampled_pt, global_sampled_idx, atomic_sampled_idx +def _np_to_paddle(sampled: list[dict]) -> list[dict]: + """Convert numpy sampled data to paddle tensors.""" + result = [] + for d in sampled: + out = {} + for k, v in d.items(): + if isinstance(v, np.ndarray): + out[k] = paddle.to_tensor(v.copy()) + elif isinstance(v, np.float32): + out[k] = v + else: + out[k] = v + result.append(out) + return result @pytest.mark.skipif(not INSTALLED_PT, reason="PyTorch is not installed") -class TestComputeOutputStatConsistency: - """Cross-backend consistency tests for compute_output_stats_global/atomic.""" +class TestComputeOutputStatConsistencyPT: + """Cross-backend consistency tests for compute_output_stats_global/atomic: dp vs pt.""" @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types def test_global(self, mixed_type, exclude_types) -> None: """compute_output_stats_global dp vs pt.""" - sampled_dp, sampled_pt, global_idx, _ = _make_data( + sampled, global_idx, _ = _make_data( has_global=True, has_atomic=False, mixed_type=mixed_type, exclude_types=exclude_types, ) + sampled_pt = _np_to_torch(sampled) keys = ["energy"] dp_bias, dp_std = compute_output_stats_global_dp( - sampled_dp, NTYPES, keys, global_sampled_idx=global_idx + sampled, NTYPES, keys, global_sampled_idx=global_idx ) pt_bias, pt_std = compute_output_stats_global_pt( sampled_pt, NTYPES, keys, global_sampled_idx=global_idx @@ -157,16 +184,17 @@ def test_global(self, mixed_type, exclude_types) -> None: @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types def test_atomic(self, mixed_type, exclude_types) -> None: """compute_output_stats_atomic dp vs pt.""" - sampled_dp, sampled_pt, _, atomic_idx = _make_data( + sampled, _, atomic_idx = _make_data( has_global=False, has_atomic=True, mixed_type=mixed_type, exclude_types=exclude_types, ) + sampled_pt = _np_to_torch(sampled) keys = ["energy"] dp_bias, dp_std = compute_output_stats_atomic_dp( - sampled_dp, NTYPES, keys, atomic_sampled_idx=atomic_idx + sampled, NTYPES, keys, atomic_sampled_idx=atomic_idx ) pt_bias, pt_std = compute_output_stats_atomic_pt( sampled_pt, NTYPES, keys, atomic_sampled_idx=atomic_idx @@ -179,35 +207,28 @@ def test_atomic(self, mixed_type, exclude_types) -> None: @pytest.mark.skipif(not INSTALLED_PT, reason="PyTorch is not installed") -class TestComputeOutputStatFullConsistency: - """Cross-backend consistency tests for the top-level compute_output_stats.""" +class TestComputeOutputStatFullConsistencyPT: + """Cross-backend consistency tests for the top-level compute_output_stats: dp vs pt.""" @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types def test_global_only(self, mixed_type, exclude_types) -> None: """Global labels only through full compute_output_stats.""" - sampled_dp, sampled_pt, _, _ = _make_data( + sampled, _, _ = _make_data( has_global=True, has_atomic=False, mixed_type=mixed_type, exclude_types=exclude_types, ) + sampled_pt = _np_to_torch(sampled) keys = ["energy"] - dp_bias, dp_std = compute_output_stats_dp( - sampled_dp, - NTYPES, - keys, - ) - pt_bias, pt_std = compute_output_stats_pt( - sampled_pt, - NTYPES, - keys, - ) + dp_bias, dp_std = compute_output_stats_dp(sampled, NTYPES, keys) + pt_bias, pt_std = compute_output_stats_pt(sampled_pt, NTYPES, keys) for kk in keys: - pt_bias_np = to_numpy_array(pt_bias[kk]) - pt_std_np = to_numpy_array(pt_std[kk]) + pt_bias_np = to_numpy_array_pt(pt_bias[kk]) + pt_std_np = to_numpy_array_pt(pt_std[kk]) assert dp_bias[kk].shape[0] == NTYPES np.testing.assert_allclose(dp_bias[kk], pt_bias_np, rtol=1e-10, atol=1e-10) np.testing.assert_allclose(dp_std[kk], pt_std_np, rtol=1e-10, atol=1e-10) @@ -216,28 +237,21 @@ def test_global_only(self, mixed_type, exclude_types) -> None: @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types def test_atomic_only(self, mixed_type, exclude_types) -> None: """Atomic labels only through full compute_output_stats.""" - sampled_dp, sampled_pt, _, _ = _make_data( + sampled, _, _ = _make_data( has_global=False, has_atomic=True, mixed_type=mixed_type, exclude_types=exclude_types, ) + sampled_pt = _np_to_torch(sampled) keys = ["energy"] - dp_bias, dp_std = compute_output_stats_dp( - sampled_dp, - NTYPES, - keys, - ) - pt_bias, pt_std = compute_output_stats_pt( - sampled_pt, - NTYPES, - keys, - ) + dp_bias, dp_std = compute_output_stats_dp(sampled, NTYPES, keys) + pt_bias, pt_std = compute_output_stats_pt(sampled_pt, NTYPES, keys) for kk in keys: - pt_bias_np = to_numpy_array(pt_bias[kk]) - pt_std_np = to_numpy_array(pt_std[kk]) + pt_bias_np = to_numpy_array_pt(pt_bias[kk]) + pt_std_np = to_numpy_array_pt(pt_std[kk]) assert dp_bias[kk].shape[0] == NTYPES np.testing.assert_allclose(dp_bias[kk], pt_bias_np, rtol=1e-10, atol=1e-10) np.testing.assert_allclose(dp_std[kk], pt_std_np, rtol=1e-10, atol=1e-10) @@ -246,62 +260,183 @@ def test_atomic_only(self, mixed_type, exclude_types) -> None: @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types def test_both_global_and_atomic(self, mixed_type, exclude_types) -> None: """Both global and atomic labels through full compute_output_stats.""" - sampled_dp, sampled_pt, _, _ = _make_data( + sampled, _, _ = _make_data( has_global=True, has_atomic=True, mixed_type=mixed_type, exclude_types=exclude_types, ) + sampled_pt = _np_to_torch(sampled) keys = ["energy"] - dp_bias, dp_std = compute_output_stats_dp( - sampled_dp, - NTYPES, - keys, - ) - pt_bias, pt_std = compute_output_stats_pt( - sampled_pt, - NTYPES, - keys, - ) + dp_bias, dp_std = compute_output_stats_dp(sampled, NTYPES, keys) + pt_bias, pt_std = compute_output_stats_pt(sampled_pt, NTYPES, keys) for kk in keys: - pt_bias_np = to_numpy_array(pt_bias[kk]) - pt_std_np = to_numpy_array(pt_std[kk]) + pt_bias_np = to_numpy_array_pt(pt_bias[kk]) + pt_std_np = to_numpy_array_pt(pt_std[kk]) assert dp_bias[kk].shape[0] == NTYPES np.testing.assert_allclose(dp_bias[kk], pt_bias_np, rtol=1e-10, atol=1e-10) np.testing.assert_allclose(dp_std[kk], pt_std_np, rtol=1e-10, atol=1e-10) +@pytest.mark.skipif(not INSTALLED_PD, reason="Paddle is not installed") +class TestComputeOutputStatConsistencyPD: + """Cross-backend consistency tests for compute_output_stats_global/atomic: dp vs pd.""" + + @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type + @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types + def test_global(self, mixed_type, exclude_types) -> None: + """compute_output_stats_global dp vs pd.""" + sampled, global_idx, _ = _make_data( + has_global=True, + has_atomic=False, + mixed_type=mixed_type, + exclude_types=exclude_types, + ) + sampled_pd = _np_to_paddle(sampled) + keys = ["energy"] + + dp_bias, dp_std = compute_output_stats_global_dp( + sampled, NTYPES, keys, global_sampled_idx=global_idx + ) + pd_bias, pd_std = compute_output_stats_global_pd( + sampled_pd, NTYPES, keys, global_sampled_idx=global_idx + ) + + for kk in keys: + assert dp_bias[kk].shape[0] == NTYPES + np.testing.assert_allclose(dp_bias[kk], pd_bias[kk], rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(dp_std[kk], pd_std[kk], rtol=1e-10, atol=1e-10) + + @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type + @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types + def test_atomic(self, mixed_type, exclude_types) -> None: + """compute_output_stats_atomic dp vs pd.""" + sampled, _, atomic_idx = _make_data( + has_global=False, + has_atomic=True, + mixed_type=mixed_type, + exclude_types=exclude_types, + ) + sampled_pd = _np_to_paddle(sampled) + keys = ["energy"] + + dp_bias, dp_std = compute_output_stats_atomic_dp( + sampled, NTYPES, keys, atomic_sampled_idx=atomic_idx + ) + pd_bias, pd_std = compute_output_stats_atomic_pd( + sampled_pd, NTYPES, keys, atomic_sampled_idx=atomic_idx + ) + + for kk in keys: + assert dp_bias[kk].shape[0] == NTYPES + np.testing.assert_allclose(dp_bias[kk], pd_bias[kk], rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(dp_std[kk], pd_std[kk], rtol=1e-10, atol=1e-10) + + +@pytest.mark.skipif(not INSTALLED_PD, reason="Paddle is not installed") +class TestComputeOutputStatFullConsistencyPD: + """Cross-backend consistency tests for the top-level compute_output_stats: dp vs pd.""" + + @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type + @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types + def test_global_only(self, mixed_type, exclude_types) -> None: + """Global labels only through full compute_output_stats.""" + sampled, _, _ = _make_data( + has_global=True, + has_atomic=False, + mixed_type=mixed_type, + exclude_types=exclude_types, + ) + sampled_pd = _np_to_paddle(sampled) + keys = ["energy"] + + dp_bias, dp_std = compute_output_stats_dp(sampled, NTYPES, keys) + pd_bias, pd_std = compute_output_stats_pd(sampled_pd, NTYPES, keys) + + for kk in keys: + pd_bias_np = to_numpy_array_pd(pd_bias[kk]) + pd_std_np = to_numpy_array_pd(pd_std[kk]) + assert dp_bias[kk].shape[0] == NTYPES + np.testing.assert_allclose(dp_bias[kk], pd_bias_np, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(dp_std[kk], pd_std_np, rtol=1e-10, atol=1e-10) + + @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type + @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types + def test_atomic_only(self, mixed_type, exclude_types) -> None: + """Atomic labels only through full compute_output_stats.""" + sampled, _, _ = _make_data( + has_global=False, + has_atomic=True, + mixed_type=mixed_type, + exclude_types=exclude_types, + ) + sampled_pd = _np_to_paddle(sampled) + keys = ["energy"] + + dp_bias, dp_std = compute_output_stats_dp(sampled, NTYPES, keys) + pd_bias, pd_std = compute_output_stats_pd(sampled_pd, NTYPES, keys) + + for kk in keys: + pd_bias_np = to_numpy_array_pd(pd_bias[kk]) + pd_std_np = to_numpy_array_pd(pd_std[kk]) + assert dp_bias[kk].shape[0] == NTYPES + np.testing.assert_allclose(dp_bias[kk], pd_bias_np, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(dp_std[kk], pd_std_np, rtol=1e-10, atol=1e-10) + + @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type + @pytest.mark.parametrize("exclude_types", [[], [1]]) # atom_exclude_types + def test_both_global_and_atomic(self, mixed_type, exclude_types) -> None: + """Both global and atomic labels through full compute_output_stats.""" + sampled, _, _ = _make_data( + has_global=True, + has_atomic=True, + mixed_type=mixed_type, + exclude_types=exclude_types, + ) + sampled_pd = _np_to_paddle(sampled) + keys = ["energy"] + + dp_bias, dp_std = compute_output_stats_dp(sampled, NTYPES, keys) + pd_bias, pd_std = compute_output_stats_pd(sampled_pd, NTYPES, keys) + + for kk in keys: + pd_bias_np = to_numpy_array_pd(pd_bias[kk]) + pd_std_np = to_numpy_array_pd(pd_std[kk]) + assert dp_bias[kk].shape[0] == NTYPES + np.testing.assert_allclose(dp_bias[kk], pd_bias_np, rtol=1e-10, atol=1e-10) + np.testing.assert_allclose(dp_std[kk], pd_std_np, rtol=1e-10, atol=1e-10) + + @pytest.mark.skipif(not INSTALLED_PT, reason="PyTorch is not installed") -class TestComputeOutputStatNoMutation: - """Verify that stat functions do not mutate input sampled data.""" +class TestComputeOutputStatNoMutationPT: + """Verify that stat functions do not mutate input sampled data (pt).""" @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type def test_global_no_mutation(self, mixed_type) -> None: """compute_output_stats_global must not mutate input with exclude_types.""" - sampled_dp, sampled_pt, global_idx, _ = _make_data( + sampled, global_idx, _ = _make_data( has_global=True, has_atomic=False, mixed_type=mixed_type, exclude_types=[1], ) + sampled_pt = _np_to_torch(sampled) keys = ["energy"] natoms_key = "real_natoms_vec" if mixed_type else "natoms" - # snapshot before - dp_natoms_before = sampled_dp[0][natoms_key].copy() + dp_natoms_before = sampled[0][natoms_key].copy() pt_natoms_before = sampled_pt[0][natoms_key].clone() compute_output_stats_global_dp( - sampled_dp, NTYPES, keys, global_sampled_idx=global_idx + sampled, NTYPES, keys, global_sampled_idx=global_idx ) compute_output_stats_global_pt( sampled_pt, NTYPES, keys, global_sampled_idx=global_idx ) - # verify no mutation - np.testing.assert_array_equal(sampled_dp[0][natoms_key], dp_natoms_before) + np.testing.assert_array_equal(sampled[0][natoms_key], dp_natoms_before) np.testing.assert_array_equal( sampled_pt[0][natoms_key].numpy(), pt_natoms_before.numpy() ) @@ -309,22 +444,80 @@ def test_global_no_mutation(self, mixed_type) -> None: @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type def test_full_no_mutation(self, mixed_type) -> None: """compute_output_stats must not mutate input with exclude_types.""" - sampled_dp, sampled_pt, _, _ = _make_data( + sampled, _, _ = _make_data( has_global=True, has_atomic=True, mixed_type=mixed_type, exclude_types=[1], ) + sampled_pt = _np_to_torch(sampled) keys = ["energy"] natoms_key = "real_natoms_vec" if mixed_type else "natoms" - dp_natoms_before = sampled_dp[0][natoms_key].copy() + dp_natoms_before = sampled[0][natoms_key].copy() pt_natoms_before = sampled_pt[0][natoms_key].clone() - compute_output_stats_dp(sampled_dp, NTYPES, keys) + compute_output_stats_dp(sampled, NTYPES, keys) compute_output_stats_pt(sampled_pt, NTYPES, keys) - np.testing.assert_array_equal(sampled_dp[0][natoms_key], dp_natoms_before) + np.testing.assert_array_equal(sampled[0][natoms_key], dp_natoms_before) np.testing.assert_array_equal( sampled_pt[0][natoms_key].numpy(), pt_natoms_before.numpy() ) + + +@pytest.mark.skipif(not INSTALLED_PD, reason="Paddle is not installed") +class TestComputeOutputStatNoMutationPD: + """Verify that stat functions do not mutate input sampled data (pd).""" + + @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type + def test_global_no_mutation(self, mixed_type) -> None: + """compute_output_stats_global must not mutate input with exclude_types.""" + sampled, global_idx, _ = _make_data( + has_global=True, + has_atomic=False, + mixed_type=mixed_type, + exclude_types=[1], + ) + sampled_pd = _np_to_paddle(sampled) + keys = ["energy"] + natoms_key = "real_natoms_vec" if mixed_type else "natoms" + + dp_natoms_before = sampled[0][natoms_key].copy() + pd_natoms_before = sampled_pd[0][natoms_key].numpy().copy() + + compute_output_stats_global_dp( + sampled, NTYPES, keys, global_sampled_idx=global_idx + ) + compute_output_stats_global_pd( + sampled_pd, NTYPES, keys, global_sampled_idx=global_idx + ) + + np.testing.assert_array_equal(sampled[0][natoms_key], dp_natoms_before) + np.testing.assert_array_equal( + sampled_pd[0][natoms_key].numpy(), pd_natoms_before + ) + + @pytest.mark.parametrize("mixed_type", [False, True]) # mixed_type + def test_full_no_mutation(self, mixed_type) -> None: + """compute_output_stats must not mutate input with exclude_types.""" + sampled, _, _ = _make_data( + has_global=True, + has_atomic=True, + mixed_type=mixed_type, + exclude_types=[1], + ) + sampled_pd = _np_to_paddle(sampled) + keys = ["energy"] + natoms_key = "real_natoms_vec" if mixed_type else "natoms" + + dp_natoms_before = sampled[0][natoms_key].copy() + pd_natoms_before = sampled_pd[0][natoms_key].numpy().copy() + + compute_output_stats_dp(sampled, NTYPES, keys) + compute_output_stats_pd(sampled_pd, NTYPES, keys) + + np.testing.assert_array_equal(sampled[0][natoms_key], dp_natoms_before) + np.testing.assert_array_equal( + sampled_pd[0][natoms_key].numpy(), pd_natoms_before + ) From f47c36b5f34742f368188d8f4ae4bb58705b0367 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Wed, 25 Feb 2026 15:54:39 +0800 Subject: [PATCH 6/7] rm unused natoms. --- deepmd/dpmodel/utils/env_mat_stat.py | 3 +-- deepmd/pd/utils/env_mat_stat.py | 3 +-- deepmd/pd/utils/stat.py | 3 +-- deepmd/pt/utils/stat.py | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/deepmd/dpmodel/utils/env_mat_stat.py b/deepmd/dpmodel/utils/env_mat_stat.py index 37a69ea1b1..238e395104 100644 --- a/deepmd/dpmodel/utils/env_mat_stat.py +++ b/deepmd/dpmodel/utils/env_mat_stat.py @@ -128,11 +128,10 @@ def iter( device=array_api_compat.device(data[0]["coord"]), ) for system in data: - coord, atype, box, natoms = ( + coord, atype, box = ( system["coord"], system["atype"], system["box"], - system["natoms"], ) ( extended_coord, diff --git a/deepmd/pd/utils/env_mat_stat.py b/deepmd/pd/utils/env_mat_stat.py index aed5259a50..53c41a9edc 100644 --- a/deepmd/pd/utils/env_mat_stat.py +++ b/deepmd/pd/utils/env_mat_stat.py @@ -107,11 +107,10 @@ def iter( "last_dim should be 1 for raial-only or 4 for full descriptor." ) for system in data: - coord, atype, box, natoms = ( + coord, atype, box = ( system["coord"], system["atype"], system["box"], - system["natoms"], ) ( extended_coord, diff --git a/deepmd/pd/utils/stat.py b/deepmd/pd/utils/stat.py index 78b1f52c0c..cf9bad2290 100644 --- a/deepmd/pd/utils/stat.py +++ b/deepmd/pd/utils/stat.py @@ -167,11 +167,10 @@ def _compute_model_predict( model_predict = {kk: [] for kk in keys} for system in sampled: nframes = system["coord"].shape[0] - coord, atype, box, natoms = ( + coord, atype, box = ( system["coord"], system["atype"], system["box"], - system["natoms"], ) fparam = system.get("fparam", None) aparam = system.get("aparam", None) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 5f1335b42b..8200977b00 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -167,11 +167,10 @@ def _compute_model_predict( model_predict = {kk: [] for kk in keys} for system in sampled: nframes = system["coord"].shape[0] - coord, atype, box, natoms = ( + coord, atype, box = ( system["coord"], system["atype"], system["box"], - system["natoms"], ) fparam = system.get("fparam", None) aparam = system.get("aparam", None) From 6173cf04967f349d4df1a62b111a766366d11df3 Mon Sep 17 00:00:00 2001 From: Han Wang Date: Thu, 26 Feb 2026 09:43:37 +0800 Subject: [PATCH 7/7] add _ for helpers --- deepmd/dpmodel/utils/stat.py | 8 ++++---- deepmd/pd/utils/stat.py | 8 ++++---- deepmd/pt/utils/stat.py | 8 ++++---- source/tests/consistent/utils/test_stat.py | 18 +++++++++--------- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/deepmd/dpmodel/utils/stat.py b/deepmd/dpmodel/utils/stat.py index 4f3b3b7757..34c500d7c8 100644 --- a/deepmd/dpmodel/utils/stat.py +++ b/deepmd/dpmodel/utils/stat.py @@ -292,7 +292,7 @@ def compute_output_stats( ) # compute stat - bias_atom_g, std_atom_g = compute_output_stats_global( + bias_atom_g, std_atom_g = _compute_output_stats_global( sampled, ntypes, keys, @@ -303,7 +303,7 @@ def compute_output_stats( intensive, model_pred_g, ) - bias_atom_a, std_atom_a = compute_output_stats_atomic( + bias_atom_a, std_atom_a = _compute_output_stats_atomic( sampled, ntypes, keys, @@ -336,7 +336,7 @@ def compute_output_stats( return bias_atom_e, std_atom_e -def compute_output_stats_global( +def _compute_output_stats_global( sampled: list[dict], ntypes: int, keys: list[str], @@ -461,7 +461,7 @@ def rmse(x: np.ndarray) -> float: return bias_atom_e, std_atom_e -def compute_output_stats_atomic( +def _compute_output_stats_atomic( sampled: list[dict], ntypes: int, keys: list[str], diff --git a/deepmd/pd/utils/stat.py b/deepmd/pd/utils/stat.py index cf9bad2290..b6d635833c 100644 --- a/deepmd/pd/utils/stat.py +++ b/deepmd/pd/utils/stat.py @@ -368,7 +368,7 @@ def compute_output_stats( ) # compute stat - bias_atom_g, std_atom_g = compute_output_stats_global( + bias_atom_g, std_atom_g = _compute_output_stats_global( sampled, ntypes, keys, @@ -379,7 +379,7 @@ def compute_output_stats( intensive, model_pred_g, ) - bias_atom_a, std_atom_a = compute_output_stats_atomic( + bias_atom_a, std_atom_a = _compute_output_stats_atomic( sampled, ntypes, keys, @@ -414,7 +414,7 @@ def compute_output_stats( return bias_atom_e, std_atom_e -def compute_output_stats_global( +def _compute_output_stats_global( sampled: list[dict], ntypes: int, keys: list[str], @@ -542,7 +542,7 @@ def rmse(x: np.ndarray) -> float: return bias_atom_e, std_atom_e -def compute_output_stats_atomic( +def _compute_output_stats_atomic( sampled: list[dict], ntypes: int, keys: list[str], diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 8200977b00..b2824d0ac4 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -368,7 +368,7 @@ def compute_output_stats( ) # compute stat - bias_atom_g, std_atom_g = compute_output_stats_global( + bias_atom_g, std_atom_g = _compute_output_stats_global( sampled, ntypes, keys, @@ -379,7 +379,7 @@ def compute_output_stats( intensive, model_pred_g, ) - bias_atom_a, std_atom_a = compute_output_stats_atomic( + bias_atom_a, std_atom_a = _compute_output_stats_atomic( sampled, ntypes, keys, @@ -414,7 +414,7 @@ def compute_output_stats( return bias_atom_e, std_atom_e -def compute_output_stats_global( +def _compute_output_stats_global( sampled: list[dict], ntypes: int, keys: list[str], @@ -539,7 +539,7 @@ def rmse(x: np.ndarray) -> float: return bias_atom_e, std_atom_e -def compute_output_stats_atomic( +def _compute_output_stats_atomic( sampled: list[dict], ntypes: int, keys: list[str], diff --git a/source/tests/consistent/utils/test_stat.py b/source/tests/consistent/utils/test_stat.py index eb6d6745ed..d2a7a6c1b6 100644 --- a/source/tests/consistent/utils/test_stat.py +++ b/source/tests/consistent/utils/test_stat.py @@ -8,13 +8,13 @@ import numpy as np import pytest -from deepmd.dpmodel.utils.stat import compute_output_stats as compute_output_stats_dp from deepmd.dpmodel.utils.stat import ( - compute_output_stats_atomic as compute_output_stats_atomic_dp, + _compute_output_stats_atomic as compute_output_stats_atomic_dp, ) from deepmd.dpmodel.utils.stat import ( - compute_output_stats_global as compute_output_stats_global_dp, + _compute_output_stats_global as compute_output_stats_global_dp, ) +from deepmd.dpmodel.utils.stat import compute_output_stats as compute_output_stats_dp from ..common import ( INSTALLED_PD, @@ -24,24 +24,24 @@ if INSTALLED_PT: import torch - from deepmd.pt.utils.stat import compute_output_stats as compute_output_stats_pt from deepmd.pt.utils.stat import ( - compute_output_stats_atomic as compute_output_stats_atomic_pt, + _compute_output_stats_atomic as compute_output_stats_atomic_pt, ) from deepmd.pt.utils.stat import ( - compute_output_stats_global as compute_output_stats_global_pt, + _compute_output_stats_global as compute_output_stats_global_pt, ) + from deepmd.pt.utils.stat import compute_output_stats as compute_output_stats_pt from deepmd.pt.utils.utils import to_numpy_array as to_numpy_array_pt if INSTALLED_PD: import paddle - from deepmd.pd.utils.stat import compute_output_stats as compute_output_stats_pd from deepmd.pd.utils.stat import ( - compute_output_stats_atomic as compute_output_stats_atomic_pd, + _compute_output_stats_atomic as compute_output_stats_atomic_pd, ) from deepmd.pd.utils.stat import ( - compute_output_stats_global as compute_output_stats_global_pd, + _compute_output_stats_global as compute_output_stats_global_pd, ) + from deepmd.pd.utils.stat import compute_output_stats as compute_output_stats_pd from deepmd.pd.utils.utils import to_numpy_array as to_numpy_array_pd NTYPES = 2