Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions deepmd/dpmodel/utils/env_mat_stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,10 @@ def iter(
device=array_api_compat.device(data[0]["coord"]),
)
for system in data:
coord, atype, box, natoms = (
coord, atype, box = (
system["coord"],
system["atype"],
system["box"],
system["natoms"],
)
(
extended_coord,
Expand Down
38 changes: 23 additions & 15 deletions deepmd/dpmodel/utils/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
from deepmd.dpmodel.common import (
to_numpy_array,
)
from deepmd.dpmodel.utils.exclude_mask import (
AtomExcludeMask,
)
from deepmd.utils.out_stat import (
compute_stats_do_not_distinguish_types,
compute_stats_from_atomic,
Expand Down Expand Up @@ -245,10 +248,8 @@ def compute_output_stats(
system["find_atom_" + kk] > 0.0
):
atomic_sampled_idx[kk].append(idx)
elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
if (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
global_sampled_idx[kk].append(idx)
else:
continue

# use index to gather model predictions for the corresponding systems.
model_pred_g = (
Expand Down Expand Up @@ -291,7 +292,7 @@ def compute_output_stats(
)

# compute stat
bias_atom_g, std_atom_g = compute_output_stats_global(
bias_atom_g, std_atom_g = _compute_output_stats_global(
sampled,
ntypes,
keys,
Expand All @@ -302,7 +303,7 @@ def compute_output_stats(
intensive,
model_pred_g,
)
bias_atom_a, std_atom_a = compute_output_stats_atomic(
bias_atom_a, std_atom_a = _compute_output_stats_atomic(
sampled,
ntypes,
keys,
Expand Down Expand Up @@ -335,7 +336,7 @@ def compute_output_stats(
return bias_atom_e, std_atom_e


def compute_output_stats_global(
def _compute_output_stats_global(
sampled: list[dict],
ntypes: int,
keys: list[str],
Expand All @@ -359,14 +360,21 @@ def compute_output_stats_global(
for kk in keys
}

natoms_key = "natoms"
input_natoms = {
kk: [
to_numpy_array(sampled[idx][natoms_key])
for idx in global_sampled_idx.get(kk, [])
]
for kk in keys
}
data_mixed_type = "real_natoms_vec" in sampled[0]
natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec"
input_natoms = {}
for kk in keys:
kk_natoms = []
for idx in global_sampled_idx.get(kk, []):
nn = to_numpy_array(sampled[idx][natoms_key])
if "atom_exclude_types" in sampled[idx]:
nn = nn.copy()
type_mask = AtomExcludeMask(
ntypes, sampled[idx]["atom_exclude_types"]
).get_type_mask()
nn[:, 2:] *= type_mask.reshape(1, -1)
kk_natoms.append(nn)
input_natoms[kk] = kk_natoms

# shape: (nframes, ndim)
merged_output = {
Expand Down Expand Up @@ -453,7 +461,7 @@ def rmse(x: np.ndarray) -> float:
return bias_atom_e, std_atom_e


def compute_output_stats_atomic(
def _compute_output_stats_atomic(
sampled: list[dict],
ntypes: int,
keys: list[str],
Expand Down
3 changes: 1 addition & 2 deletions deepmd/pd/utils/env_mat_stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,10 @@ def iter(
"last_dim should be 1 for raial-only or 4 for full descriptor."
)
for system in data:
coord, atype, box, natoms = (
coord, atype, box = (
system["coord"],
system["atype"],
system["box"],
system["natoms"],
)
(
extended_coord,
Expand Down
102 changes: 48 additions & 54 deletions deepmd/pd/utils/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,10 @@ def _compute_model_predict(
model_predict = {kk: [] for kk in keys}
for system in sampled:
nframes = system["coord"].shape[0]
coord, atype, box, natoms = (
coord, atype, box = (
system["coord"],
system["atype"],
system["box"],
system["natoms"],
)
fparam = system.get("fparam", None)
aparam = system.get("aparam", None)
Expand Down Expand Up @@ -324,12 +323,9 @@ def compute_output_stats(
system["find_atom_" + kk] > 0.0
):
atomic_sampled_idx[kk].append(idx)
elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
if (("find_" + kk) in system) and (system["find_" + kk] > 0.0):
global_sampled_idx[kk].append(idx)

else:
continue

# use index to gather model predictions for the corresponding systems.

model_pred_g = (
Expand Down Expand Up @@ -372,20 +368,22 @@ def compute_output_stats(
)

# compute stat
bias_atom_g, std_atom_g = compute_output_stats_global(
bias_atom_g, std_atom_g = _compute_output_stats_global(
sampled,
ntypes,
keys,
rcond,
preset_bias,
model_pred_g,
global_sampled_idx,
stats_distinguish_types,
intensive,
model_pred_g,
)
bias_atom_a, std_atom_a = compute_output_stats_atomic(
bias_atom_a, std_atom_a = _compute_output_stats_atomic(
sampled,
ntypes,
keys,
atomic_sampled_idx,
model_pred_a,
)

Expand Down Expand Up @@ -416,58 +414,52 @@ def compute_output_stats(
return bias_atom_e, std_atom_e


def compute_output_stats_global(
def _compute_output_stats_global(
sampled: list[dict],
ntypes: int,
keys: list[str],
rcond: float | None = None,
preset_bias: dict[str, list[paddle.Tensor | None]] | None = None,
model_pred: dict[str, np.ndarray] | None = None,
global_sampled_idx: dict | None = None,
stats_distinguish_types: bool = True,
intensive: bool = False,
model_pred: dict[str, np.ndarray] | None = None,
) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
"""This function only handle stat computation from reduced global labels."""
# return directly if model predict is empty for global
if model_pred == {}:
# return directly if no global samples
if global_sampled_idx is None or all(
len(v) == 0 for v in global_sampled_idx.values()
):
return {}, {}

# get label dict from sample; for each key, only picking the system with global labels.
outputs = {
kk: [
system[kk]
for system in sampled
if kk in system and system.get(f"find_{kk}", 0) > 0
]
kk: [to_numpy_array(sampled[idx][kk]) for idx in global_sampled_idx.get(kk, [])]
for kk in keys
}

data_mixed_type = "real_natoms_vec" in sampled[0]
natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec"
for system in sampled:
if "atom_exclude_types" in system:
type_mask = AtomExcludeMask(
ntypes, system["atom_exclude_types"]
).get_type_mask()
system[natoms_key][:, 2:] *= type_mask.unsqueeze(0)

input_natoms = {
kk: [
item[natoms_key]
for item in sampled
if kk in item and item.get(f"find_{kk}", 0) > 0
]
for kk in keys
}
input_natoms = {}
for kk in keys:
kk_natoms = []
for idx in global_sampled_idx.get(kk, []):
nn = to_numpy_array(sampled[idx][natoms_key])
if "atom_exclude_types" in sampled[idx]:
nn = nn.copy()
type_mask = AtomExcludeMask(
ntypes, sampled[idx]["atom_exclude_types"]
).get_type_mask()
nn[:, 2:] *= to_numpy_array(type_mask).reshape(1, -1)
kk_natoms.append(nn)
input_natoms[kk] = kk_natoms
# shape: (nframes, ndim)
merged_output = {
kk: to_numpy_array(paddle.concat(outputs[kk]))
for kk in keys
if len(outputs[kk]) > 0
kk: np.concatenate(outputs[kk]) for kk in keys if len(outputs[kk]) > 0
}
# shape: (nframes, ntypes)

merged_natoms = {
kk: to_numpy_array(paddle.concat(input_natoms[kk])[:, 2:])
kk: np.concatenate(input_natoms[kk])[:, 2:]
for kk in keys
if len(input_natoms[kk]) > 0
}
Expand Down Expand Up @@ -550,53 +542,55 @@ def rmse(x: np.ndarray) -> float:
return bias_atom_e, std_atom_e


def compute_output_stats_atomic(
def _compute_output_stats_atomic(
sampled: list[dict],
ntypes: int,
keys: list[str],
atomic_sampled_idx: dict | None = None,
model_pred: dict[str, np.ndarray] | None = None,
) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
"""Compute output statistics from atomic labels."""
# return directly if no atomic samples
if atomic_sampled_idx is None or all(
len(v) == 0 for v in atomic_sampled_idx.values()
):
return {}, {}

# get label dict from sample; for each key, only picking the system with atomic labels.
outputs = {
kk: [
system["atom_" + kk]
for system in sampled
if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0
to_numpy_array(sampled[idx]["atom_" + kk])
for idx in atomic_sampled_idx.get(kk, [])
]
for kk in keys
}
natoms = {
kk: [
system["atype"]
for system in sampled
if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0
to_numpy_array(sampled[idx]["atype"])
for idx in atomic_sampled_idx.get(kk, [])
]
for kk in keys
}
# reshape outputs [nframes, nloc * ndim] --> reshape to [nframes * nloc, 1, ndim] for concatenation
# reshape natoms [nframes, nloc] --> reshape to [nframes * nloc, 1] for concatenation
natoms = {k: [sys_v.reshape([-1, 1]) for sys_v in v] for k, v in natoms.items()}
natoms = {k: [sys_v.reshape(-1, 1) for sys_v in v] for k, v in natoms.items()}
outputs = {
k: [
sys.reshape([natoms[k][sys_idx].shape[0], 1, -1])
sys.reshape(natoms[k][sys_idx].shape[0], 1, -1)
for sys_idx, sys in enumerate(v)
]
for k, v in outputs.items()
}

merged_output = {
kk: to_numpy_array(paddle.concat(outputs[kk]))
for kk in keys
if len(outputs[kk]) > 0
kk: np.concatenate(outputs[kk]) for kk in keys if len(outputs[kk]) > 0
}
merged_natoms = {
kk: to_numpy_array(paddle.concat(natoms[kk]))
for kk in keys
if len(natoms[kk]) > 0
kk: np.concatenate(natoms[kk]) for kk in keys if len(natoms[kk]) > 0
}
# reshape merged data to [nf, nloc, ndim]
merged_output = {
kk: merged_output[kk].reshape([*merged_natoms[kk].shape, -1])
kk: merged_output[kk].reshape((*merged_natoms[kk].shape, -1))
for kk in merged_output
}

Expand Down
Loading