diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py index 2f9aa69b62..2a8bad4586 100644 --- a/deepmd/dpmodel/descriptor/dpa1.py +++ b/deepmd/dpmodel/descriptor/dpa1.py @@ -671,6 +671,84 @@ def update_sel( @DescriptorBlock.register("se_atten") class DescrptBlockSeAtten(NativeOP, DescriptorBlock): + r"""The attention-based descriptor block. + + This block computes an embedding matrix using attention mechanism and type embedding. + The descriptor is computed as: + + .. math:: + \mathcal{D}^i = \frac{1}{N_c^2}(\hat{\mathcal{G}}^i)^T \mathcal{R}^i (\mathcal{R}^i)^T \hat{\mathcal{G}}^i_<, + + where :math:`\hat{\mathcal{G}}^i` is the embedding matrix after self-attention layers, + :math:`\mathcal{R}^i` is the coordinate matrix, and :math:`\hat{\mathcal{G}}^i_<` denotes + the first `axis_neuron` columns of :math:`\hat{\mathcal{G}}^i`. + + The embedding matrix :math:`\mathcal{G}^i` is computed by: + + .. math:: + (\mathcal{G}^i)_j = \mathcal{N}(s(r_{ji}), \mathcal{T}_i, \mathcal{T}_j), + + where :math:`\mathcal{N}` is the embedding network, :math:`s(r_{ji})` is the smoothed + radial distance, and :math:`\mathcal{T}` denotes type embedding. + + Parameters + ---------- + rcut : float + The cut-off radius. + rcut_smth : float + Where to start smoothing. + sel : Union[list[int], int] + Maximally possible number of selected neighbors. + ntypes : int + Number of element types. + neuron : list[int], optional + Number of neurons in each hidden layer of the embedding net. + axis_neuron : int, optional + Size of the submatrix of the embedding matrix. + tebd_dim : int, optional + Dimension of the type embedding. + tebd_input_mode : str, optional + The input mode of the type embedding. Supported modes are ["concat", "strip"]. + resnet_dt : bool, optional + Time-step `dt` in the resnet construction. + type_one_side : bool, optional + If True, only type embeddings of neighbor atoms are considered. 
+ attn : int, optional + Hidden dimension of the attention vectors. + attn_layer : int, optional + Number of attention layers. + attn_dotr : bool, optional + If True, dot the angular gate to the attention weights. + attn_mask : bool, optional + If True, mask the diagonal of attention weights. + exclude_types : list[tuple[int, int]], optional + The excluded pairs of types which have no interaction. + env_protection : float, optional + Protection parameter to prevent division by zero. + set_davg_zero : bool, optional + Set the shift of embedding net input to zero. + activation_function : str, optional + The activation function in the embedding net. + precision : str, optional + The precision of the embedding net parameters. + scaling_factor : float, optional + The scaling factor of normalization in attention weights calculation. + normalize : bool, optional + Whether to normalize the hidden vectors in attention weights calculation. + temperature : float, optional + If not None, the scaling of attention weights is `temperature` itself. + trainable_ln : bool, optional + Whether to use trainable shift and scale weights in layer normalization. + ln_eps : float, optional + The epsilon value for layer normalization. + smooth : bool, optional + Whether to use smoothness in attention weights calculation. + seed : int, optional + Random seed for parameter initialization. + trainable : bool, optional + If the parameters are trainable. + """ + def __init__( self, rcut: float, diff --git a/deepmd/dpmodel/descriptor/dpa2.py b/deepmd/dpmodel/descriptor/dpa2.py index 9a3be982f1..230d679f0f 100644 --- a/deepmd/dpmodel/descriptor/dpa2.py +++ b/deepmd/dpmodel/descriptor/dpa2.py @@ -369,6 +369,78 @@ def deserialize(cls, data: dict) -> "RepformerArgs": @BaseDescriptor.register("dpa2") class DescrptDPA2(NativeOP, BaseDescriptor): + r"""The DPA-2 descriptor[1]_. + + The DPA-2 descriptor combines a repinit block and a repformer block to extract + atomic representations. 
The overall descriptor is computed as: + + .. math:: + \mathcal{D}^i = \mathrm{Repformer}(\mathrm{Linear}(\mathrm{Repinit}(\mathcal{R}^i, \mathcal{T}^i))), + + where :math:`\mathcal{R}^i` is the environment matrix and :math:`\mathcal{T}^i` is the + type embedding. + + The repinit block computes initial node and edge representations using attention-based + message passing. The repformer block further refines these representations through + multiple layers of graph convolution and attention mechanisms. + + The final output dimension is: + + .. math:: + \dim(\mathcal{D}^i) = \text{g1\_dim} + \text{tebd\_dim} \quad (\text{if concat\_output\_tebd}). + + Parameters + ---------- + repinit : Union[RepinitArgs, dict] + The arguments used to initialize the repinit block, see docstr in `RepinitArgs` for details information. + repformer : Union[RepformerArgs, dict] + The arguments used to initialize the repformer block, see docstr in `RepformerArgs` for details information. + concat_output_tebd : bool, optional + Whether to concat type embedding at the output of the descriptor. + precision : str, optional + The precision of the embedding net parameters. + smooth : bool, optional + Whether to use smoothness in processes such as attention weights calculation. + exclude_types : list[list[int]], optional + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + env_protection : float, optional + Protection parameter to prevent division by zero errors during environment matrix calculations. + For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection. + trainable : bool, optional + If the parameters are trainable. + seed : int, optional + (Unused yet) Random seed for parameter initialization. 
+ add_tebd_to_repinit_out : bool, optional + Whether to add type embedding to the output representation from repinit before inputting it into repformer. + use_econf_tebd : bool, Optional + Whether to use electronic configuration type embedding. + use_tebd_bias : bool, Optional + Whether to use bias in the type embedding layer. + type_map : list[str], Optional + A list of strings. Give the name to each type of atoms. + + Returns + ------- + descriptor: torch.Tensor + the descriptor of shape nf x nloc x g1_dim. + invariant single-atom representation. + g2: torch.Tensor + invariant pair-atom representation. + h2: torch.Tensor + equivariant pair-atom representation. + rot_mat: torch.Tensor + rotation matrix for equivariant fittings + sw: torch.Tensor + The switch function for decaying inverse distance. + + References + ---------- + .. [1] Zhang, D., Liu, X., Zhang, X. et al. DPA-2: a + large atomic model as a multi-task learner. npj + Comput Mater 10, 293 (2024). https://doi.org/10.1038/s41524-024-01493-2 + """ + def __init__( self, ntypes: int, @@ -389,60 +461,6 @@ def __init__( use_tebd_bias: bool = False, type_map: list[str] | None = None, ) -> None: - r"""The DPA-2 descriptor[1]_. - - Parameters - ---------- - repinit : Union[RepinitArgs, dict] - The arguments used to initialize the repinit block, see docstr in `RepinitArgs` for details information. - repformer : Union[RepformerArgs, dict] - The arguments used to initialize the repformer block, see docstr in `RepformerArgs` for details information. - concat_output_tebd : bool, optional - Whether to concat type embedding at the output of the descriptor. - precision : str, optional - The precision of the embedding net parameters. - smooth : bool, optional - Whether to use smoothness in processes such as attention weights calculation. - exclude_types : list[list[int]], optional - The excluded pairs of types which have no interaction with each other. 
- For example, `[[0, 1]]` means no interaction between type 0 and type 1. - env_protection : float, optional - Protection parameter to prevent division by zero errors during environment matrix calculations. - For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection. - trainable : bool, optional - If the parameters are trainable. - seed : int, optional - (Unused yet) Random seed for parameter initialization. - add_tebd_to_repinit_out : bool, optional - Whether to add type embedding to the output representation from repinit before inputting it into repformer. - use_econf_tebd : bool, Optional - Whether to use electronic configuration type embedding. - use_tebd_bias : bool, Optional - Whether to use bias in the type embedding layer. - type_map : list[str], Optional - A list of strings. Give the name to each type of atoms. - - Returns - ------- - descriptor: torch.Tensor - the descriptor of shape nf x nloc x g1_dim. - invariant single-atom representation. - g2: torch.Tensor - invariant pair-atom representation. - h2: torch.Tensor - equivariant pair-atom representation. - rot_mat: torch.Tensor - rotation matrix for equivariant fittings - sw: torch.Tensor - The switch function for decaying inverse distance. - - References - ---------- - .. [1] Zhang, D., Liu, X., Zhang, X. et al. DPA-2: a - large atomic model as a multi-task learner. npj - Comput Mater 10, 293 (2024). 
https://doi.org/10.1038/s41524-024-01493-2 - """ - def init_subclass_params(sub_data: dict | Any, sub_class: type) -> Any: if isinstance(sub_data, dict): return sub_class(**sub_data) diff --git a/deepmd/dpmodel/descriptor/dpa3.py b/deepmd/dpmodel/descriptor/dpa3.py index 47a4fb1478..c9e640170e 100644 --- a/deepmd/dpmodel/descriptor/dpa3.py +++ b/deepmd/dpmodel/descriptor/dpa3.py @@ -59,6 +59,27 @@ class RepFlowArgs: r"""The constructor for the RepFlowArgs class which defines the parameters of the repflow block in DPA3 descriptor. + The DPA-3 descriptor uses a repflow architecture that maintains and updates three types + of representations: node (:math:`\mathbf{n}`), edge (:math:`\mathbf{e}`), and angle (:math:`\mathbf{a}`). + + The update equations for each layer are: + + .. math:: + \mathbf{n}^{l+1} = \text{UpdateNode}(\mathbf{n}^l, \mathbf{e}^l, \mathbf{a}^l), + + .. math:: + \mathbf{e}^{l+1} = \text{UpdateEdge}(\mathbf{n}^l, \mathbf{e}^l, \mathbf{a}^l), + + .. math:: + \mathbf{a}^{l+1} = \text{UpdateAngle}(\mathbf{n}^l, \mathbf{e}^l, \mathbf{a}^l). + + The final descriptor is obtained by symmetrization: + + .. math:: + \mathcal{D}^i = \text{Symmetrize}(\mathbf{n}^L, \mathbf{e}^L), + + where :math:`L` is the number of repflow layers. + Parameters ---------- n_dim : int, optional @@ -254,6 +275,31 @@ def deserialize(cls, data: dict) -> "RepFlowArgs": class DescrptDPA3(NativeOP, BaseDescriptor): r"""The DPA3 descriptor[1]_. + The DPA-3 descriptor uses a repflow block to iteratively update node, edge, and angle + representations. The descriptor is computed as: + + .. math:: + \mathcal{D}^i = \mathrm{RepFlow}(\mathcal{N}^i, \mathcal{E}^i, \mathcal{A}^i), + + where :math:`\mathcal{N}^i`, :math:`\mathcal{E}^i`, and :math:`\mathcal{A}^i` are the + initial node, edge, and angle representations respectively. + + The repflow block performs iterative updates through multiple layers: + + .. 
math:: + \mathcal{N}^{i,l+1} = \mathrm{UpdateNode}(\mathcal{N}^{i,l}, \mathcal{E}^{i,l}, \mathcal{A}^{i,l}), + + .. math:: + \mathcal{E}^{i,l+1} = \mathrm{UpdateEdge}(\mathcal{N}^{i,l}, \mathcal{E}^{i,l}, \mathcal{A}^{i,l}), + + .. math:: + \mathcal{A}^{i,l+1} = \mathrm{UpdateAngle}(\mathcal{N}^{i,l}, \mathcal{E}^{i,l}, \mathcal{A}^{i,l}). + + The final descriptor output dimension is: + + .. math:: + \dim(\mathcal{D}^i) = \text{n\_dim} \times \text{axis\_neuron} \quad (\text{after symmetrization}). + Parameters ---------- repflow : Union[RepFlowArgs, dict] diff --git a/deepmd/dpmodel/descriptor/hybrid.py b/deepmd/dpmodel/descriptor/hybrid.py index 2cb8585d77..4279f0bfcd 100644 --- a/deepmd/dpmodel/descriptor/hybrid.py +++ b/deepmd/dpmodel/descriptor/hybrid.py @@ -33,7 +33,20 @@ @BaseDescriptor.register("hybrid") class DescrptHybrid(BaseDescriptor, NativeOP): - """Concate a list of descriptors to form a new descriptor. + r"""Concatenate a list of descriptors to form a new descriptor. + + The hybrid descriptor combines multiple descriptors by concatenation: + + .. math:: + \mathcal{D}^i = [\mathcal{D}^i_1, \mathcal{D}^i_2, ..., \mathcal{D}^i_n], + + where :math:`\mathcal{D}^i_k` is the descriptor computed by the :math:`k`-th + sub-descriptor for atom :math:`i`. + + The output dimension is the sum of all sub-descriptor dimensions: + + .. math:: + \dim(\mathcal{D}^i) = \sum_{k=1}^{n} \dim(\mathcal{D}^i_k). Parameters ---------- diff --git a/deepmd/dpmodel/descriptor/repflows.py b/deepmd/dpmodel/descriptor/repflows.py index 3188bbfee5..621029aaa0 100644 --- a/deepmd/dpmodel/descriptor/repflows.py +++ b/deepmd/dpmodel/descriptor/repflows.py @@ -63,6 +63,31 @@ class DescrptBlockRepflows(NativeOP, DescriptorBlock): r""" The repflow descriptor block. 
+ The repflow descriptor maintains three types of representations and updates them + iteratively through message passing: + + - **Node representation** :math:`\mathbf{n}^i \in \mathbb{R}^{n_{dim}}`: single-atom features + - **Edge representation** :math:`\mathbf{e}^{ij} \in \mathbb{R}^{e_{dim}}`: pair-atom features + - **Angle representation** :math:`\mathbf{a}^{ijk} \in \mathbb{R}^{a_{dim}}`: three-body features + + The update equations for layer :math:`l` are: + + .. math:: + \mathbf{n}^{i,l+1} = \mathbf{n}^{i,l} + \text{MLP}_n\left(\sum_{j \in \mathcal{N}(i)} \mathbf{e}^{ij,l}\right), + + .. math:: + \mathbf{e}^{ij,l+1} = \mathbf{e}^{ij,l} + \text{MLP}_e\left([\mathbf{n}^{i,l}, \mathbf{n}^{j,l}, \mathbf{e}^{ij,l}, \sum_k \mathbf{a}^{ijk,l}]\right), + + .. math:: + \mathbf{a}^{ijk,l+1} = \mathbf{a}^{ijk,l} + \text{MLP}_a\left([\mathbf{e}^{ij,l}, \mathbf{e}^{ik,l}, \cos\theta_{jik}]\right). + + The final descriptor is computed via symmetrization: + + .. math:: + \mathcal{D}^i = \frac{1}{N_c^2} (\mathcal{N}^i)^T \mathcal{E}^i (\mathcal{E}^i)^T \mathcal{N}^i_<, + + where :math:`\mathcal{N}^i_<` denotes the first `axis_neuron` columns of :math:`\mathcal{N}^i`. + Parameters ---------- n_dim : int, optional diff --git a/deepmd/dpmodel/descriptor/repformers.py b/deepmd/dpmodel/descriptor/repformers.py index 65248ab88d..a503963f61 100644 --- a/deepmd/dpmodel/descriptor/repformers.py +++ b/deepmd/dpmodel/descriptor/repformers.py @@ -84,6 +84,36 @@ class DescrptBlockRepformers(NativeOP, DescriptorBlock): r""" The repformer descriptor block. 
+ The repformer block iteratively updates single-atom (:math:`\mathcal{G}_1`), + pair-atom (:math:`\mathcal{G}_2`), and equivariant pair-atom (:math:`\mathcal{H}_2`) + representations through multiple layers: + + **Update of :math:`\mathcal{G}_1` (single-atom representation):** + + The update can include multiple terms: + + - Convolution term: :math:`\mathcal{G}_1^{i,l+1} \leftarrow \mathcal{G}_1^{i,l} + \mathrm{MLP}(\sum_j \mathcal{G}_2^{ij,l} \odot \mathcal{G}_1^{j,l})` + - GRRG term: :math:`\mathcal{G}_1^{i,l+1} \leftarrow \mathcal{G}_1^{i,l} + \mathrm{MLP}((\mathcal{G}_2^{i,l})^T \mathcal{H}_2^{i,l} (\mathcal{H}_2^{i,l})^T \mathcal{G}_{2,<}^{i,l})` + - DRRD term: :math:`\mathcal{G}_1^{i,l+1} \leftarrow \mathcal{G}_1^{i,l} + \mathrm{MLP}((\mathcal{G}_1^{j,l})^T \mathcal{H}_2^{i,l} (\mathcal{H}_2^{i,l})^T \mathcal{G}_{1,<}^{j,l})` + - Attention term: :math:`\mathcal{G}_1^{i,l+1} \leftarrow \mathcal{G}_1^{i,l} + \mathrm{SelfAttention}(\mathcal{G}_1^{i,l}, \mathcal{G}_1^{j,l})` + + **Update of :math:`\mathcal{G}_2` (pair-atom representation):** + + - G1xG1 term: :math:`\mathcal{G}_2^{ij,l+1} \leftarrow \mathcal{G}_2^{ij,l} + \mathrm{MLP}(\mathcal{G}_1^{i,l} \otimes \mathcal{G}_1^{j,l})` + - Attention term: :math:`\mathcal{G}_2^{ij,l+1} \leftarrow \mathcal{G}_2^{ij,l} + \mathrm{GatedSelfAttention}(\mathcal{G}_2^{ij,l})` + + **Update of :math:`\mathcal{H}_2` (equivariant pair-atom representation):** + + .. math:: + \mathcal{H}_2^{ij,l+1} = \mathcal{H}_2^{ij,l} + \mathrm{MLP}(\mathcal{G}_2^{ij,l}) \odot \mathcal{R}^{ij}. + + The final descriptor is the iteratively updated single-atom representation: + + .. math:: + \mathcal{D}^i = \mathcal{G}_1^{i,L}, + + where :math:`L` is the number of repformer layers. 
+ Parameters ---------- rcut : float diff --git a/deepmd/dpmodel/descriptor/se_atten_v2.py b/deepmd/dpmodel/descriptor/se_atten_v2.py index 99074fb652..9d72740a34 100644 --- a/deepmd/dpmodel/descriptor/se_atten_v2.py +++ b/deepmd/dpmodel/descriptor/se_atten_v2.py @@ -33,6 +33,97 @@ @BaseDescriptor.register("se_atten_v2") class DescrptSeAttenV2(DescrptDPA1): + r"""Attention-based descriptor (version 2) which uses stripped type embedding. + + This descriptor inherits from :class:`DescrptDPA1` and uses the same attention-based + mechanism, but with `tebd_input_mode="strip"` by default. The descriptor + :math:`\mathcal{D}^i \in \mathbb{R}^{M \times M_{<}}` is computed as: + + .. math:: + \mathcal{D}^i = \frac{1}{N_c^2}(\hat{\mathcal{G}}^i)^T \mathcal{R}^i (\mathcal{R}^i)^T \hat{\mathcal{G}}^i_<, + + where :math:`\hat{\mathcal{G}}^i` is the embedding matrix after self-attention layers, + and :math:`\mathcal{R}^i` is the coordinate matrix (see :class:`DescrptDPA1` for details). + + The key difference from DPA-1 is that the type embedding is processed by a separate + embedding network and combined multiplicatively with the radial embedding: + + .. math:: + \mathcal{G}^i = \mathcal{N}_r(s(r)) \odot \mathcal{N}_t(\mathcal{T}) + \mathcal{N}_r(s(r)), + + where :math:`\mathcal{N}_r` is the radial embedding network, :math:`\mathcal{N}_t` is + the type embedding network, and :math:`\odot` denotes element-wise multiplication. 
+
+    Parameters
+    ----------
+    rcut: float
+        The cut-off radius :math:`r_c`
+    rcut_smth: float
+        From where the environment matrix should be smoothed :math:`r_s`
+    sel : list[int], int
+        list[int]: sel[i] specifies the maximum number of type i atoms in the cut-off radius
+        int: the total maximum number of atoms in the cut-off radius
+    ntypes : int
+        Number of element types
+    neuron : list[int]
+        Number of neurons in each hidden layer of the embedding net :math:`\mathcal{N}`
+    axis_neuron: int
+        Number of the axis neuron :math:`M_2` (number of columns of the sub-matrix of the embedding matrix)
+    tebd_dim: int
+        Dimension of the type embedding
+    resnet_dt: bool
+        Time-step `dt` in the resnet construction:
+        y = x + dt * \phi (Wx + b)
+    trainable: bool
+        If the weights of this descriptor are trainable.
+    trainable_ln: bool
+        Whether to use trainable shift and scale weights in layer normalization.
+    ln_eps: float, Optional
+        The epsilon value for layer normalization.
+    type_one_side: bool
+        If 'False', type embeddings of both neighbor and central atoms are considered.
+        If 'True', only type embeddings of neighbor atoms are considered.
+        Default is 'False'.
+    attn: int
+        Hidden dimension of the attention vectors
+    attn_layer: int
+        Number of attention layers
+    attn_dotr: bool
+        If dot the angular gate to the attention weights
+    attn_mask: bool
+        (Only support False to keep consistent with other backend references.)
+        (Not used in this version. True option is not implemented.)
+        If mask the diagonal of attention weights
+    exclude_types : list[list[int]]
+        The excluded pairs of types which have no interaction with each other.
+        For example, `[[0, 1]]` means no interaction between type 0 and type 1.
+    env_protection: float
+        Protection parameter to prevent division by zero errors during environment matrix calculations.
+    set_davg_zero: bool
+        Set the shift of embedding net input to zero.
+    activation_function: str
+        The activation function in the embedding net.
Supported options are |ACTIVATION_FN| + precision: str + The precision of the embedding net parameters. Supported options are |PRECISION| + scaling_factor: float + The scaling factor of normalization in calculations of attention weights. + If `temperature` is None, the scaling of attention weights is (N_dim * scaling_factor)**0.5 + normalize: bool + Whether to normalize the hidden vectors in attention weights calculation. + temperature: float + If not None, the scaling of attention weights is `temperature` itself. + concat_output_tebd: bool + Whether to concat type embedding at the output of the descriptor. + use_econf_tebd: bool, Optional + Whether to use electronic configuration type embedding. + use_tebd_bias : bool, Optional + Whether to use bias in the type embedding layer. + type_map: list[str], Optional + A list of strings. Give the name to each type of atoms. + seed : int, Optional + Random seed for initializing the network parameters. + """ + def __init__( self, rcut: float, diff --git a/deepmd/dpmodel/descriptor/se_r.py b/deepmd/dpmodel/descriptor/se_r.py index 4fdf50beba..5ea9ef525f 100644 --- a/deepmd/dpmodel/descriptor/se_r.py +++ b/deepmd/dpmodel/descriptor/se_r.py @@ -58,6 +58,25 @@ class DescrptSeR(NativeOP, BaseDescriptor): r"""DeepPot-SE_R constructed from only the radial information of atomic configurations. + The descriptor :math:`\mathcal{D}^i \in \mathbb{R}^{M}` is given by + + .. math:: + \mathcal{D}^i = \frac{1}{N_c} \sum_{j=1}^{N_c} \mathcal{N}(s(r_{ji})), + + where :math:`\mathcal{N}` is the embedding network, and :math:`s(r_{ji})` is the + smoothed radial distance between atom :math:`i` and its neighbor :math:`j`. + + The switching function :math:`s(r)` is defined as: + + .. math:: + s(r)= + \begin{cases} + \frac{1}{r}, & r