deepmodeling · njzjz · Feb 27, 2026 · Feb 22, 2026 · Feb 23, 2026 · Feb 23, 2026
diff --git a/deepmd/dpmodel/descriptor/dpa1.py b/deepmd/dpmodel/descriptor/dpa1.py
@@ -671,6 +671,84 @@ def update_sel(
 
 @DescriptorBlock.register("se_atten")
 class DescrptBlockSeAtten(NativeOP, DescriptorBlock):
+    r"""The attention-based descriptor block.
+
+    This block computes an embedding matrix using an attention mechanism and type embedding.
+    The descriptor is computed as:
+
+    .. math::
+        \mathcal{D}^i = \frac{1}{N_c^2}(\hat{\mathcal{G}}^i)^T \mathcal{R}^i (\mathcal{R}^i)^T \hat{\mathcal{G}}^i_<,
+
+    where :math:`\hat{\mathcal{G}}^i` is the embedding matrix after self-attention layers,
+    :math:`\mathcal{R}^i` is the coordinate matrix, and :math:`\hat{\mathcal{G}}^i_<` denotes
+    the first `axis_neuron` columns of :math:`\hat{\mathcal{G}}^i`.
+
+    The embedding matrix :math:`\mathcal{G}^i` is computed by:
+
+    .. math::
+        (\mathcal{G}^i)_j = \mathcal{N}(s(r_{ji}), \mathcal{T}_i, \mathcal{T}_j),
+
+    where :math:`\mathcal{N}` is the embedding network, :math:`s(r_{ji})` is the smoothed
+    radial distance, and :math:`\mathcal{T}` denotes type embedding.
+
+    Parameters
+    ----------
+    rcut : float
+        The cut-off radius.
+    rcut_smth : float
+        Where to start smoothing.
+    sel : Union[list[int], int]
+        Maximally possible number of selected neighbors.
+    ntypes : int
+        Number of element types.
+    neuron : list[int], optional
+        Number of neurons in each hidden layer of the embedding net.
+    axis_neuron : int, optional
+        Size of the submatrix of the embedding matrix.
+    tebd_dim : int, optional
+        Dimension of the type embedding.
+    tebd_input_mode : str, optional
+        The input mode of the type embedding. Supported modes are ["concat", "strip"].
+    resnet_dt : bool, optional
+        Time-step `dt` in the resnet construction.
+    type_one_side : bool, optional
+        If True, only type embeddings of neighbor atoms are considered.
+    attn : int, optional
+        Hidden dimension of the attention vectors.
+    attn_layer : int, optional
+        Number of attention layers.
+    attn_dotr : bool, optional
+        If True, dot the angular gate to the attention weights.
+    attn_mask : bool, optional
+        If True, mask the diagonal of attention weights.
+    exclude_types : list[tuple[int, int]], optional
+        The excluded pairs of types which have no interaction.
+    env_protection : float, optional
+        Protection parameter to prevent division by zero.
+    set_davg_zero : bool, optional
+        Set the shift of embedding net input to zero.
+    activation_function : str, optional
+        The activation function in the embedding net.
+    precision : str, optional
+        The precision of the embedding net parameters.
+    scaling_factor : float, optional
+        The scaling factor of normalization in attention weights calculation.
+    normalize : bool, optional
+        Whether to normalize the hidden vectors in attention weights calculation.
+    temperature : float, optional
+        If not None, the scaling of attention weights is `temperature` itself.
+    trainable_ln : bool, optional
+        Whether to use trainable shift and scale weights in layer normalization.
+    ln_eps : float, optional
+        The epsilon value for layer normalization.
+    smooth : bool, optional
+        Whether to use smoothness in attention weights calculation.
+    seed : int, optional
+        Random seed for parameter initialization.
+    trainable : bool, optional
+        If the parameters are trainable.
+    """
+
     def __init__(
         self,
         rcut: float,

diff --git a/deepmd/dpmodel/descriptor/dpa2.py b/deepmd/dpmodel/descriptor/dpa2.py
@@ -369,6 +369,78 @@ def deserialize(cls, data: dict) -> "RepformerArgs":
 
 @BaseDescriptor.register("dpa2")
 class DescrptDPA2(NativeOP, BaseDescriptor):
+    r"""The DPA-2 descriptor [1]_.
+
+    The DPA-2 descriptor combines a repinit block and a repformer block to extract
+    atomic representations. The overall descriptor is computed as:
+
+    .. math::
+        \mathcal{D}^i = \mathrm{Repformer}(\mathrm{Linear}(\mathrm{Repinit}(\mathcal{R}^i, \mathcal{T}^i))),
+
+    where :math:`\mathcal{R}^i` is the environment matrix and :math:`\mathcal{T}^i` is the
+    type embedding.
+
+    The repinit block computes initial node and edge representations using attention-based
+    message passing. The repformer block further refines these representations through
+    multiple layers of graph convolution and attention mechanisms.
+
+    The final output dimension is:
+
+    .. math::
+        \dim(\mathcal{D}^i) = \text{g1\_dim} + \text{tebd\_dim} \quad (\text{if concat\_output\_tebd}).
+
+    Parameters
+    ----------
+    repinit : Union[RepinitArgs, dict]
+        The arguments used to initialize the repinit block; see the docstring of `RepinitArgs` for detailed information.
+    repformer : Union[RepformerArgs, dict]
+        The arguments used to initialize the repformer block; see the docstring of `RepformerArgs` for detailed information.
+    concat_output_tebd : bool, optional
+        Whether to concatenate the type embedding to the output of the descriptor.
+    precision : str, optional
+        The precision of the embedding net parameters.
+    smooth : bool, optional
+        Whether to use smoothness in processes such as attention weights calculation.
+    exclude_types : list[list[int]], optional
+        The excluded pairs of types which have no interaction with each other.
+        For example, `[[0, 1]]` means no interaction between type 0 and type 1.
+    env_protection : float, optional
+        Protection parameter to prevent division by zero errors during environment matrix calculations.
+        For example, when using paddings, some neighbors may be at zero distance, which would cause a division-by-zero error during environment matrix calculations without this protection.
+    trainable : bool, optional
+        If the parameters are trainable.
+    seed : int, optional
+        (Not used yet) Random seed for parameter initialization.
+    add_tebd_to_repinit_out : bool, optional
+        Whether to add type embedding to the output representation from repinit before inputting it into repformer.
+    use_econf_tebd : bool, optional
+        Whether to use electronic configuration type embedding.
+    use_tebd_bias : bool, optional
+        Whether to use bias in the type embedding layer.
+    type_map : list[str], optional
+        A list of strings. Give the name to each type of atoms.
+
+    Returns
+    -------
+    descriptor:         torch.Tensor
+        the descriptor of shape nf x nloc x g1_dim.
+        invariant single-atom representation.
+    g2:                 torch.Tensor
+        invariant pair-atom representation.
+    h2:                 torch.Tensor
+        equivariant pair-atom representation.
+    rot_mat:            torch.Tensor
+        rotation matrix for equivariant fittings
+    sw:                 torch.Tensor
+        The switch function for decaying inverse distance.
+
+    References
+    ----------
+    .. [1] Zhang, D., Liu, X., Zhang, X. et al. DPA-2: a
+       large atomic model as a multi-task learner. npj
+       Comput Mater 10, 293 (2024). https://doi.org/10.1038/s41524-024-01493-2
+    """
+
     def __init__(
         self,
         ntypes: int,
@@ -389,60 +461,6 @@ def __init__(
         use_tebd_bias: bool = False,
         type_map: list[str] | None = None,
     ) -> None:
-        r"""The DPA-2 descriptor[1]_.
-
-        Parameters
-        ----------
-        repinit : Union[RepinitArgs, dict]
-            The arguments used to initialize the repinit block, see docstr in `RepinitArgs` for details information.
-        repformer : Union[RepformerArgs, dict]
-            The arguments used to initialize the repformer block, see docstr in `RepformerArgs` for details information.
-        concat_output_tebd : bool, optional
-            Whether to concat type embedding at the output of the descriptor.
-        precision : str, optional
-            The precision of the embedding net parameters.
-        smooth : bool, optional
-            Whether to use smoothness in processes such as attention weights calculation.
-        exclude_types : list[list[int]], optional
-            The excluded pairs of types which have no interaction with each other.
-            For example, `[[0, 1]]` means no interaction between type 0 and type 1.
-        env_protection : float, optional
-            Protection parameter to prevent division by zero errors during environment matrix calculations.
-            For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection.
-        trainable : bool, optional
-            If the parameters are trainable.
-        seed : int, optional
-            (Unused yet) Random seed for parameter initialization.
-        add_tebd_to_repinit_out : bool, optional
-            Whether to add type embedding to the output representation from repinit before inputting it into repformer.
-        use_econf_tebd : bool, Optional
-            Whether to use electronic configuration type embedding.
-        use_tebd_bias : bool, Optional
-            Whether to use bias in the type embedding layer.
-        type_map : list[str], Optional
-            A list of strings. Give the name to each type of atoms.
-
-        Returns
-        -------
-        descriptor:         torch.Tensor
-            the descriptor of shape nf x nloc x g1_dim.
-            invariant single-atom representation.
-        g2:                 torch.Tensor
-            invariant pair-atom representation.
-        h2:                 torch.Tensor
-            equivariant pair-atom representation.
-        rot_mat:            torch.Tensor
-            rotation matrix for equivariant fittings
-        sw:                 torch.Tensor
-            The switch function for decaying inverse distance.
-
-        References
-        ----------
-        .. [1] Zhang, D., Liu, X., Zhang, X. et al. DPA-2: a
-           large atomic model as a multi-task learner. npj
-           Comput Mater 10, 293 (2024). https://doi.org/10.1038/s41524-024-01493-2
-        """
-
         def init_subclass_params(sub_data: dict | Any, sub_class: type) -> Any:
             if isinstance(sub_data, dict):
                 return sub_class(**sub_data)

diff --git a/deepmd/dpmodel/descriptor/dpa3.py b/deepmd/dpmodel/descriptor/dpa3.py
@@ -59,6 +59,27 @@
 class RepFlowArgs:
     r"""The constructor for the RepFlowArgs class which defines the parameters of the repflow block in DPA3 descriptor.
 
+    The DPA-3 descriptor uses a repflow architecture that maintains and updates three types
+    of representations: node (:math:`\mathbf{n}`), edge (:math:`\mathbf{e}`), and angle (:math:`\mathbf{a}`).
+
+    The update equations for each layer are:
+
+    .. math::
+        \mathbf{n}^{l+1} = \text{UpdateNode}(\mathbf{n}^l, \mathbf{e}^l, \mathbf{a}^l),
+
+    .. math::
+        \mathbf{e}^{l+1} = \text{UpdateEdge}(\mathbf{n}^l, \mathbf{e}^l, \mathbf{a}^l),
+
+    .. math::
+        \mathbf{a}^{l+1} = \text{UpdateAngle}(\mathbf{n}^l, \mathbf{e}^l, \mathbf{a}^l).
+
+    The final descriptor is obtained by symmetrization:
+
+    .. math::
+        \mathcal{D}^i = \text{Symmetrize}(\mathbf{n}^L, \mathbf{e}^L),
+
+    where :math:`L` is the number of repflow layers.
+
     Parameters
     ----------
     n_dim : int, optional
@@ -254,6 +275,31 @@ def deserialize(cls, data: dict) -> "RepFlowArgs":
 class DescrptDPA3(NativeOP, BaseDescriptor):
     r"""The DPA3 descriptor[1]_.
 
+    The DPA-3 descriptor uses a repflow block to iteratively update node, edge, and angle
+    representations. The descriptor is computed as:
+
+    .. math::
+        \mathcal{D}^i = \mathrm{RepFlow}(\mathcal{N}^i, \mathcal{E}^i, \mathcal{A}^i),
+
+    where :math:`\mathcal{N}^i`, :math:`\mathcal{E}^i`, and :math:`\mathcal{A}^i` are the
+    initial node, edge, and angle representations respectively.
+
+    The repflow block performs iterative updates through multiple layers:
+
+    .. math::
+        \mathcal{N}^{i,l+1} = \mathrm{UpdateNode}(\mathcal{N}^{i,l}, \mathcal{E}^{i,l}, \mathcal{A}^{i,l}),
+
+    .. math::
+        \mathcal{E}^{i,l+1} = \mathrm{UpdateEdge}(\mathcal{N}^{i,l}, \mathcal{E}^{i,l}, \mathcal{A}^{i,l}),
+
+    .. math::
+        \mathcal{A}^{i,l+1} = \mathrm{UpdateAngle}(\mathcal{N}^{i,l}, \mathcal{E}^{i,l}, \mathcal{A}^{i,l}).
+
+    The final descriptor output dimension is:
+
+    .. math::
+        \dim(\mathcal{D}^i) = \text{n\_dim} \times \text{axis\_neuron} \quad (\text{after symmetrization}).
+
     Parameters
     ----------
     repflow : Union[RepFlowArgs, dict]

diff --git a/deepmd/dpmodel/descriptor/hybrid.py b/deepmd/dpmodel/descriptor/hybrid.py
@@ -33,7 +33,20 @@
 
 @BaseDescriptor.register("hybrid")
 class DescrptHybrid(BaseDescriptor, NativeOP):
-    """Concate a list of descriptors to form a new descriptor.
+    r"""Concatenate a list of descriptors to form a new descriptor.
+
+    The hybrid descriptor combines multiple descriptors by concatenation:
+
+    .. math::
+        \mathcal{D}^i = [\mathcal{D}^i_1, \mathcal{D}^i_2, \ldots, \mathcal{D}^i_n],
+
+    where :math:`\mathcal{D}^i_k` is the descriptor computed by the :math:`k`-th
+    sub-descriptor for atom :math:`i`, and :math:`n` is the number of sub-descriptors.
+
+    The output dimension is the sum of all sub-descriptor dimensions:
+
+    .. math::
+        \dim(\mathcal{D}^i) = \sum_{k=1}^{n} \dim(\mathcal{D}^i_k).
 
     Parameters
     ----------

diff --git a/deepmd/dpmodel/descriptor/repflows.py b/deepmd/dpmodel/descriptor/repflows.py
@@ -63,6 +63,31 @@ class DescrptBlockRepflows(NativeOP, DescriptorBlock):
     r"""
     The repflow descriptor block.
 
+    The repflow descriptor maintains three types of representations and updates them
+    iteratively through message passing:
+
+    - **Node representation** :math:`\mathbf{n}^i \in \mathbb{R}^{n_{dim}}`: single-atom features
+    - **Edge representation** :math:`\mathbf{e}^{ij} \in \mathbb{R}^{e_{dim}}`: pair-atom features
+    - **Angle representation** :math:`\mathbf{a}^{ijk} \in \mathbb{R}^{a_{dim}}`: three-body features
+
+    The update equations for layer :math:`l` are:
+
+    .. math::
+        \mathbf{n}^{i,l+1} = \mathbf{n}^{i,l} + \text{MLP}_n\left(\sum_{j \in \mathcal{N}(i)} \mathbf{e}^{ij,l}\right),
+
+    .. math::
+        \mathbf{e}^{ij,l+1} = \mathbf{e}^{ij,l} + \text{MLP}_e\left([\mathbf{n}^{i,l}, \mathbf{n}^{j,l}, \mathbf{e}^{ij,l}, \sum_k \mathbf{a}^{ijk,l}]\right),
+
+    .. math::
+        \mathbf{a}^{ijk,l+1} = \mathbf{a}^{ijk,l} + \text{MLP}_a\left([\mathbf{e}^{ij,l}, \mathbf{e}^{ik,l}, \cos\theta_{jik}]\right).
+
+    The final descriptor is computed via symmetrization:
+
+    .. math::
+        \mathcal{D}^i = \frac{1}{N_c^2} (\mathcal{N}^i)^T \mathcal{E}^i (\mathcal{E}^i)^T \mathcal{N}^i_<,
+
+    where :math:`\mathcal{N}^i_<` denotes the first `axis_neuron` columns of :math:`\mathcal{N}^i`.
+
     Parameters
     ----------
     n_dim : int, optional

diff --git a/deepmd/dpmodel/descriptor/repformers.py b/deepmd/dpmodel/descriptor/repformers.py
@@ -84,6 +84,36 @@ class DescrptBlockRepformers(NativeOP, DescriptorBlock):
     r"""
     The repformer descriptor block.
 
+    The repformer block iteratively updates single-atom (:math:`\mathcal{G}_1`),
+    pair-atom (:math:`\mathcal{G}_2`), and equivariant pair-atom (:math:`\mathcal{H}_2`)
+    representations through multiple layers:
+
+    **Update of** :math:`\mathcal{G}_1` **(single-atom representation):**
+
+    The update can include multiple terms:
+
+    - Convolution term: :math:`\mathcal{G}_1^{i,l+1} \leftarrow \mathcal{G}_1^{i,l} + \mathrm{MLP}(\sum_j \mathcal{G}_2^{ij,l} \odot \mathcal{G}_1^{j,l})`
+    - GRRG term: :math:`\mathcal{G}_1^{i,l+1} \leftarrow \mathcal{G}_1^{i,l} + \mathrm{MLP}((\mathcal{G}_2^{i,l})^T \mathcal{H}_2^{i,l} (\mathcal{H}_2^{i,l})^T \mathcal{G}_{2,<}^{i,l})`
+    - DRRD term: :math:`\mathcal{G}_1^{i,l+1} \leftarrow \mathcal{G}_1^{i,l} + \mathrm{MLP}((\mathcal{G}_1^{j,l})^T \mathcal{H}_2^{i,l} (\mathcal{H}_2^{i,l})^T \mathcal{G}_{1,<}^{j,l})`
+    - Attention term: :math:`\mathcal{G}_1^{i,l+1} \leftarrow \mathcal{G}_1^{i,l} + \mathrm{SelfAttention}(\mathcal{G}_1^{i,l}, \mathcal{G}_1^{j,l})`
+
+    **Update of** :math:`\mathcal{G}_2` **(pair-atom representation):**
+
+    - G1xG1 term: :math:`\mathcal{G}_2^{ij,l+1} \leftarrow \mathcal{G}_2^{ij,l} + \mathrm{MLP}(\mathcal{G}_1^{i,l} \otimes \mathcal{G}_1^{j,l})`
+    - Attention term: :math:`\mathcal{G}_2^{ij,l+1} \leftarrow \mathcal{G}_2^{ij,l} + \mathrm{GatedSelfAttention}(\mathcal{G}_2^{ij,l})`
+
+    **Update of** :math:`\mathcal{H}_2` **(equivariant pair-atom representation):**
+
+    .. math::
+        \mathcal{H}_2^{ij,l+1} = \mathcal{H}_2^{ij,l} + \mathrm{MLP}(\mathcal{G}_2^{ij,l}) \odot \mathcal{R}^{ij}.
+
+    The final descriptor is the iteratively updated single-atom representation:
+
+    .. math::
+        \mathcal{D}^i = \mathcal{G}_1^{i,L},
+
+    where :math:`L` is the number of repformer layers.
+
     Parameters
     ----------
     rcut : float