diff --git a/imperative/python/megengine/functional/nn.py b/imperative/python/megengine/functional/nn.py
index a51a2cd0a53d63fe4215b6bf89e9f3a5108580a7..7a7c1fb103477aeffbe40564f85884f4fe79b0a2 100644
--- a/imperative/python/megengine/functional/nn.py
+++ b/imperative/python/megengine/functional/nn.py
@@ -441,22 +441,22 @@ def softplus(inp: Tensor) -> Tensor:
 
 def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor:
     r"""
-    Applies the :math:`\log(\text{Softmax}(x))` function to an n-dimensional
-    input Tensor. The LogSoftmax formulation can be simplified as:
+    Applies the :math:`\log(\text{softmax}(x))` function to an n-dimensional
+    input tensor. The :math:`\text{logsoftmax}(x)` formulation can be simplified as:
 
     .. math::
-        \text{LogSoftmax}(x_{i}) = \log(\frac{\exp(x_i) }{ \sum_j \exp(x_j)} )
+        \text{logsoftmax}(x_{i}) = \log(\frac{\exp(x_i) }{ \sum_j \exp(x_j)} )
 
     For numerical stability the implementation follows this transformation:
 
     .. math::
-        \operatorname{logsoftmax}(x)
+        \text{logsoftmax}(x)
         = \log (\frac{\exp (x)}{\sum_{i}(\exp (x_{i}))})
         = x - \log (\sum_{i}(\exp (x_{i})))
-        = x - logsumexp(x)
+        = x - \text{logsumexp}(x)
 
     :param inp: input tensor.
-    :param axis: axis along which logsoftmax will be applied.
+    :param axis: axis along which :math:`\text{logsoftmax}(x)` will be applied.
 
     Examples:
 
@@ -487,8 +487,8 @@ def logsigmoid(inp: Tensor) -> Tensor:
 
     .. math::
         \text{logsigmoid}(x) = \log(\frac{ 1 }{ 1 + \exp(-x)})
-        = \log(1/(1 + exp(-x)))
-        = - \log(1 + exp(-x))
+        = \log(1/(1 + \exp(-x)))
+        = - \log(1 + \exp(-x))
         = - \text{softplus}(-x)
 
     :param inp: input tensor.
@@ -524,14 +524,14 @@ def logsumexp(
 
     .. math::
 
-        \operatorname{logsumexp}(\boldsymbol{x})= \log \sum_{j=1}^{n} \exp \left(x_{j}\right)
+        \text{logsumexp}(x) = \log \sum_{j=1}^{n} \exp \left(x_{j}\right)
 
     For numerical stability, the implementation follows this transformation:
 
     .. math::
 
-        \operatorname{logsumexp}(\boldsymbol{x})= \log \sum_{j=1}^{n} \exp \left(x_{j}\right)
-        = \operatorname{logsumexp}(\boldsymbol{x})=b+\log \sum_{j=1}^{n} \exp \left(x_{j}-b\right)
+        \text{logsumexp}(x) = \log \sum_{j=1}^{n} \exp \left(x_{j}\right)
+        = b + \log \sum_{j=1}^{n} \exp \left(x_{j}-b\right)
 
     where
 
@@ -578,10 +578,10 @@ def _get_softmax_axis(ndim: int) -> int:
 
 def softmax(inp: Tensor, axis: Optional[int] = None) -> Tensor:
     r"""
-    Applies a softmax function. Softmax is defined as:
+    Applies the :math:`\text{softmax}(x)` function. :math:`\text{softmax}(x)` is defined as:
 
     .. math::
-        \text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
+        \text{softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
 
     It is applied to all elements along axis, and rescales elements so that
     they stay in the range `[0, 1]` and sum to 1.
@@ -589,8 +589,8 @@ def softmax(inp: Tensor, axis: Optional[int] = None) -> Tensor:
 
     See :class:`~megengine.module.activation.Softmax` for more details.
 
     :param inp: input tensor.
-    :param axis: an axis along which softmax will be applied. By default,
-        softmax will apply along the highest ranked axis.
+    :param axis: an axis along which :math:`\text{softmax}(x)` will be applied. By default,
+        :math:`\text{softmax}(x)` will be applied along the highest ranked axis.
 
     Examples:
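The `x - \text{logsumexp}(x)` derivation restated in the first hunk can be cross-checked numerically. The sketch below is editorial, written in NumPy for illustration only; `logsoftmax_ref` is a hypothetical helper, not part of MegEngine's API or the kernel this docstring describes:

```python
import numpy as np

def logsoftmax_ref(x: np.ndarray, axis: int = -1) -> np.ndarray:
    # logsoftmax(x) = x - logsumexp(x); shifting by the max before exp()
    # keeps every exponent <= 0, matching the docstring's stability argument.
    b = np.max(x, axis=axis, keepdims=True)
    return x - (b + np.log(np.sum(np.exp(x - b), axis=axis, keepdims=True)))

x = np.array([[0.0, 500.0, 1000.0]])           # naive exp(x) would overflow
print(np.exp(logsoftmax_ref(x)).sum(axis=-1))  # [1.] -- a valid softmax row
```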
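The logsigmoid chain of identities fixed in the second hunk bottoms out at `-\text{softplus}(-x)`, which reduces to a single stable `np.logaddexp` call. Again a reference sketch with an illustrative name, not the MegEngine kernel:

```python
import numpy as np

def logsigmoid_ref(x: np.ndarray) -> np.ndarray:
    # logsigmoid(x) = -log(1 + exp(-x)) = -softplus(-x);
    # logaddexp(0, -x) evaluates log(exp(0) + exp(-x)) without overflow.
    return -np.logaddexp(0.0, -x)

x = np.array([-1000.0, 0.0, 1000.0])
print(logsigmoid_ref(x))  # [-1000.  -0.6931...  -0.]: finite at both tails
```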
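Likewise for the shift-by-`b` transformation in the logsumexp hunk. `b` is conventionally the per-axis maximum (the docstring's `where` clause defining `b` falls outside this hunk), and `logsumexp_stable` is a hypothetical name used only for this check:

```python
import numpy as np

def logsumexp_stable(x: np.ndarray, axis: int = -1) -> np.ndarray:
    # logsumexp(x) = b + log(sum_j exp(x_j - b)) with b = max_j x_j:
    # the largest exponent becomes 0, so exp() can no longer overflow.
    b = np.max(x, axis=axis, keepdims=True)
    lse = b + np.log(np.sum(np.exp(x - b), axis=axis, keepdims=True))
    return np.squeeze(lse, axis)

x = np.array([1000.0, 1001.0, 1002.0])
print(np.log(np.sum(np.exp(x))))  # inf: the naive form overflows float64
print(logsumexp_stable(x))        # 1002.4076...: the shifted form stays finite
```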