diff --git a/paddle/fluid/operators/layer_norm_op.cu b/paddle/fluid/operators/layer_norm_op.cu
old mode 100755
new mode 100644
index 25c722358c4e326897cef98be2b62e5071959cf9..0410e0511586070add58a1246ea4ebf20e379649
--- a/paddle/fluid/operators/layer_norm_op.cu
+++ b/paddle/fluid/operators/layer_norm_op.cu
@@ -399,9 +399,9 @@ __global__ void LayerNormBackwardComputeGradInput(
     const U *__restrict__ mean, const U *__restrict__ var, const float epsilon,
     const U *gamma, T *grad_input) {
 #ifdef __HIPCC__
-  for (auto i1 = hipBlockIdx_y; i1 < n1; i1 += hipGridDim_y) {
+  for (auto i1 = hipBlockIdx_x; i1 < n1; i1 += hipGridDim_x) {
 #else
-  for (auto i1 = blockIdx.y; i1 < n1; i1 += gridDim.y) {
+  for (auto i1 = blockIdx.x; i1 < n1; i1 += gridDim.x) {
 #endif
     U sum_loss1 = U(0);
     U sum_loss2 = U(0);
@@ -867,9 +867,8 @@ static void LayerNormBackward(const T *x, const T *d_y, const U *scale,
       constexpr int BDIMX1 = 32;
       constexpr int BDIMY1 = 4;
       dim3 threads1(BDIMX1, BDIMY1, 1);
-      const dim3 blocks1(1, batch_size, 1);
       LayerNormBackwardComputeGradInput<
-          T, U, BDIMX1, BDIMY1><<<blocks1, threads1, 0, stream>>>(
+          T, U, BDIMX1, BDIMY1><<<batch_size, threads1, 0, stream>>>(
           d_y, x, batch_size, feature_size, mean, var, epsilon, scale, d_x);
       break;
     }
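Note: the sketch below is not Paddle code; it only illustrates the launch pattern the patch moves to, where each row of an n1 x n2 problem is handled by one block in the x dimension and the kernel strides over rows with gridDim.x. On CUDA devices, gridDim.x can be as large as 2^31 - 1 blocks while gridDim.y is capped at 65535, which is the usual reason to prefer the x dimension when the row count (here the batch size) can be large. The kernel and variable names (RowStrideScale, n1, n2) are illustrative assumptions, not part of the patch.

// Minimal, self-contained sketch (not Paddle code) of a row-per-block
// grid-stride loop over blockIdx.x, mirroring the patched kernel's loop.
#include <cstdio>
#include <cuda_runtime.h>

__global__ void RowStrideScale(const float *x, float *out, int n1, int n2) {
  // Rows i1 = blockIdx.x, blockIdx.x + gridDim.x, ... as in the patched loop.
  for (int i1 = blockIdx.x; i1 < n1; i1 += gridDim.x) {
    // Threads in the block cooperate across the n2 elements of the row.
    for (int i2 = threadIdx.x; i2 < n2; i2 += blockDim.x) {
      out[i1 * n2 + i2] = 2.0f * x[i1 * n2 + i2];
    }
  }
}

int main() {
  const int n1 = 8, n2 = 128;
  float *x = nullptr, *out = nullptr;
  cudaMallocManaged(&x, n1 * n2 * sizeof(float));
  cudaMallocManaged(&out, n1 * n2 * sizeof(float));
  for (int i = 0; i < n1 * n2; ++i) x[i] = 1.0f;
  // One block per row in the x dimension, analogous to passing batch_size as
  // the first launch argument instead of a dim3 with the batch in y.
  RowStrideScale<<<n1, 128>>>(x, out, n1, n2);
  cudaDeviceSynchronize();
  printf("out[0] = %f\n", out[0]);  // expected: 2.0
  cudaFree(x);
  cudaFree(out);
  return 0;
}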