提交 7a3a5be4 作者: nihuini

fix requantize x86 pack4to1

上级 05f6d2cf
@@ -714,10 +714,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr[0] = (v >> 56) & 0xff;
-ptr[1] = (v >> 48) & 0xff;
-ptr[2] = (v >> 40) & 0xff;
-ptr[3] = (v >> 32) & 0xff;
+ptr[0] = (v >> 32) & 0xff;
+ptr[1] = (v >> 40) & 0xff;
+ptr[2] = (v >> 48) & 0xff;
+ptr[3] = (v >> 56) & 0xff;
}
}
else if (bias_data_size == 1)
@@ -735,10 +735,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr[0] = (v >> 56) & 0xff;
-ptr[1] = (v >> 48) & 0xff;
-ptr[2] = (v >> 40) & 0xff;
-ptr[3] = (v >> 32) & 0xff;
+ptr[0] = (v >> 32) & 0xff;
+ptr[1] = (v >> 40) & 0xff;
+ptr[2] = (v >> 48) & 0xff;
+ptr[3] = (v >> 56) & 0xff;
}
}
else
@@ -755,10 +755,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr[0] = (v >> 56) & 0xff;
-ptr[1] = (v >> 48) & 0xff;
-ptr[2] = (v >> 40) & 0xff;
-ptr[3] = (v >> 32) & 0xff;
+ptr[0] = (v >> 32) & 0xff;
+ptr[1] = (v >> 40) & 0xff;
+ptr[2] = (v >> 48) & 0xff;
+ptr[3] = (v >> 56) & 0xff;
}
}
}
@@ -780,10 +780,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr[0] = (v >> 56) & 0xff;
-ptr[1] = (v >> 48) & 0xff;
-ptr[2] = (v >> 40) & 0xff;
-ptr[3] = (v >> 32) & 0xff;
+ptr[0] = (v >> 32) & 0xff;
+ptr[1] = (v >> 40) & 0xff;
+ptr[2] = (v >> 48) & 0xff;
+ptr[3] = (v >> 56) & 0xff;
}
}
else if (bias_data_size == 1)
@@ -802,10 +802,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr[0] = (v >> 56) & 0xff;
-ptr[1] = (v >> 48) & 0xff;
-ptr[2] = (v >> 40) & 0xff;
-ptr[3] = (v >> 32) & 0xff;
+ptr[0] = (v >> 32) & 0xff;
+ptr[1] = (v >> 40) & 0xff;
+ptr[2] = (v >> 48) & 0xff;
+ptr[3] = (v >> 56) & 0xff;
}
}
else
@@ -823,10 +823,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr[0] = (v >> 56) & 0xff;
-ptr[1] = (v >> 48) & 0xff;
-ptr[2] = (v >> 40) & 0xff;
-ptr[3] = (v >> 32) & 0xff;
+ptr[0] = (v >> 32) & 0xff;
+ptr[1] = (v >> 40) & 0xff;
+ptr[2] = (v >> 48) & 0xff;
+ptr[3] = (v >> 56) & 0xff;
}
}
}
@@ -848,10 +848,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr[0] = (v >> 56) & 0xff;
-ptr[1] = (v >> 48) & 0xff;
-ptr[2] = (v >> 40) & 0xff;
-ptr[3] = (v >> 32) & 0xff;
+ptr[0] = (v >> 32) & 0xff;
+ptr[1] = (v >> 40) & 0xff;
+ptr[2] = (v >> 48) & 0xff;
+ptr[3] = (v >> 56) & 0xff;
}
}
else if (bias_data_size == 1)
@@ -870,10 +870,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr[0] = (v >> 56) & 0xff;
-ptr[1] = (v >> 48) & 0xff;
-ptr[2] = (v >> 40) & 0xff;
-ptr[3] = (v >> 32) & 0xff;
+ptr[0] = (v >> 32) & 0xff;
+ptr[1] = (v >> 40) & 0xff;
+ptr[2] = (v >> 48) & 0xff;
+ptr[3] = (v >> 56) & 0xff;
}
}
else
@@ -891,10 +891,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr[0] = (v >> 56) & 0xff;
-ptr[1] = (v >> 48) & 0xff;
-ptr[2] = (v >> 40) & 0xff;
-ptr[3] = (v >> 32) & 0xff;
+ptr[0] = (v >> 32) & 0xff;
+ptr[1] = (v >> 40) & 0xff;
+ptr[2] = (v >> 48) & 0xff;
+ptr[3] = (v >> 56) & 0xff;
}
}
}
@@ -915,10 +915,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr[0] = (v >> 56) & 0xff;
-ptr[1] = (v >> 48) & 0xff;
-ptr[2] = (v >> 40) & 0xff;
-ptr[3] = (v >> 32) & 0xff;
+ptr[0] = (v >> 32) & 0xff;
+ptr[1] = (v >> 40) & 0xff;
+ptr[2] = (v >> 48) & 0xff;
+ptr[3] = (v >> 56) & 0xff;
}
}
else if (bias_data_size == 1)
@@ -938,10 +938,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr[0] = (v >> 56) & 0xff;
-ptr[1] = (v >> 48) & 0xff;
-ptr[2] = (v >> 40) & 0xff;
-ptr[3] = (v >> 32) & 0xff;
+ptr[0] = (v >> 32) & 0xff;
+ptr[1] = (v >> 40) & 0xff;
+ptr[2] = (v >> 48) & 0xff;
+ptr[3] = (v >> 56) & 0xff;
}
}
else
@@ -960,10 +960,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr[0] = (v >> 56) & 0xff;
-ptr[1] = (v >> 48) & 0xff;
-ptr[2] = (v >> 40) & 0xff;
-ptr[3] = (v >> 32) & 0xff;
+ptr[0] = (v >> 32) & 0xff;
+ptr[1] = (v >> 40) & 0xff;
+ptr[2] = (v >> 48) & 0xff;
+ptr[3] = (v >> 56) & 0xff;
}
}
}
@@ -1072,10 +1072,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr0[0] = (v >> 56) & 0xff;
-ptr1[0] = (v >> 48) & 0xff;
-ptr2[0] = (v >> 40) & 0xff;
-ptr3[0] = (v >> 32) & 0xff;
+ptr0[0] = (v >> 32) & 0xff;
+ptr1[0] = (v >> 40) & 0xff;
+ptr2[0] = (v >> 48) & 0xff;
+ptr3[0] = (v >> 56) & 0xff;
intptr += 4;
ptr0 += 1;
@@ -1107,10 +1107,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr0[0] = (v >> 56) & 0xff;
-ptr1[0] = (v >> 48) & 0xff;
-ptr2[0] = (v >> 40) & 0xff;
-ptr3[0] = (v >> 32) & 0xff;
+ptr0[0] = (v >> 32) & 0xff;
+ptr1[0] = (v >> 40) & 0xff;
+ptr2[0] = (v >> 48) & 0xff;
+ptr3[0] = (v >> 56) & 0xff;
intptr += 4;
ptr0 += 1;
@@ -1228,10 +1228,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr0[0] = (v >> 56) & 0xff;
-ptr1[0] = (v >> 48) & 0xff;
-ptr2[0] = (v >> 40) & 0xff;
-ptr3[0] = (v >> 32) & 0xff;
+ptr0[0] = (v >> 32) & 0xff;
+ptr1[0] = (v >> 40) & 0xff;
+ptr2[0] = (v >> 48) & 0xff;
+ptr3[0] = (v >> 56) & 0xff;
intptr += 4;
ptr0 += 1;
@@ -1263,10 +1263,10 @@ int Requantize_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option&
_v = activation_sse(_v, activation_type, activation_params);
_v = _mm_mul_ps(_v, _scale_out);
int64_t v = float2int8_sse(_v, _v);
-ptr0[0] = (v >> 56) & 0xff;
-ptr1[0] = (v >> 48) & 0xff;
-ptr2[0] = (v >> 40) & 0xff;
-ptr3[0] = (v >> 32) & 0xff;
+ptr0[0] = (v >> 32) & 0xff;
+ptr1[0] = (v >> 40) & 0xff;
+ptr2[0] = (v >> 48) & 0xff;
+ptr3[0] = (v >> 56) & 0xff;
intptr += 4;
ptr0 += 1;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册