Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
67f11788
MegEngine
项目概览
MegEngine 天元
/
MegEngine
8 个月前同步成功
通知
392
Star
4702
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
67f11788
编写于
7月 27, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
perf(arm_common): add elemwise unary multithread support
GitOrigin-RevId: 8eac123f67224e283b368c515bf0b8e7ef565158
上级
3afa3893
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
25 additions
and
6 deletions
+25
-6
dnn/src/arm_common/elemwise/unary/algo.cpp
dnn/src/arm_common/elemwise/unary/algo.cpp
+17
-5
dnn/test/arm_common/elemwise.cpp
dnn/test/arm_common/elemwise.cpp
+7
-0
scripts/cmake-build/cross_build_android_arm_inference.sh
scripts/cmake-build/cross_build_android_arm_inference.sh
+1
-1
未找到文件。
dnn/src/arm_common/elemwise/unary/algo.cpp
浏览文件 @
67f11788
...
...
@@ -71,12 +71,19 @@ void ElemwiseImpl::AlgoUnary::exec(const KernParam& kern_param) const {
thin_function<void(const _type*, _type*, DType, DType, size_t)> \
run = OpCallerUnary<_op<_type, _type>, \
BcastType::VEC>::run; \
MEGDNN_DISPATCH_CPU_KERN( \
auto kernel = [nr_elems, nr_elems_per_thread, src0, dst_tensor, \
run](size_t task_id, size_t) { \
size_t offset = task_id * nr_elems_per_thread; \
size_t nr_elems_thread = \
std::min(nr_elems - offset, nr_elems_per_thread); \
run(static_cast<const _type*>(src0.raw_ptr) + offset, \
static_cast<_type*>(dst_tensor.raw_ptr) + offset, \
src0.layout.dtype, dst_tensor.layout.dtype, \
nr_elems_thread); \
}; \
MEGDNN_DISPATCH_MULTI_THREAD_CPU_KERN( \
static_cast<naive::HandleImpl*>(kern_param.handle), \
run(static_cast<const _type*>(src0.raw_ptr), \
static_cast<_type*>(dst_tensor.raw_ptr), \
src0.layout.dtype, dst_tensor.layout.dtype, \
nr_elems)); \
nr_threads, kernel); \
} \
MIDOUT_END(); \
return
...
...
@@ -86,7 +93,12 @@ void ElemwiseImpl::AlgoUnary::exec(const KernParam& kern_param) const {
auto
&
src0
=
elparam
[
0
];
auto
&
dst_tensor
=
*
(
kern_param
.
m_dst
);
size_t
nr_threads
=
static_cast
<
naive
::
HandleImpl
*>
(
kern_param
.
handle
)
->
megcore_dispatcher
()
->
nr_threads
();
size_t
nr_elems
=
src0
.
layout
.
total_nr_elems
();
size_t
nr_elems_per_thread
=
(
nr_elems
+
nr_threads
-
1
)
/
nr_threads
;
#define DISPATCH_MODE_FLOAT(_case, _type, _type_midout_id) \
switch (kern_param.mode) { \
...
...
dnn/test/arm_common/elemwise.cpp
浏览文件 @
67f11788
...
...
@@ -26,6 +26,13 @@ TYPED_TEST(ARM_ELEMWISE, run) {
elemwise
::
run_test
<
TypeParam
>
(
this
->
handle
());
}
template
<
typename
tag
>
class
ARM_ELEMWISE_MULTI_THREADS
:
public
ARM_COMMON_MULTI_THREADS
{};
TYPED_TEST_CASE
(
ARM_ELEMWISE_MULTI_THREADS
,
elemwise
::
test_types
);
TYPED_TEST
(
ARM_ELEMWISE_MULTI_THREADS
,
run
)
{
elemwise
::
run_test
<
TypeParam
>
(
this
->
handle
());
}
TEST_F
(
ARM_COMMON
,
ELEMWISE_FORWARD_TERNARY
)
{
using
Mode
=
ElemwiseForward
::
Param
::
Mode
;
Checker
<
ElemwiseForward
>
checker
(
handle
());
...
...
scripts/cmake-build/cross_build_android_arm_inference.sh
浏览文件 @
67f11788
...
...
@@ -2,7 +2,7 @@
set
-e
ARCHS
=(
"arm64-v8a"
"armeabi-v7a"
)
BUILD_TYPE
=
RelWithDebInfo
BUILD_TYPE
=
Release
MGE_ARMV8_2_FEATURE_FP16
=
OFF
MGE_DISABLE_FLOAT16
=
OFF
ARCH
=
arm64-v8a
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录