Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
af42ce7e
MegEngine
项目概览
MegEngine 天元
/
MegEngine
9 个月 前同步成功
通知
392
Star
4702
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
af42ce7e
编写于
1月 30, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix(megdnn): some fixes of execution policy
GitOrigin-RevId: 920f39bcb6145dbcce9c693a6cc648ac5d979cfc
上级
7afa422d
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
91 addition
and
22 deletion
+91
-22
dnn/src/cuda/conv_bias/batched_matmul.cpp
dnn/src/cuda/conv_bias/batched_matmul.cpp
+5
-0
dnn/src/cuda/conv_bias/group_conv.cpp
dnn/src/cuda/conv_bias/group_conv.cpp
+2
-1
dnn/test/common/matrix_mul.cpp
dnn/test/common/matrix_mul.cpp
+34
-4
dnn/test/common/matrix_mul.h
dnn/test/common/matrix_mul.h
+10
-4
dnn/test/cuda/batched_matrix_mul.cpp
dnn/test/cuda/batched_matrix_mul.cpp
+25
-0
dnn/test/cuda/dilated_convolution.cpp
dnn/test/cuda/dilated_convolution.cpp
+4
-4
dnn/test/cuda/matrix_mul.cpp
dnn/test/cuda/matrix_mul.cpp
+11
-9
未找到文件。
dnn/src/cuda/conv_bias/batched_matmul.cpp
浏览文件 @
af42ce7e
...
...
@@ -44,6 +44,9 @@ std::pair<TensorLayoutArray, MatrixMulForward::Param> sub_opr_config(
B
.
dtype
=
src_layout
.
dtype
;
C
=
{{
dst_layout
.
shape
[
0
],
dst_layout
.
shape
[
1
],
B
.
shape
[
2
]},
dst_layout
.
dtype
};
C
.
stride
[
2
]
=
1
;
C
.
stride
[
1
]
=
dst_layout
.
stride
[
1
];
C
.
stride
[
0
]
=
dst_layout
.
stride
[
0
];
MatrixMulForward
::
Param
param
;
if
(
opr
->
param
().
compute_mode
==
param
::
Convolution
::
ComputeMode
::
FLOAT32
)
{
...
...
@@ -89,6 +92,8 @@ bool ConvBiasForwardImpl::AlgoBatchedMatmul::is_available(
return
false
;
auto
config
=
prepare_sub_opr
(
args
);
//! The dst of batched matmul should be contiguous
if
(
!
config
.
first
[
2
].
is_contiguous
())
return
false
;
auto
&&
fm
=
args
.
filter_meta
;
return
fm
.
format
==
Param
::
Format
::
NCHW
&&
...
...
dnn/src/cuda/conv_bias/group_conv.cpp
浏览文件 @
af42ce7e
...
...
@@ -109,7 +109,8 @@ void ConvBiasForwardImpl::AlgoGroupConvGeneral::exec(
auto
sub_args
=
args
;
sub_args
.
dst_tensor
=
&
conv_dst_tensor
;
sub_args
.
dst_layout
=
&
conv_dst_tensor
.
layout
;
TensorND
tsrc
{
*
args
.
src_tensor
},
tdst
{
conv_dst_tensor
},
tbias
{
*
args
.
bias_tensor
};
TensorND
tsrc
{
*
args
.
src_tensor
},
tdst
{
conv_dst_tensor
},
tbias
{
*
args
.
bias_tensor
};
SmallVector
<
size_t
>
flt_shape
(
0
);
std
::
vector
<
ptrdiff_t
>
flt_stride
(
0
);
size_t
idx
=
0
;
...
...
dnn/test/common/matrix_mul.cpp
浏览文件 @
af42ce7e
...
...
@@ -17,6 +17,8 @@
using
namespace
megdnn
;
using
namespace
test
;
//! Namespace-scope definition of the static constexpr member TestArg::UNSET_STRIDE_VAL.
//! NOTE(review): presumably required because the constant is passed by reference
//! (e.g. to emplace_back) and the build predates C++17 inline variables -- confirm.
constexpr
size_t
matrix_mul
::
TestArg
::
UNSET_STRIDE_VAL
;
std
::
vector
<
matrix_mul
::
TestArg
>
matrix_mul
::
get_matmul_args_no_mask
()
{
std
::
vector
<
TestArg
>
args
;
...
...
@@ -57,7 +59,9 @@ matrix_mul::get_batched_matmul_args_cublaslt() {
// so please uncomment it if the bug is fixed
for
(
size_t
k
:
{
32
,
64
})
{
args
.
emplace_back
(
m
,
n
,
k
,
0
,
0
,
0
,
0
,
2
);
args
.
emplace_back
(
m
,
n
,
k
,
0
,
TestArg
::
UNSET_STRIDE_VAL
,
TestArg
::
UNSET_STRIDE_VAL
,
TestArg
::
UNSET_STRIDE_VAL
,
2
);
}
}
}
...
...
@@ -70,7 +74,9 @@ matrix_mul::get_batched_matmul_args_int8x8x32() {
for
(
size_t
m
:
{
1
,
2
,
3
,
4
,
5
,
8
,
64
})
{
for
(
size_t
n
:
{
1
,
2
,
3
,
4
,
5
,
8
,
64
})
{
for
(
size_t
k
:
{
1
,
2
,
3
,
4
,
5
,
8
,
64
})
{
args
.
emplace_back
(
m
,
n
,
k
,
0
,
0
,
0
,
0
,
2
);
args
.
emplace_back
(
m
,
n
,
k
,
0
,
TestArg
::
UNSET_STRIDE_VAL
,
TestArg
::
UNSET_STRIDE_VAL
,
TestArg
::
UNSET_STRIDE_VAL
,
2
);
}
}
}
...
...
@@ -136,6 +142,30 @@ std::vector<matrix_mul::TestArg> matrix_mul::get_batched_matmul_args() {
return
args
;
}
std
::
vector
<
matrix_mul
::
TestArg
>
matrix_mul
::
get_batched_matmul_broadcast_args
()
{
std
::
vector
<
TestArg
>
args
;
for
(
size_t
mask
=
0
;
mask
<
4
;
++
mask
)
{
std
::
vector
<
TestArg
>
args_temp
=
matrix_mul
::
get_batched_matmul_broadcast_args_mask
(
mask
);
for
(
auto
arg
:
args_temp
)
args
.
emplace_back
(
arg
);
}
return
args
;
}
//! Build the broadcast variants of the batched-matmul cases for one mask.
//!
//! Takes every case produced by get_batched_matmul_args_mask(mask) and forces
//! its A_batch_stride to 0; all other fields are left as generated.
//!
//! \param mask forwarded unchanged to get_batched_matmul_args_mask
//! \return the modified cases, in the order the generator produced them
std::vector<matrix_mul::TestArg>
matrix_mul::get_batched_matmul_broadcast_args_mask(uint8_t mask) {
    // Patch the generated vector in place instead of copying every element
    // into a second vector first.
    std::vector<TestArg> args = matrix_mul::get_batched_matmul_args_mask(mask);
    for (auto& arg : args) {
        arg.A_batch_stride = 0;
    }
    return args;
}
template
<
typename
Opr
>
void
matrix_mul
::
check_matrix_mul
(
DType
A_dtype
,
DType
B_dtype
,
DType
C_dtype
,
Handle
*
handle
,
...
...
@@ -170,9 +200,9 @@ void matrix_mul::check_matrix_mul(DType A_dtype, DType B_dtype, DType C_dtype,
checker
.
set_rng
(
0
,
rng
.
get
()).
set_rng
(
1
,
rng
.
get
());
}
//! return expect if stride ==
0
, stride otherwise
//! return expect if stride ==
-1
, stride otherwise
auto
stride_val
=
[](
size_t
stride
,
size_t
expect
)
->
size_t
{
if
(
stride
==
0
)
{
if
(
stride
==
TestArg
::
UNSET_STRIDE_VAL
)
{
return
expect
;
}
else
{
return
stride
;
...
...
dnn/test/common/matrix_mul.h
浏览文件 @
af42ce7e
...
...
@@ -24,15 +24,19 @@ namespace matrix_mul {
// mask & 1 denotes transposeA; mask & 2 denotes transposeB
struct
TestArg
{
constexpr
static
size_t
UNSET_STRIDE_VAL
=
static_cast
<
size_t
>
(
-
1
);
size_t
m
,
n
,
k
,
mask
;
size_t
A_stride
,
B_stride
,
C_stride
,
b
;
size_t
A_batch_stride
,
B_batch_stride
,
C_batch_stride
;
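// stride = UNSET_STRIDE_VAL means the default stride, the dim is contiguous,
// i.e. the stride value which makes tensor compact. A stride of 0 is a real
// value (used as the batch stride of the broadcast test cases).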
TestArg
(
size_t
m
,
size_t
n
,
size_t
k
,
size_t
mask
,
size_t
A_stride
=
0
,
size_t
B_stride
=
0
,
size_t
C_stride
=
0
,
size_t
b
=
1
,
size_t
A_batch_stride
=
0
,
size_t
B_batch_stride
=
0
,
size_t
C_batch_stride
=
0
)
TestArg
(
size_t
m
,
size_t
n
,
size_t
k
,
size_t
mask
,
size_t
A_stride
=
UNSET_STRIDE_VAL
,
size_t
B_stride
=
UNSET_STRIDE_VAL
,
size_t
C_stride
=
UNSET_STRIDE_VAL
,
size_t
b
=
1
,
size_t
A_batch_stride
=
UNSET_STRIDE_VAL
,
size_t
B_batch_stride
=
UNSET_STRIDE_VAL
,
size_t
C_batch_stride
=
UNSET_STRIDE_VAL
)
:
m
{
m
},
n
{
n
},
k
{
k
},
...
...
@@ -51,6 +55,8 @@ std::vector<TestArg> get_matmul_args_mask(uint8_t mask);
// Generators of TestArg lists consumed by the matrix-mul checkers.
std
::
vector
<
TestArg
>
get_matmul_args
();
std
::
vector
<
TestArg
>
get_batched_matmul_args_mask
(
uint8_t
mask
);
std
::
vector
<
TestArg
>
get_batched_matmul_args
();
// Concatenation of get_batched_matmul_broadcast_args_mask(mask) for mask in [0, 4).
std
::
vector
<
TestArg
>
get_batched_matmul_broadcast_args
();
// The cases of get_batched_matmul_args_mask(mask) with A_batch_stride set to 0.
std
::
vector
<
TestArg
>
get_batched_matmul_broadcast_args_mask
(
uint8_t
mask
);
std
::
vector
<
TestArg
>
get_matmul_mk_packed_args
(
size_t
nbase
);
std
::
vector
<
TestArg
>
get_batched_matmul_args_cublaslt
();
std
::
vector
<
TestArg
>
get_batched_matmul_args_int8x8x32
();
...
...
dnn/test/cuda/batched_matrix_mul.cpp
浏览文件 @
af42ce7e
...
...
@@ -8,6 +8,7 @@
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include <vector>
#include "test/cuda/fixture.h"
#include "test/common/checker.h"
...
...
@@ -62,6 +63,30 @@ TEST_F(CUDA, BATCHED_MATRIX_MUL_LT_F32_PART4) {
#undef F32_TEST_PART
// Runs check_batched_matrix_mul with Float32 for the first two dtypes, "CUBLAS",
// 1e-3 and the broadcast cases (A_batch_stride == 0) generated for mask 0.
// NOTE(review): "CUBLAS" / 1e-3 are presumably the algorithm name and tolerance -- confirm
// against check_batched_matrix_mul's signature.
TEST_F
(
CUDA
,
BATCHED_MATRIX_MUL_F32_BROADCAST_PART1
){
matrix_mul
::
check_batched_matrix_mul
(
dtype
::
Float32
{},
dtype
::
Float32
{},
{},
handle_cuda
(),
"CUBLAS"
,
1e-3
,
matrix_mul
::
get_batched_matmul_broadcast_args_mask
(
0
));
}
// Runs check_batched_matrix_mul with Float32 for the first two dtypes, "CUBLAS",
// 1e-3 and the broadcast cases (A_batch_stride == 0) generated for mask 1.
// NOTE(review): "CUBLAS" / 1e-3 are presumably the algorithm name and tolerance -- confirm
// against check_batched_matrix_mul's signature.
TEST_F
(
CUDA
,
BATCHED_MATRIX_MUL_F32_BROADCAST_PART2
){
matrix_mul
::
check_batched_matrix_mul
(
dtype
::
Float32
{},
dtype
::
Float32
{},
{},
handle_cuda
(),
"CUBLAS"
,
1e-3
,
matrix_mul
::
get_batched_matmul_broadcast_args_mask
(
1
));
}
// Runs check_batched_matrix_mul with Float32 for the first two dtypes, "CUBLAS",
// 1e-3 and the broadcast cases (A_batch_stride == 0) generated for mask 2.
// NOTE(review): "CUBLAS" / 1e-3 are presumably the algorithm name and tolerance -- confirm
// against check_batched_matrix_mul's signature.
TEST_F
(
CUDA
,
BATCHED_MATRIX_MUL_F32_BROADCAST_PART3
){
matrix_mul
::
check_batched_matrix_mul
(
dtype
::
Float32
{},
dtype
::
Float32
{},
{},
handle_cuda
(),
"CUBLAS"
,
1e-3
,
matrix_mul
::
get_batched_matmul_broadcast_args_mask
(
2
));
}
// Runs check_batched_matrix_mul with Float32 for the first two dtypes, "CUBLAS",
// 1e-3 and the broadcast cases (A_batch_stride == 0) generated for mask 3.
// NOTE(review): "CUBLAS" / 1e-3 are presumably the algorithm name and tolerance -- confirm
// against check_batched_matrix_mul's signature.
TEST_F
(
CUDA
,
BATCHED_MATRIX_MUL_F32_BROADCAST_PART4
){
matrix_mul
::
check_batched_matrix_mul
(
dtype
::
Float32
{},
dtype
::
Float32
{},
{},
handle_cuda
(),
"CUBLAS"
,
1e-3
,
matrix_mul
::
get_batched_matmul_broadcast_args_mask
(
3
));
}
TEST_F
(
CUDA
,
BATCHED_MATRIX_MUL_F32_BRUTE_FORCE_PART1
)
{
matrix_mul
::
check_batched_matrix_mul
(
dtype
::
Float32
{},
dtype
::
Float32
{},
{},
handle_cuda
(),
...
...
dnn/test/cuda/dilated_convolution.cpp
浏览文件 @
af42ce7e
...
...
@@ -75,8 +75,8 @@ TEST_F(CUDA, DILATED_CONVOLUTION_BACKWARD_DATA)
"CUDNN_CONVOLUTION_BWD_DATA_ALGO_1"
CUDNN_VERSION_STRING
));
printf
(
"cudnn version >= 7.5, use cudnn impl for dilated convolution
\n
"
);
#else
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionBackwardData
>
(
"MATMUL"
));
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionBackwardData
>
(
ExecutionPolicyAlgoName
{
"MATMUL"
,
{{
"CUBLAS"
,
{}}}}
));
#endif
NormalRNG
default_rng
;
for
(
auto
&&
arg
:
args
)
{
...
...
@@ -139,8 +139,8 @@ TEST_F(CUDA, DILATED_CONVOLUTION_BACKWARD_FILTER)
"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1"
CUDNN_VERSION_STRING
));
printf
(
"cudnn version >= 7.5, use cudnn impl for dilated convolution
\n
"
);
#else
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionBackwardFilter
>
(
"MATMUL"
));
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionBackwardFilter
>
(
ExecutionPolicyAlgoName
{
"MATMUL"
,
{{
"CUBLAS"
,
{}}}}
));
#endif
NormalRNG
default_rng
;
bool
first_run
=
true
;
...
...
dnn/test/cuda/matrix_mul.cpp
浏览文件 @
af42ce7e
...
...
@@ -51,7 +51,8 @@ TEST_F(CUDA, MATRIX_MUL_QUANTIZED4x4x32) {
if
(
cuda
::
current_device_prop
().
major
<
7
||
(
cuda
::
current_device_prop
().
major
==
7
&&
cuda
::
current_device_prop
().
minor
<
5
))
{
printf
(
"Skip CUDA.MATRIX_MUL_QUANTIZED4x4x32 test as current device doesn't support
\n
"
);
printf
(
"Skip CUDA.MATRIX_MUL_QUANTIZED4x4x32 test as current device "
"doesn't support
\n
"
);
return
;
}
Checker
<
MatrixMul
>
checker
(
handle_cuda
(),
false
);
...
...
@@ -257,19 +258,19 @@ TEST_F(CUDA, MATRIX_MUL) {
BS
=
TensorShape
{
k
,
n
};
CS
=
TensorShape
{
m
,
n
};
TensorLayout
AL
,
BL
,
CL
;
if
(
arg
.
A_stride
==
0
)
{
if
(
arg
.
A_stride
==
matrix_mul
::
TestArg
::
UNSET_STRIDE_VAL
)
{
AL
=
TensorLayout
(
AS
,
dtype
::
Float32
());
}
else
{
AL
=
TensorLayout
(
AS
,
{
ptrdiff_t
(
arg
.
A_stride
),
1
},
dtype
::
Float32
());
}
if
(
arg
.
B_stride
==
0
)
{
if
(
arg
.
B_stride
==
matrix_mul
::
TestArg
::
UNSET_STRIDE_VAL
)
{
BL
=
TensorLayout
(
BS
,
dtype
::
Float32
());
}
else
{
BL
=
TensorLayout
(
BS
,
{
ptrdiff_t
(
arg
.
B_stride
),
1
},
dtype
::
Float32
());
}
if
(
arg
.
C_stride
==
0
)
{
if
(
arg
.
C_stride
==
matrix_mul
::
TestArg
::
UNSET_STRIDE_VAL
)
{
CL
=
TensorLayout
(
CS
,
dtype
::
Float32
());
}
else
{
CL
=
TensorLayout
(
CS
,
{
ptrdiff_t
(
arg
.
C_stride
),
1
},
...
...
@@ -285,8 +286,9 @@ TEST_F(CUDA, MATRIX_MUL_CUBLASLT)
NormalRNG
normal_rng
;
Checker
<
MatrixMul
>
checker
(
handle_cuda
());
checker
.
set_rng
(
0
,
&
normal_rng
)
.
set_rng
(
1
,
&
normal_rng
)
.
set_before_exec_callback
(
AlgoChecker
<
MatrixMulForward
>
(
"CUBLAS_LT"
));
.
set_rng
(
1
,
&
normal_rng
)
.
set_before_exec_callback
(
AlgoChecker
<
MatrixMulForward
>
(
"CUBLAS_LT"
));
using
Param
=
MatrixMul
::
Param
;
size_t
m
=
32
,
n
=
32
,
k
=
32
;
// test Int8 matmul
...
...
@@ -350,19 +352,19 @@ TEST_F(CUDA, MATRIX_MUL_CUBLASLT)
BS
=
TensorShape
{
k
,
n
};
CS
=
TensorShape
{
m
,
n
};
TensorLayout
AL
,
BL
,
CL
;
if
(
arg
.
A_stride
==
0
)
{
if
(
arg
.
A_stride
==
matrix_mul
::
TestArg
::
UNSET_STRIDE_VAL
)
{
AL
=
TensorLayout
(
AS
,
dtype
::
Float32
());
}
else
{
AL
=
TensorLayout
(
AS
,
{
ptrdiff_t
(
arg
.
A_stride
),
1
},
dtype
::
Float32
());
}
if
(
arg
.
B_stride
==
0
)
{
if
(
arg
.
B_stride
==
matrix_mul
::
TestArg
::
UNSET_STRIDE_VAL
)
{
BL
=
TensorLayout
(
BS
,
dtype
::
Float32
());
}
else
{
BL
=
TensorLayout
(
BS
,
{
ptrdiff_t
(
arg
.
B_stride
),
1
},
dtype
::
Float32
());
}
if
(
arg
.
C_stride
==
0
)
{
if
(
arg
.
C_stride
==
matrix_mul
::
TestArg
::
UNSET_STRIDE_VAL
)
{
CL
=
TensorLayout
(
CS
,
dtype
::
Float32
());
}
else
{
CL
=
TensorLayout
(
CS
,
{
ptrdiff_t
(
arg
.
C_stride
),
1
},
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录