Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
4aa277a2
MegEngine
项目概览
MegEngine 天元
/
MegEngine
9 个月 前同步成功
通知
392
Star
4702
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
4aa277a2
编写于
11月 06, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(dnn/cuda): misc
GitOrigin-RevId: 1f8f91a0cc7778493f5ed11310c19e2a5e080739
上级
094601e8
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
118 addition
and
49 deletion
+118
-49
dnn/src/cuda/conv_bias/algo.cpp
dnn/src/cuda/conv_bias/algo.cpp
+12
-11
dnn/src/cuda/conv_bias/algo.h
dnn/src/cuda/conv_bias/algo.h
+17
-4
dnn/src/cuda/conv_bias/opr_impl.cpp
dnn/src/cuda/conv_bias/opr_impl.cpp
+42
-8
dnn/src/cuda/conv_bias/opr_impl.h
dnn/src/cuda/conv_bias/opr_impl.h
+3
-10
dnn/test/common/comparator.inl
dnn/test/common/comparator.inl
+16
-0
dnn/test/common/conv_bias.cpp
dnn/test/common/conv_bias.cpp
+9
-1
dnn/test/common/tensor.inl
dnn/test/common/tensor.inl
+6
-5
dnn/test/cuda/conv_bias_int8.cpp
dnn/test/cuda/conv_bias_int8.cpp
+13
-10
未找到文件。
dnn/src/cuda/conv_bias/algo.cpp
浏览文件 @
4aa277a2
...
...
@@ -104,20 +104,19 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() {
ConvBiasForwardImpl
::
AlgoPack
ConvBiasForwardImpl
::
sm_algo_pack
;
ConvBiasForwardImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
ConvBiasForwardImpl
*
o
,
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
)
ConvBiasForwardImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
ConvBiasForwardImpl
*
o
,
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
const
PreprocessedFilter
*
preprocessed_filter
)
:
SizeArgs
(
o
,
src
,
filter
,
o
->
check_layout_fwd
(
src
,
filter
,
dst
),
bias
,
z
,
dst
)
{}
z
,
dst
,
preprocessed_filter
)
{}
ConvBiasForwardImpl
::
AlgoBase
::
SizeArgs
::
SizeArgs
(
ConvBiasForwardImpl
*
o
,
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
)
const
TensorLayout
&
dst
,
const
PreprocessedFilter
*
preprocessed_filter
)
:
BiasForwardSizeArgs
{
concrete_handle
(
o
->
handle
()),
&
src
,
&
filter
,
...
...
@@ -126,14 +125,16 @@ ConvBiasForwardImpl::AlgoBase::SizeArgs::SizeArgs(
filter_meta
,
&
dst
,
o
->
param
().
nonlineMode
},
opr
{
o
}
{}
opr
{
o
},
preprocessed_filter
{
preprocessed_filter
}
{}
ConvBiasForwardImpl
::
AlgoBase
::
ExecArgs
::
ExecArgs
(
ConvBiasForwardImpl
*
opr
,
_megdnn_tensor_in
src
,
_megdnn_tensor_in
filter
,
_megdnn_tensor_in
bias
,
_megdnn_tensor_in
z
,
_megdnn_tensor_out
dst
,
_megdnn_workspace
workspace
)
_megdnn_tensor_out
dst
,
_megdnn_workspace
workspace
,
const
PreprocessedFilter
*
preprocessed_filter
)
:
SizeArgs
(
opr
,
src
.
layout
,
filter
.
layout
,
bias
.
layout
,
z
.
layout
,
dst
.
layout
),
dst
.
layout
,
preprocessed_filter
),
src_tensor
{
&
src
},
filter_tensor
{
&
filter
},
bias_tensor
{
&
bias
},
...
...
dnn/src/cuda/conv_bias/algo.h
浏览文件 @
4aa277a2
...
...
@@ -41,16 +41,19 @@ public:
AlgoBase
()
:
Algorithm
()
{
m_handle_type
=
Handle
::
HandleType
::
CUDA
;
}
struct
SizeArgs
:
public
conv_bias
::
BiasForwardSizeArgs
{
ConvBiasForwardImpl
*
opr
;
const
PreprocessedFilter
*
preprocessed_filter
;
std
::
string
to_string
()
const
;
SizeArgs
(
ConvBiasForwardImpl
*
opr
,
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
);
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
const
PreprocessedFilter
*
preprocessed_filter
=
nullptr
);
SizeArgs
(
ConvBiasForwardImpl
*
opr
,
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
);
const
TensorLayout
&
dst
,
const
PreprocessedFilter
*
preprocessed_filter
=
nullptr
);
void
init_conv_bias_desc
(
conv_bias
::
CUDNNForwardDescs
&
desc
)
const
{
desc
.
set_conv_bias
(
*
src_layout
,
filter_meta
,
*
dst_layout
,
...
...
@@ -69,11 +72,21 @@ public:
ExecArgs
(
ConvBiasForwardImpl
*
opr
,
_megdnn_tensor_in
src
,
_megdnn_tensor_in
filter
,
_megdnn_tensor_in
bias
,
_megdnn_tensor_in
z
,
_megdnn_tensor_out
dst
,
_megdnn_workspace
workspace
);
_megdnn_workspace
workspace
,
const
PreprocessedFilter
*
preprocessed_filter
=
nullptr
);
};
virtual
bool
is_available
(
const
SizeArgs
&
args
)
const
=
0
;
virtual
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
=
0
;
virtual
void
exec
(
const
ExecArgs
&
args
)
const
=
0
;
virtual
size_t
get_preprocess_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
{
return
0
;
}
virtual
SmallVector
<
TensorLayout
>
deduce_preprocessed_filter_layout
(
const
SizeArgs
&
args
)
const
{
return
{};
}
virtual
void
exec_preprocess
(
const
ExecArgs
&
args
)
const
{}
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
...
...
dnn/src/cuda/conv_bias/opr_impl.cpp
浏览文件 @
4aa277a2
...
...
@@ -29,7 +29,8 @@ void ConvBiasForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
_megdnn_workspace
workspace
)
{
check_exec
(
src
.
layout
,
filter
.
layout
,
bias
.
layout
,
z
.
layout
,
dst
.
layout
,
workspace
.
size
,
preprocessed_filter
);
AlgoBase
::
ExecArgs
args
(
this
,
src
,
filter
,
bias
,
z
,
dst
,
workspace
);
AlgoBase
::
ExecArgs
args
(
this
,
src
,
filter
,
bias
,
z
,
dst
,
workspace
,
preprocessed_filter
);
auto
algo
=
get_algorithm
(
this
,
src
.
layout
,
filter
.
layout
,
bias
.
layout
,
z
.
layout
,
dst
.
layout
);
algo
->
check_workspace
(
args
,
workspace
).
exec
(
args
);
...
...
@@ -205,17 +206,50 @@ const char* ConvBiasForwardImpl::get_algorithm_set_name() const {
return
"CONV_BIAS_CUDA"
;
}
size_t
ConvBiasForwardImpl
::
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
const
PreprocessedFilter
*
)
{
AlgoBase
::
SizeArgs
args
{
this
,
src
,
filter
,
bias
,
z
,
dst
};
size_t
ConvBiasForwardImpl
::
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
const
PreprocessedFilter
*
preprocessed_filter
)
{
AlgoBase
::
SizeArgs
args
{
this
,
src
,
filter
,
bias
,
z
,
dst
,
preprocessed_filter
};
return
get_algorithm
(
this
,
src
,
filter
,
bias
,
z
,
dst
)
->
get_workspace_in_bytes
(
args
);
};
size_t
ConvBiasForwardImpl
::
get_preprocess_workspace_in_bytes
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
)
{
AlgoBase
::
SizeArgs
args
{
this
,
src
,
filter
,
bias
,
z
,
dst
};
return
get_algorithm
(
this
,
src
,
filter
,
bias
,
z
,
dst
)
->
get_preprocess_workspace_in_bytes
(
args
);
}
SmallVector
<
TensorLayout
>
ConvBiasForwardImpl
::
deduce_preprocessed_filter_layout
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
)
{
AlgoBase
::
SizeArgs
args
{
this
,
src
,
filter
,
bias
,
z
,
dst
};
return
get_algorithm
(
this
,
src
,
filter
,
bias
,
z
,
dst
)
->
deduce_preprocessed_filter_layout
(
args
);
}
void
ConvBiasForwardImpl
::
exec_preprocess
(
const
TensorLayout
&
src_layout
,
_megdnn_tensor_in
filter
,
const
TensorLayout
&
bias_layout
,
const
TensorLayout
&
z_layout
,
const
TensorLayout
&
dst_layout
,
PreprocessedFilter
*
preprocessed_filter
,
_megdnn_workspace
workspace
)
{
TensorND
src
{
nullptr
,
src_layout
},
dst
{
nullptr
,
dst_layout
},
z
{
nullptr
,
z_layout
},
bias
{
nullptr
,
bias_layout
};
AlgoBase
::
ExecArgs
args
(
this
,
src
,
filter
,
bias
,
z
,
dst
,
workspace
,
preprocessed_filter
);
auto
algo
=
get_algorithm
(
this
,
src
.
layout
,
filter
.
layout
,
bias
.
layout
,
z
.
layout
,
dst
.
layout
);
return
algo
->
exec_preprocess
(
args
);
}
}
// namespace cuda
}
// namespace megdnn
...
...
dnn/src/cuda/conv_bias/opr_impl.h
浏览文件 @
4aa277a2
...
...
@@ -44,21 +44,14 @@ public:
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
{
return
0
;
};
const
TensorLayout
&
)
override
;
SmallVector
<
TensorLayout
>
deduce_preprocessed_filter_layout
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
{
return
{};
}
const
TensorLayout
&
,
const
TensorLayout
&
)
override
;
void
exec_preprocess
(
const
TensorLayout
&
,
_megdnn_tensor_in
,
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
,
PreprocessedFilter
*
,
_megdnn_workspace
)
override
{
megdnn_throw
(
"cuda conv_bias exec_preprocess has not implemeted yet"
);
}
_megdnn_workspace
)
override
;
const
char
*
get_algorithm_set_name
()
const
override
;
class
AlgoBase
;
...
...
dnn/test/common/comparator.inl
浏览文件 @
4aa277a2
...
...
@@ -32,6 +32,22 @@ class DefaultComparator<dt_float32> {
}
};
template
<
>
class
DefaultComparator
<
dt_qint8
>
{
public:
bool
is_same
(
dt_qint8
expected
,
dt_qint8
actual
)
const
{
return
expected
.
as_int8
()
==
actual
.
as_int8
();
}
};
template
<
>
class
DefaultComparator
<
dt_qint32
>
{
public:
bool
is_same
(
dt_qint32
expected
,
dt_qint32
actual
)
const
{
return
expected
.
as_int32
()
==
actual
.
as_int32
();
}
};
}
// namespace test
}
// namespace megdnn
...
...
dnn/test/common/conv_bias.cpp
浏览文件 @
4aa277a2
...
...
@@ -741,10 +741,12 @@ void check_conv_bias(DType src_dtype, DType filter_dtype, DType bias_dtype,
std
::
unique_ptr
<
RNG
>
rng
;
std
::
unique_ptr
<
RNG
>
bias_rng
;
std
::
unique_ptr
<
RNG
>
const_rng
;
std
::
unique_ptr
<
RNG
>
zero_rng
;
// TODO: check range of rng
if
(
src_dtype
.
enumv
()
==
DTypeEnum
::
QuantizedS8
)
{
rng
=
std
::
make_unique
<
UniformIntRNG
>
(
-
3
,
3
);
const_rng
=
std
::
make_unique
<
UniformIntRNG
>
(
1
,
1
);
zero_rng
=
std
::
make_unique
<
UniformIntRNG
>
(
0
,
0
);
megdnn_assert
(
bias_dtype
.
enumv
()
==
DTypeEnum
::
QuantizedS32
);
bias_rng
=
std
::
make_unique
<
UniformIntRNG
>
(
-
50
,
50
);
checker
.
set_epsilon
(
1
+
1e-3
)
...
...
@@ -775,6 +777,12 @@ void check_conv_bias(DType src_dtype, DType filter_dtype, DType bias_dtype,
fh
=
arg
.
filter
[
2
];
fw
=
arg
.
filter
[
3
];
z
[
1
]
=
arg
.
filter
[
0
]
/
4
;
}
else
if
(
format
==
Format
::
NCHW32
)
{
hi
=
arg
.
src
[
2
];
wi
=
arg
.
src
[
3
];
fh
=
arg
.
filter
[
2
];
fw
=
arg
.
filter
[
3
];
z
[
1
]
=
arg
.
filter
[
0
]
/
32
;
}
else
{
megdnn_assert
(
format
==
Format
::
CHWN4
);
hi
=
arg
.
src
[
1
];
...
...
@@ -798,7 +806,7 @@ void check_conv_bias(DType src_dtype, DType filter_dtype, DType bias_dtype,
megdnn_assert
(
rng
!=
nullptr
&&
bias_rng
!=
nullptr
);
checker
.
set_rng
(
0
,
rng
.
get
())
.
set_rng
(
1
,
rng
.
get
())
.
set_rng
(
2
,
rng
.
get
())
.
set_rng
(
2
,
bias_
rng
.
get
())
.
set_rng
(
3
,
rng
.
get
());
if
(
args
.
empty
())
{
std
::
vector
<
TestArg
>
default_args
;
...
...
dnn/test/common/tensor.inl
浏览文件 @
4aa277a2
...
...
@@ -24,7 +24,8 @@ Tensor<T, C>::Tensor(Handle *handle, TensorLayout layout):
m_handle
(
handle
),
m_comparator
(
C
())
{
layout
.
dtype
=
get_dtype_from_static_type
<
T
>
();
if
(
!
layout
.
dtype
.
valid
())
layout
.
dtype
=
get_dtype_from_static_type
<
T
>
();
m_tensornd
.
raw_ptr
=
megdnn_malloc
(
m_handle
,
layout
.
span
().
dist_byte
());
m_tensornd
.
layout
=
layout
;
}
...
...
@@ -67,10 +68,10 @@ void Tensor<T, C>::check_with(const Tensor<T, C_> &rhs) const
auto
index
=
Index
(
m_tensornd
.
layout
,
linear_idx
);
auto
offset
=
index
.
positive_offset
();
ASSERT_TRUE
(
m_comparator
.
is_same
(
p0
[
offset
],
p1
[
offset
]))
<<
"Index is "
<<
index
.
to_string
()
<<
"; layout is "
<<
m_tensornd
.
layout
.
to_string
()
<<
"; this->ptr()[offset] is "
<<
this
->
ptr
()[
offset
]
<<
"; rhs.ptr()[offset] is "
<<
rhs
.
ptr
()[
offset
];
<<
"Index is "
<<
index
.
to_string
()
<<
"; layout is "
<<
m_tensornd
.
layout
.
to_string
()
<<
"; this->ptr()[offset] is "
<<
this
->
ptr
()[
offset
]
<<
"; rhs.ptr()[offset] is "
<<
rhs
.
ptr
()[
offset
];
}
}
...
...
dnn/test/cuda/conv_bias_int8.cpp
浏览文件 @
4aa277a2
...
...
@@ -18,6 +18,8 @@
#include "test/cuda/benchmark.h"
#include "test/cuda/fixture.h"
#include "test/cuda/utils.h"
#include "test/common/tensor.h"
#include "test/common/workspace_wrapper.h"
#define V1(x) #x
#define V(x) V1(x)
...
...
@@ -34,7 +36,6 @@ struct BenchArgs {
std
::
vector
<
BenchArgs
>
get_resnet50_bench_args
(
size_t
batch
=
64
)
{
std
::
vector
<
BenchArgs
>
args
;
args
.
emplace_back
(
BenchArgs
{
batch
,
64
,
56
,
56
,
256
,
1
,
1
});
args
.
emplace_back
(
BenchArgs
{
batch
,
256
,
56
,
56
,
32
,
3
,
1
});
args
.
emplace_back
(
BenchArgs
{
batch
,
256
,
56
,
56
,
32
,
3
,
2
});
args
.
emplace_back
(
BenchArgs
{
batch
,
4
,
256
,
256
,
32
,
7
,
2
});
...
...
@@ -44,7 +45,6 @@ std::vector<BenchArgs> get_resnet50_bench_args(size_t batch = 64) {
args
.
emplace_back
(
BenchArgs
{
batch
,
64
,
56
,
56
,
64
,
3
,
1
});
args
.
emplace_back
(
BenchArgs
{
batch
,
64
,
56
,
56
,
64
,
3
,
2
});
args
.
emplace_back
(
BenchArgs
{
batch
,
256
,
56
,
56
,
64
,
3
,
2
});
args
.
emplace_back
(
BenchArgs
{
batch
,
64
,
56
,
56
,
256
,
1
,
1
});
args
.
emplace_back
(
BenchArgs
{
batch
,
256
,
56
,
56
,
512
,
1
,
2
});
args
.
emplace_back
(
BenchArgs
{
batch
,
256
,
56
,
56
,
128
,
1
,
2
});
...
...
@@ -57,6 +57,7 @@ std::vector<BenchArgs> get_resnet50_bench_args(size_t batch = 64) {
args
.
emplace_back
(
BenchArgs
{
batch
,
1024
,
14
,
14
,
256
,
1
,
1
});
args
.
emplace_back
(
BenchArgs
{
batch
,
256
,
14
,
14
,
256
,
3
,
1
});
args
.
emplace_back
(
BenchArgs
{
batch
,
256
,
14
,
14
,
1024
,
1
,
1
});
args
.
emplace_back
(
BenchArgs
{
batch
,
256
,
14
,
14
,
1024
,
1
,
2
});
args
.
emplace_back
(
BenchArgs
{
batch
,
1024
,
14
,
14
,
2048
,
1
,
2
});
args
.
emplace_back
(
BenchArgs
{
batch
,
1024
,
14
,
14
,
512
,
1
,
2
});
...
...
@@ -331,6 +332,12 @@ void benchmark_target_algo_with_cudnn_tsc(
if
((
format
==
Format
::
CHWN4
||
format
==
Format
::
NCHW4
)
&&
(
arg
.
ci
%
16
!=
0
))
continue
;
Format
format_cudnn
=
arg
.
ci
%
32
==
0
&&
arg
.
co
%
32
==
0
?
Format
::
NCHW32
:
Format
::
NCHW4
;
param
.
format
=
format_cudnn
;
benchmarker_cudnn
.
set_param
(
param
);
float
time_in_ms
=
0.
f
;
if
(
algo
)
{
time_in_ms
=
...
...
@@ -351,18 +358,14 @@ void benchmark_target_algo_with_cudnn_tsc(
{}})
/
RUNS
;
}
Format
format_cudnn
=
arg
.
ci
%
32
==
0
&&
arg
.
co
%
32
==
0
?
Format
::
NCHW32
:
Format
::
NCHW4
;
param
.
format
=
format_cudnn
;
benchmarker_cudnn
.
set_param
(
param
);
auto
time_in_ms_cudnn
=
float
time_in_ms_cudnn
=
benchmarker_cudnn
.
execs
({
get_tensor_shape
(
src
,
format_cudnn
),
get_tensor_shape
(
filter
,
format_cudnn
),
get_tensor_shape
(
bias
,
format_cudnn
),
{},
{}})
/
RUNS
;
float
flo
=
2.0
*
arg
.
n
*
arg
.
co
*
ho
*
wo
*
arg
.
ci
*
arg
.
f
*
arg
.
f
/
(
1e12
);
printf
(
"src=%s, filter=%s, dst=%s, time(algo=%s)=%.2f %.2fTops, "
...
...
@@ -1075,8 +1078,8 @@ TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_UNROLL_WIDTH_TENSORCORE_1x1_ALGO_2) {
#if CUDA_VERSION >= 10020
/// \note: we only check several cases and block sizes in megdnn_test, the
full
/// testcases are written in cutlass repository
/// \note: we only check several cases and block sizes in megdnn_test, the
///
full
testcases are written in cutlass repository
TEST_F
(
CUDA
,
CUTLASS_CONV_BIAS_INT8_NCHW32_IMMA
)
{
require_compute_capability_eq
(
7
,
5
);
Checker
<
ConvBiasForward
>
checker
(
handle_cuda
());
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录