From 75eebb7c42a3dbe7d90bed6955eb62eaa7146125 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team <megengine@megvii.com>
Date: Mon, 20 Jul 2020 22:37:14 +0800
Subject: [PATCH] feat(opr): use weight preprocess feature of MegDNN

GitOrigin-RevId: 779041f8a87051e58d5e0ca289773b05a261a8a0
---
 dnn/include/megdnn/oprs/nn.h                  |  19 +-
 src/opr/impl/dnn/convolution.cpp              | 309 ++++++++++++++----
 .../include/megbrain/opr/dnn/convolution.h    |  60 +++-
 src/opr/test/dnn/convolution.cpp              | 225 ++++++++++++-
 4 files changed, 535 insertions(+), 78 deletions(-)

diff --git a/dnn/include/megdnn/oprs/nn.h b/dnn/include/megdnn/oprs/nn.h
index d6a1fefdf..411429484 100644
--- a/dnn/include/megdnn/oprs/nn.h
+++ b/dnn/include/megdnn/oprs/nn.h
@@ -51,6 +51,17 @@ protected:
 };
 using SeparableConv = SeparableConvForward;
 
+namespace detail {
+
+struct PreprocessedFilter {
+    //! user data; its lifetime should be bound to MegDNN Convolution
+    //! operator
+    void* algorithm_id;
+    TensorNDArray tensors;
+};
+
+} // namespace detail
+
 /**
  * \brief base class for convolution operation
  *
@@ -131,13 +142,7 @@ public:
             return flag;
         }
     };
-
-    struct PreprocessedFilter {
-        //! user data; its lifetime should be bound to MegDNN Convolution
-        //! operator
-        void* algorithm_id;
-        TensorNDArray tensors;
-    };
+    using PreprocessedFilter = detail::PreprocessedFilter;
 
 protected:
     // Check or deduce output DType
diff --git a/src/opr/impl/dnn/convolution.cpp b/src/opr/impl/dnn/convolution.cpp
index 2df4e95d2..ea79cdb08 100644
--- a/src/opr/impl/dnn/convolution.cpp
+++ b/src/opr/impl/dnn/convolution.cpp
@@ -10,6 +10,7 @@
  */
 
 #include "megbrain/opr/dnn/convolution.h"
+#include "megbrain/opr/io.h"
 
 #include "megbrain/graph/grad_impl.h"
 #include "megbrain/system.h"
@@ -95,67 +96,14 @@ MGB_FOREACH_FASTRUN_OPR(cb)
 
 #undef cb
 
-template <typename MGBOpr>
-struct OprAttributeTrait {
-    static bool is_weights_persistent(const MGBOpr*) { return false; }
-};
-
-template <>
-struct OprAttributeTrait<opr::ConvBias> {
-    //! return true if the flag of weights is PERSISTENT_DEVICE_VALUE, false
-    //! otherwise. True means weights can be transformed in the first run.
-    static bool is_weights_persistent(const opr::ConvBias* opr) {
-        return opr->input()[1]->contain_flag(
-                VarNode::Flag::PERSISTENT_DEVICE_VALUE);
-    }
-};
-
-template <typename Opr>
-constexpr bool opr_supports_preprocess() {
-    return std::is_same<Opr, megdnn::ConvolutionForward>::value ||
-           std::is_same<Opr, megdnn::ConvBias>::value;
-}
-
 template <typename Opr>
 struct OprArityTrait;
 
-#define APPLY(statement, ...)                                  \
-    mgb::apply([&](const auto&... args) { return statement; }, \
-               std::tuple_cat(__VA_ARGS__))
-
 template <class Opr, int _arity_in, int _arity_out>
 struct OprArityTraitTmpl {
     static constexpr int arity_in = _arity_in;
     static constexpr int arity_out = _arity_out;
     static constexpr int arity = arity_in + arity_out;
-    using Algorithm = typename Opr::Algorithm;
-    using TensorLayoutArray = std::array<TensorLayout, arity>;
-
-    static size_t get_workspace_in_bytes(Opr* opr, Algorithm* algo,
-                                         const TensorLayoutArray& layouts) {
-        opr->execution_policy() = {algo};
-        size_t workspace_size;
-        if_constexpr<opr_supports_preprocess<Opr>()>([&](auto) {
-            workspace_size = APPLY(
-                    opr->get_workspace_in_bytes(args..., nullptr), layouts);
-        }, /* else */ [&](auto) {
-            workspace_size =
-                    APPLY(opr->get_workspace_in_bytes(args...), layouts);
-        });
-        return workspace_size;
-    }
-
-    static void exec(Opr* opr,
-                     const std::array<DeviceTensorND, arity_in>& inp_val,
-                     const std::array<DeviceTensorND, arity_out>& out_val,
-                     megdnn::Workspace& workspace) {
-        if_constexpr<opr_supports_preprocess<Opr>()>([&](auto) {
-            APPLY(opr->exec(args.as_megdnn()..., nullptr, workspace), inp_val,
-                  out_val);
-        }, /* else */ [&](auto) {
-            APPLY(opr->exec(args.as_megdnn()..., workspace), inp_val, out_val);
-        });
-    }
 };
 
 #define INST_ARITY(_Opr, _in, _out) \
 
 INST_ARITY(megdnn::DeformableConvBackwardData, 5, 3);
 
 #undef INST_ARITY
 
+template <typename Opr>
+constexpr bool opr_supports_preprocess() {
+    return std::is_same<Opr, megdnn::ConvolutionForward>::value ||
+           std::is_same<Opr, megdnn::ConvBias>::value;
+}
+
+template <typename Opr, bool with_preprocess>
+struct PreprocessFilterImpl {
+    using T = union {};
+};
+
+template <typename Opr>
+struct PreprocessFilterImpl<Opr, true> {
+    using T = typename Opr::PreprocessedFilter;
+};
+
+template <typename Opr>
+using PreprocessFilter =
+        typename PreprocessFilterImpl<Opr, opr_supports_preprocess<Opr>()>::T;
+
 // timeout delta to be added with fastest known algorithm for new algos
 constexpr double TIMEOUT_TOLERANCE = 2;
 
@@ -225,6 +193,7 @@ public:
     CompNode::Locator comp_node_loc;
     ConvTensorShapes shapes;
     typename Opr::Param opr_param;
+    bool allow_weight_preprocess;
 
     //! filled by profile()
     mutable double actual_timeout;
@@ -277,6 +246,10 @@ double TimedProfiler<Opr>::init_timeout_setting() {
     return 0;
 }
 
+#define APPLY(statement, ...)                                  \
+    mgb::apply([&](const auto&... args) { return statement; }, \
+               std::tuple_cat(__VA_ARGS__))
+
 template <typename Opr>
 typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
         const TParam& raw_param) {
@@ -324,6 +297,16 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
         megdnn_opr->execution_policy() = {algo};
     }
 
+    // Allocate preprocessed weight buffers.
+    TensorLayoutArray preprocessed_layout;
+    if_constexpr<opr_supports_preprocess<Opr>()>([&](auto _) {
+        if (param.allow_weight_preprocess) {
+            preprocessed_layout = APPLY(
+                    _(megdnn_opr)->deduce_preprocessed_filter_layout(args...),
+                    layouts);
+        }
+    });
+
     {
         // first allocate a whole chunk to avoid memory fragmentation (here we
         // rely on memory allocator to reuse memory)
@@ -332,6 +315,9 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
         for (int i = 0; i < arity; ++i) {
             tot_size += layouts[i].span().high_byte + align;
         }
+        for (const auto& layout : preprocessed_layout) {
+            tot_size += layout.span().high_byte + align;
+        }
         tot_size += param.workspace;
         DeviceTensorStorage storage{cn};
         storage.ensure_size(tot_size);
@@ -362,15 +348,46 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
         mdn_workspace.raw_ptr = workspace.raw_ptr();
     }
 
+    // allocate storage for preprocessed filter
+    SmallVector<DeviceTensorND> flt_val(preprocessed_layout.size());
+    for (size_t i = 0; i < preprocessed_layout.size(); i++) {
+        flt_val[i] = {cn, preprocessed_layout[i], preprocessed_layout[i].dtype,
+                      preprocessed_layout[i].format};
+    }
+
     for (int i = 0; i < arity_in; ++i) {
         fill_zero_dev_tensor(inp_val[i]);
     }
 
+    PreprocessFilter<Opr> prep_flt;
+    if_constexpr<opr_supports_preprocess<Opr>()>([&](auto _) {
+        if (!preprocessed_layout.empty()) {
+            auto&& pf = _(prep_flt);
+            pf.algorithm_id = nullptr;
+            pf.tensors.resize(flt_val.size());
+            for (size_t i = 0; i < flt_val.size(); i++) {
+                pf.tensors[i] = flt_val[i].as_megdnn();
+            }
+            APPLY(_(megdnn_opr)->exec_preprocess(args..., &pf, mdn_workspace),
+                  std::forward_as_tuple(layouts[0], inp_val[1].as_megdnn()),
+                  array_skip<2>(layouts));
+        }
+    });
+
     RealTimer timer;
     auto ev_start = cn.create_event(CompNode::Event::NEED_TIMER),
          ev_end = cn.create_event(CompNode::Event::NEED_TIMER);
     ev_start->record();
-    OprArityTrait<Opr>::exec(megdnn_opr.get(), inp_val, out_val, mdn_workspace);
+    if_constexpr<opr_supports_preprocess<Opr>()>([&](auto _) {
+        auto&& opr = _(megdnn_opr);
+        PreprocessFilter<Opr>* pf =
+                preprocessed_layout.empty() ? nullptr : &prep_flt;
+        APPLY(opr->exec(args.as_megdnn()..., pf, mdn_workspace), inp_val,
+              out_val);
+    }, /* else */ [&](auto _) {
+        APPLY(_(megdnn_opr)->exec(args.as_megdnn()..., mdn_workspace), inp_val,
+              out_val);
+    });
     ev_end->record();
 
     double next_report_time = 0.5;
@@ -425,13 +442,15 @@ class AlgoChooser {
     const ConvTensorLayouts& m_layouts;
     Opr* m_megdnn_opr;
     const MGBOpr* m_mgb_opr;
+    bool m_allow_weight_preprocess;
 
 public:
     ExeContext(const ConvTensorLayouts& layouts, Opr* megdnn_opr,
-               const MGBOpr* mgb_opr)
+               const MGBOpr* mgb_opr, bool allow_weight_preprocess)
             : m_layouts{layouts},
               m_megdnn_opr{megdnn_opr},
-              m_mgb_opr{mgb_opr} {
+              m_mgb_opr{mgb_opr},
+              m_allow_weight_preprocess{allow_weight_preprocess} {
         mgb_assert(m_layouts.size() == layouts.size());
         static_assert(
                 std::tuple_size<ConvTensorLayouts>::value == 3 ||
@@ -499,8 +518,23 @@
 
     //! get workspace size required for specific algo
     size_t get_workspace_size_bytes(ImplAlgo algo) const {
-        return OprArityTrait<Opr>::get_workspace_in_bytes(m_megdnn_opr,
-                                                          algo, m_layouts);
+        m_megdnn_opr->execution_policy() = {algo};
+        size_t result;
+        if_constexpr<opr_supports_preprocess<Opr>()>([&](auto _) {
+            auto&& opr = _(m_megdnn_opr);
+            auto prep = construct_fake_preprocess_filter();
+            PreprocessFilter<Opr>* prep_ptr =
+                    prep.valid() ? &prep.val() : nullptr;
+            result = std::max(
+                    APPLY(opr->get_preprocess_workspace_in_bytes(args...),
+                          m_layouts),
+                    APPLY(opr->get_workspace_in_bytes(args..., prep_ptr),
+                          m_layouts));
+        }, /* else */ [&](auto _) {
+            result = APPLY(_(m_megdnn_opr)->get_workspace_in_bytes(args...),
+                           m_layouts);
+        });
+        return result;
     }
 
     /*!
@@ -525,6 +559,28 @@
      */
     void modify_param_with_weights_preprocessed(
             typename TimedProfiler<Opr>::Param& param) const {}
+
+    Maybe<PreprocessFilter<Opr>> construct_fake_preprocess_filter() const {
+        Maybe<PreprocessFilter<Opr>> result = None;
+        if_constexpr<opr_supports_preprocess<Opr>()>([&](auto _) {
+            if (!m_allow_weight_preprocess)
+                return;
+            auto opr = _(m_megdnn_opr);
+            auto layout =
+                    APPLY(opr->deduce_preprocessed_filter_layout(args...),
+                          m_layouts);
+            if (layout.empty())
+                return;
+            result = PreprocessFilter<Opr>{};
+            auto& res = result.val();
+            res.algorithm_id = nullptr;
+            res.tensors.resize(layout.size());
+            for (size_t i = 0; i < layout.size(); i++) {
+                res.tensors[i] = megdnn::TensorND(nullptr, layout[i]);
+            }
+        });
+        return result;
+    }
 };
 
 //! entrance for getting algorithm according to execution strategy
@@ -571,12 +627,13 @@ public:
      * \brief setup algorithm and return workspace size
      */
     static size_t setup_algo(const ConvTensorLayouts& layouts, Opr* megdnn_opr,
-                             const MGBOpr* mgb_opr) {
+                             const MGBOpr* mgb_opr,
+                             bool allow_weight_preprocess = false) {
         if (WorkspaceLimitGetter::is_prealloc_run(mgb_opr->owner_graph())) {
             return 0;
         }
 
-        ExeContext ctx(layouts, megdnn_opr, mgb_opr);
+        ExeContext ctx(layouts, megdnn_opr, mgb_opr, allow_weight_preprocess);
 
         auto algo = get_algo(ctx);
         size_t workspace = ctx.get_workspace_size_bytes(algo);
@@ -780,9 +837,6 @@ Maybe<AlgoChooserProfileCache::ResultEntry>
 AlgoChooser<Opr>::ExeContext::profile_single_algo(ImplAlgo algo,
                                                   double& timeout) const {
     typename TimedProfiler<Opr>::Param param;
-    bool is_weights_persistent =
-            OprAttributeTrait<typename MegDNNOpr2MGBOpr<Opr>::MGBOpr>::
-                    is_weights_persistent(m_mgb_opr);
     auto name = algo->name();
     // force check copy size <= dest len-1 from gcc8 for safe
     auto len = sizeof(param.algo_name);
@@ -806,8 +860,9 @@ AlgoChooser<Opr>::ExeContext::profile_single_algo(ImplAlgo algo,
     for (size_t i = 0; i < param.shapes.size(); ++i)
         param.shapes[i] = m_layouts[i];
     param.opr_param = m_megdnn_opr->param();
+    param.allow_weight_preprocess = m_allow_weight_preprocess;
 
-    if (is_weights_persistent) {
+    if (m_allow_weight_preprocess) {
         modify_param_with_weights_preprocessed(param);
     }
 
@@ -911,6 +966,78 @@ AlgoChooserProfileCache& mixin::Convolution::profile_cache() const {
     return *m_profile_cache;
 }
 
+class mixin::WeightPreprocessExecutor::PreprocessedFilterExecDep final
+        : public cg::GraphExecutable::ExecDependency {
+    std::unique_ptr<PreprocessedFilter> m_pf;
+    SmallVector<DeviceTensorND> m_filter_storage;
+
+public:
+    explicit PreprocessedFilterExecDep(
+            std::unique_ptr<PreprocessedFilter> preprocessed_filter,
+            SmallVector<DeviceTensorND> filter_storage)
+            : m_pf(std::move(preprocessed_filter)),
+              m_filter_storage(std::move(filter_storage)) {}
+};
+
+void mixin::WeightPreprocessExecutor::mixin_update_preprocessed_filter(
+        cg::OperatorNodeBase& opr) {
+    if (!mixin_allow_weight_preprocess(opr)) return;
+
+    auto new_layout = deduce_preprocessed_filter_layout();
+    if (new_layout.empty()) {
+        // Weight preprocess was needed before, but no longer needed.
+        if (m_preprocessed_filter) {
+            m_preprocessed_filter.reset();
+            m_filter_storage.clear();
+        }
+        return;
+    }
+
+    bool should_update = false;
+    size_t new_size = new_layout.size();
+    if (!m_preprocessed_filter ||
+        m_preprocessed_filter->tensors.size() != new_size) {
+        should_update = true;
+    } else {
+        for (size_t i = 0; i < new_size; i++) {
+            if (!new_layout[i].eq_layout(
+                        m_preprocessed_filter->tensors[i].layout)) {
+                should_update = true;
+                break;
+            }
+        }
+    }
+    if (!should_update) return;
+
+    if (!m_preprocessed_filter) {
+        m_preprocessed_filter.reset(new PreprocessedFilter{});
+    }
+    m_preprocessed_filter->tensors.resize(new_size);
+    m_filter_storage.resize(new_size);
+    m_preprocessed_filter->algorithm_id = nullptr;
+    for (size_t i = 0; i < new_size; i++) {
+        m_filter_storage[i] = {opr.output(0)->comp_node(), new_layout[i],
+                               new_layout[i].dtype, new_layout[i].format};
+        m_preprocessed_filter->tensors[i] = m_filter_storage[i].as_megdnn();
+    }
+    scn_do_execute_preprocess();
+}
+
+void mixin::WeightPreprocessExecutor::record_preprocessed_weight(
+        cg::GraphExecutable::ExecDependencyArray& deps) {
+    deps.emplace_back(new PreprocessedFilterExecDep{
+            std::move(m_preprocessed_filter), std::move(m_filter_storage)});
+}
+
+bool mixin::WeightPreprocessExecutor::mixin_allow_weight_preprocess(
+        const cg::OperatorNodeBase& opr) const {
+    bool param_merged = opr.input(1)
+                                ->owner_opr()
+                                ->same_type<opr::MultipleDeviceTensorHolder>();
+    return opr.input(1)->contain_flag(VarNode::Flag::PERSISTENT_DEVICE_VALUE) &&
+           (cg::is_const_var_value(opr.input(1)) || param_merged);
+}
+
 /* ==================== ConvolutionForward  ==================== */
 
 IMPL_CONV(ConvolutionForward, "conv_fwd");
@@ -971,7 +1098,7 @@ size_t ConvolutionForward::get_workspace_size_bytes(
                           input(0)->format()},
              {input_shapes[1], input(1)->dtype(), input(1)->format()},
             {output_shapes[0], output(0)->dtype(), output(0)->format()}},
-            megdnn_opr(), this);
+            megdnn_opr(), this, allow_weight_preprocess());
 }
 
 void ConvolutionForward::init_output_format() {
@@ -980,9 +1107,14 @@
 }
 
 void ConvolutionForward::scn_do_execute() {
+    if (input(1)->contain_flag(VarNode::Flag::PERSISTENT_DEVICE_VALUE) &&
+        cg::is_const_var_value(input(1))) {
+        update_preprocessed_filter();
+    }
     megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(),
                        input(1)->dev_tensor().as_megdnn(),
-                       output(0)->dev_tensor().as_megdnn(), nullptr,
+                       output(0)->dev_tensor().as_megdnn(),
+                       preprocessed_filter(),
                        intl::get_megdnn_workspace_from_var(output().back()));
 }
 
@@ -1012,6 +1144,20 @@ void ConvolutionForward::get_output_var_shape(
 void ConvolutionForward::record_execute_deps(
         cg::GraphExecutable::ExecDependencyArray& deps) {
     record_megdnn_opr(deps);
+    record_preprocessed_weight(deps);
+}
+
+SmallVector<TensorLayout>
+ConvolutionForward::deduce_preprocessed_filter_layout() {
+    return megdnn_opr()->deduce_preprocessed_filter_layout(
+            input(0)->layout(), input(1)->layout(), output(0)->layout());
+}
+
+void ConvolutionForward::scn_do_execute_preprocess() {
+    megdnn_opr()->exec_preprocess(
+            input(0)->layout(), input(1)->dev_tensor().as_megdnn(),
+            output(0)->layout(), preprocessed_filter(),
+            intl::get_megdnn_workspace_from_var(output().back()));
 }
 
 /* ==================== ConvolutionBackwardData  ==================== */
@@ -1504,10 +1650,12 @@
             i2, i3,
             {output_shapes[0], output(0)->dtype(), output(0)->format()}},
-            mo, this);
+            mo, this, allow_weight_preprocess());
 }
 
 void ConvBiasForward::scn_do_execute() {
+    update_preprocessed_filter();
+
     auto&& inp = input();
     auto mo = megdnn_opr();
     if (inp.size() == 2) {
@@ -1621,6 +1769,33 @@ megdnn::param::MatrixMul::Format ConvBiasForward::get_matmul_format(
     }
 }
 
+SmallVector<TensorLayout> ConvBiasForward::deduce_preprocessed_filter_layout() {
+    TensorLayout i2, i3;
+    if (input().size() > 2) {
+        i2 = input(2)->layout();
+    }
+    if (input().size() > 3) {
+        i3 = input(3)->layout();
+    }
+    return megdnn_opr()->deduce_preprocessed_filter_layout(
+            input(0)->layout(), input(1)->layout(), i2, i3,
+            output(0)->layout());
+}
+
+void ConvBiasForward::scn_do_execute_preprocess() {
+    TensorLayout bias_layout(output(0)->dtype()), z_layout(output(0)->dtype());
+    if (input().size() > 2) {
+        bias_layout = input(2)->layout();
+    }
+    if (input().size() > 3) {
+        z_layout = input(3)->layout();
+    }
+    megdnn_opr()->exec_preprocess(
+            input(0)->layout(), input(1)->dev_tensor().as_megdnn(), bias_layout,
+            z_layout, output(0)->layout(), preprocessed_filter(),
+            intl::get_megdnn_workspace_from_var(output().back()));
+}
+
 /* ===================== LocalShareForward ==================== */
 
 IMPL_CONV(LocalShareForward, "local_share");
diff --git a/src/opr/include/megbrain/opr/dnn/convolution.h b/src/opr/include/megbrain/opr/dnn/convolution.h
index 311b36b40..cd0a410a6 100644
--- a/src/opr/include/megbrain/opr/dnn/convolution.h
+++ b/src/opr/include/megbrain/opr/dnn/convolution.h
@@ -72,13 +72,52 @@ class Convolution {
             cg::OperatorNodeBase* self);
 };
 
+class WeightPreprocessExecutor : public cg::OperatorNodeMixinBase {
+    class PreprocessedFilterExecDep;
+
+    using PreprocessedFilter = megdnn::detail::PreprocessedFilter;
+    std::unique_ptr<PreprocessedFilter> m_preprocessed_filter;
+    SmallVector<DeviceTensorND> m_filter_storage;
+
+protected:
+    //! this should only be called in scn_do_execute or similar functions (i.e.
+    //! post dispatch-to-ExecEnv)
+    void mixin_update_preprocessed_filter(OperatorNodeBase& opr);
+    void record_preprocessed_weight(
+            cg::GraphExecutable::ExecDependencyArray& deps);
+    PreprocessedFilter* preprocessed_filter() const {
+        return m_preprocessed_filter.get();
+    }
+
+    bool mixin_allow_weight_preprocess(const OperatorNodeBase& opr) const;
+    virtual SmallVector<TensorLayout> deduce_preprocessed_filter_layout() = 0;
+    virtual void scn_do_execute_preprocess() = 0;
+};
+
 } // namespace mixin
 
 namespace intl {
+    //! glue class to apply mixin::WeightPreprocessExecutor
+    template <class Base, class MixinImpl = mixin::WeightPreprocessExecutor>
+    class OprWithWeightPreprocess : public mixin::CheckBase<Base>::Base,
+                                    public MixinImpl {
+    protected:
+        using Base::Base;
+
+        void update_preprocessed_filter() {
+            this->mixin_update_preprocessed_filter(*this);
+        }
+
+        bool allow_weight_preprocess() const {
+            return this->mixin_allow_weight_preprocess(*this);
+        }
+    };
+
     using ConvBiasBase = cg::SingleCNOperatorNode<
             cg::OutshapePureByInshapeOpr<>,
             mixin::MegDNNOprHolderImpl<megdnn::ConvBiasForward>>;
-    using ConvBiasForwardBase = WorkspaceSizeInfer<ConvBiasBase>;
+    using ConvBiasForwardBase =
+            OprWithWeightPreprocess<WorkspaceSizeInfer<ConvBiasBase>>;
 
     using DeformableConvBackwardDataT = cg::SingleCNOperatorNode<
             cg::OutshapePureByInshapeOpr<>,
             mixin::MegDNNOprHolderImpl<megdnn::DeformableConvBackwardData>>;
@@ -90,12 +129,20 @@ namespace intl {
             mixin::MegDNNOprHolderImpl<megdnn::BatchConvBiasForward>>;
     using BatchConvBiasForwardBase = WorkspaceSizeInfer<BatchConvBiasBase>;
 
-    using ConvolutionForwardBase = WorkspaceSizeInfer<
-            typename MegDNNOprWrapperFwdBase<megdnn::ConvolutionForward>::Base>;
+    using ConvolutionForwardBase = OprWithWeightPreprocess<
+            WorkspaceSizeInfer<typename MegDNNOprWrapperFwdBase<
+                    megdnn::ConvolutionForward>::Base>>;
 } // namespace intl
 
+namespace testing {
+
+class ConvolutionTestingPeer;
+
+} // namespace testing
+
 MGB_DEFINE_OPR_CLASS(ConvolutionForward,
                      intl::ConvolutionForwardBase, public mixin::Convolution) // {
+    void init_profile_cache() override;
     void init_output_dtype() override;
     size_t get_workspace_size_bytes(
@@ -109,6 +156,10 @@ MGB_DEFINE_OPR_CLASS(ConvolutionForward,
                     TensorShapeArray& out_shape) const override final;
     void record_execute_deps(
             cg::GraphExecutable::ExecDependencyArray& deps) override;
+    SmallVector<TensorLayout> deduce_preprocessed_filter_layout() override;
+    void scn_do_execute_preprocess() override;
+
+    friend testing::ConvolutionTestingPeer;
 
 public:
     ConvolutionForward(VarNode *src, VarNode *filter,
@@ -142,7 +193,10 @@ MGB_DEFINE_OPR_CLASS(ConvBiasForward, intl::ConvBiasForwardBase,
     void record_execute_deps(
             cg::GraphExecutable::ExecDependencyArray& deps) override {
         this->record_megdnn_opr(deps);
+        this->record_preprocessed_weight(deps);
     }
+    SmallVector<TensorLayout> deduce_preprocessed_filter_layout() override;
+    void scn_do_execute_preprocess() override;
 
 public:
    //! src * filter
diff --git a/src/opr/test/dnn/convolution.cpp b/src/opr/test/dnn/convolution.cpp
index 78361a312..b6016bc53 100644
--- a/src/opr/test/dnn/convolution.cpp
+++ b/src/opr/test/dnn/convolution.cpp
@@ -21,6 +21,8 @@
 #include "megbrain/gopt/inference.h"
 #include "megbrain/opr/tensor_manip.h"
 
+#include <gmock/gmock.h>
+
 #include 
 #include 
 
@@ -244,7 +246,6 @@ opr::Convolution::Param convert_to_conv_param(
             param.dilate_w, param.sparse, param.format};
 };
 #endif
-}  // anonymous namespace
 
 TEST(TestOprDNN, ConvolutionForward) {
     uint32_t ih = 10, ic = 16, oc = 32, ph = 0, sh = 1, fh = 2;
@@ -1172,6 +1173,7 @@ TEST(TestOprDNN, ConvBiasForward) {
                      {1, OC, 1, 1}},
                     opt3);
     };
+    run(1, 1, 1, 5, 5, 1, 1);
     run(1, 1, 1, 5, 5, 3, 3);
     run(2, 3, 4, 5, 5, 3, 3);
     run(3, 3, 4, 224, 223, 3, 3);
@@ -2124,4 +2126,225 @@ TEST(TestOprDNN, ConvolutionMultiCompNode) {
 
 #endif
 
+}  // anonymous namespace
+
+namespace mgb {
+namespace opr {
+namespace testing {
+
+class ConvolutionTestingPeer {
+    opr::ConvolutionForward& m_conv_opr;
+public:
+    explicit ConvolutionTestingPeer(cg::OperatorNodeBase* opr)
+            : m_conv_opr(opr->cast_final_safe<opr::ConvolutionForward>()) {}
+    void set_megdnn_opr(
+            std::unique_ptr<megdnn::ConvolutionForward> megdnn_opr) {
+        m_conv_opr.set_megdnn_opr(std::move(megdnn_opr));
+    }
+};
+
+}  // namespace testing
+}  // namespace opr
+}  // namespace mgb
+
+namespace {
+
+using megdnn::TensorND;
+using megdnn::Workspace;
+using opr::testing::ConvolutionTestingPeer;
+
+class MockConvolutionForward : public megdnn::ConvolutionForward {
+    const char* m_algorithm_set_name;
+public:
+    MockConvolutionForward(megdnn::ConvolutionForward* orig,
+                           const char* algo_set_name)
+            : megdnn::ConvolutionForward(orig->handle()),
+              m_algorithm_set_name(algo_set_name) {}
+
+    MOCK_METHOD5(exec, void(_megdnn_tensor_in src, _megdnn_tensor_in filter,
+                            _megdnn_tensor_out dst,
+                            const PreprocessedFilter* preprocessed_filter,
+                            _megdnn_workspace workspace));
+    MOCK_METHOD5(exec_preprocess,
+                 void(const TensorLayout& src_layout, _megdnn_tensor_in filter,
+                      const TensorLayout& dst_layout,
+                      PreprocessedFilter* preprocessed_filter,
+                      _megdnn_workspace workspace));
+    MOCK_METHOD4(get_workspace_in_bytes,
+                 size_t(const TensorLayout& src, const TensorLayout& filter,
+                        const TensorLayout& dst,
+                        const PreprocessedFilter* preprocessed_filter));
+    MOCK_METHOD3(deduce_preprocessed_filter_layout,
+                 SmallVector<TensorLayout>(const TensorLayout& src,
+                                           const TensorLayout& filter,
+                                           const TensorLayout& dst));
+    MOCK_METHOD3(get_preprocess_workspace_in_bytes,
+                 size_t(const TensorLayout& src, const TensorLayout& filter,
+                        const TensorLayout& dst));
+    MOCK_METHOD3(get_all_algorithms,
+                 std::vector<Algorithm*>(const TensorLayout& p0,
+                                         const TensorLayout& p1,
+                                         const TensorLayout& p2));
+    MOCK_METHOD5(get_algorithm_heuristic,
+                 Algorithm*(const TensorLayout& p0, const TensorLayout& p1,
+                            const TensorLayout& p2,
+                            size_t workspace_limit_in_bytes,
+                            bool reproducible));
+
+    const char* get_algorithm_set_name() const override {
+        return m_algorithm_set_name;
+    }
+};
+
+class MockAlgorithm : public megdnn::detail::Algorithm {
+    const char* m_name;
+
+public:
+    MockAlgorithm(const char* name = "NotImportant") : m_name(name) {}
+    bool is_reproducible() const override { return true; }
+    const char* name() const override { return m_name; }
+
+    virtual ~MockAlgorithm() = default;
+};
+
+class TestWeightPreprocess : public ::testing::Test {
+protected:
+    CompNode comp_node;
+    std::shared_ptr<ComputingGraph> graph;
+    std::shared_ptr<HostTensorND> x_host;
+    MockConvolutionForward* mock_conv_ptr;
+    SymbolVar y;
+    HostTensorND y_host;
+    std::unique_ptr<cg::AsyncExecutable> func;
+
+    MockConvolutionForward& mock_conv() { return *mock_conv_ptr; }
+
+    void SetUp() override {
+        constexpr uint32_t ih = 10, ic = 16, oc = 32, ph = 0, sh = 1, fh = 2,
+                           iw = ih;
+        comp_node = CompNode::load("cpux");
+        graph = ComputingGraph::make();
+        TensorShape x_shape{1, ic, ih, iw}, w_shape{oc, ic, fh, fh};
+        x_host = std::make_shared<HostTensorND>(comp_node, x_shape);
+        auto x = opr::Host2DeviceCopy::make(*graph, x_host);
+        auto w = opr::ImmutableTensor::make(*graph, {comp_node, w_shape});
+        Param param;
+        param.pad_h = param.pad_w = ph;
+        param.stride_h = param.stride_w = sh;
+        param.format = Param::Format::NCHW;
+        y = opr::ConvolutionForward::make(x, w, param);
+        auto& opr =
+                y.node()->owner_opr()->cast_final<opr::ConvolutionForward>();
+        auto mock = std::make_unique<MockConvolutionForward>(
+                opr.megdnn_opr(), ::testing::UnitTest::GetInstance()
+                                          ->current_test_info()
+                                          ->name());
+        mock_conv_ptr = mock.get();
+        ConvolutionTestingPeer{&opr}.set_megdnn_opr(std::move(mock));
+        func = graph->compile({make_callback_copy(y, y_host)});
+    }
+
+    void run() { func->execute().wait(); }
+
+    void TearDown() override {
+        func.reset();
+        // Triggers mock check
+        graph.reset();
+        x_host.reset();
+    }
+};
+
+TEST_F(TestWeightPreprocess, NoPreprocessNeeded) {
+    using ::testing::_;
+    using ::testing::Return;
+    auto& mock = mock_conv();
+
+    MockAlgorithm algo;
+    EXPECT_CALL(mock, get_algorithm_heuristic(_, _, _, _, _))
+            .WillRepeatedly(Return(&algo));
+    EXPECT_CALL(mock, get_workspace_in_bytes(_, _, _, _))
+            .WillRepeatedly(Return(0));
+    EXPECT_CALL(mock, get_preprocess_workspace_in_bytes(_, _, _))
+            .WillRepeatedly(Return(0));
+
+    {
+        ::testing::InSequence seq;
+        // Return empty preprocess filters, indicating no need to preprocess
+        EXPECT_CALL(mock, deduce_preprocessed_filter_layout(_, _, _))
+                .WillRepeatedly(Return(SmallVector<TensorLayout>{}));
+        EXPECT_CALL(mock, exec_preprocess(_, _, _, _, _)).Times(0);
+        EXPECT_CALL(mock, exec(_, _, _, nullptr, _));
+        run();
+    }
+}
+
+TEST_F(TestWeightPreprocess, PreprocessCalledOnlyOnce) {
+    using ::testing::_;
+    using ::testing::Return;
+    using ::testing::Field;
+    using ::testing::Invoke;
+    using ::testing::Expectation;
+    using PF = MockConvolutionForward::PreprocessedFilter;
+
+    auto& mock = mock_conv();
+    MockAlgorithm algo;
+    SmallVector<TensorLayout> filter_layout{{{1, 2, 3, 4}, dtype::Float32()},
+                                            {{5, 6, 7, 8}, dtype::Float32()}};
+
+    EXPECT_CALL(mock, deduce_preprocessed_filter_layout(_, _, _))
+            .WillRepeatedly(Return(filter_layout));
+
+    Expectation algo_call =
+            EXPECT_CALL(mock, get_algorithm_heuristic(_, _, _, _, _))
+                    .WillOnce(Return(&algo));
+    Expectation ws_call = EXPECT_CALL(mock, get_workspace_in_bytes(_, _, _, _))
+                                  .After(algo_call)
+                                  .WillOnce(Return(0));
+    Expectation pre_ws_call =
+            EXPECT_CALL(mock, get_preprocess_workspace_in_bytes(_, _, _))
+                    .After(algo_call)
+                    .WillOnce(Return(233));
+    {
+        ::testing::InSequence seq;
+
+        // exec_preprocess should be called only once, with workspace allocated
+        int salt = 0;
+        EXPECT_CALL(mock, exec_preprocess(_, _, _, _, _))
+                .After(ws_call, pre_ws_call)
+                .WillOnce(Invoke([&](const TensorLayout&, _megdnn_tensor_in,
+                                     const TensorLayout&, PF* pf,
+                                     _megdnn_workspace workspace) {
+                    ASSERT_EQ(workspace.size, 233);
+                    ASSERT_NE(pf, nullptr);
+                    pf->algorithm_id = &salt;
+                    ASSERT_EQ(pf->tensors.size(), 2);
+                    ASSERT_TRUE(pf->tensors[0].layout.eq_shape({1, 2, 3, 4}));
+                    ASSERT_TRUE(pf->tensors[1].layout.eq_shape({5, 6, 7, 8}));
+                    ASSERT_NE(pf->tensors[0].raw_ptr, nullptr);
+                    ASSERT_NE(pf->tensors[1].raw_ptr, nullptr);
+                    pf->tensors[0].ptr<float>()[0] = 114.514f;
+                    pf->tensors[1].ptr<float>()[0] = 1926.0817f;
+                }));
+
+        // Run the graph multiple times.
+        for (int i = 0; i < 3; i++) {
+            if (i > 0) {
+                EXPECT_CALL(mock, exec_preprocess(_, _, _, _, _)).Times(0);
+            }
+            EXPECT_CALL(mock, exec(_, _, _, _, _))
+                    .WillOnce(Invoke([&](_megdnn_tensor_in, _megdnn_tensor_in,
+                                         _megdnn_tensor_out, const PF* pf,
+                                         _megdnn_workspace) {
+                        ASSERT_NE(pf, nullptr);
+                        ASSERT_EQ(pf->algorithm_id, &salt);
+                        ASSERT_EQ(pf->tensors[0].ptr<float>()[0], 114.514f);
+                        ASSERT_EQ(pf->tensors[1].ptr<float>()[0], 1926.0817f);
+                    }));
+            run();
+        }
+    }
+}
+
+}  // anonymous namespace
+
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
-- 
GitLab
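Usage note (not part of the patch itself): as mixin_allow_weight_preprocess() above shows, weight preprocessing only kicks in when the filter var carries PERSISTENT_DEVICE_VALUE and is a const var value (or a merged param), and when the chosen algorithm reports non-empty preprocessed filter layouts. Below is a minimal sketch of a graph that satisfies that condition, modeled on the TestWeightPreprocess fixture; the "cpux" comp node, the tensor shapes, and the variable names are illustrative placeholders, not part of the patch.

    // Sketch under the assumptions above: the filter comes from an
    // ImmutableTensor, so ConvolutionForward may call exec_preprocess() on the
    // first execution and reuse the cached preprocessed filter afterwards.
    using namespace mgb;
    auto cn = CompNode::load("cpux");
    auto x_host = std::make_shared<HostTensorND>(cn, TensorShape{1, 16, 10, 10});
    HostTensorND w_host{cn, TensorShape{32, 16, 2, 2}};
    // (fill x_host / w_host with real data in practice)
    auto graph = ComputingGraph::make();
    auto x = opr::Host2DeviceCopy::make(*graph, x_host);  // runtime input
    auto w = opr::ImmutableTensor::make(*graph, w_host);  // constant filter
    opr::Convolution::Param param;
    auto y = opr::ConvolutionForward::make(x, w, param);
    HostTensorND y_host;
    auto func = graph->compile(
            {{y, [&](DeviceTensorND& dev) { y_host.copy_from(dev); }}});
    func->execute().wait();  // first run: exec_preprocess() + exec()
    func->execute().wait();  // later runs reuse the preprocessed filter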