se-resnext50 FLAGS_fraction_of_gpu_memory_to_use=1 时八卡报错
Created by: ccmeteorljh
paddle: develop 分支,commit-id: 2192e7bb 代码库: https://github.com/PaddlePaddle/benchmark/tree/master/se-resnext 将 FLAGS_fraction_of_gpu_memory_to_use 设置成 0.98 时八卡运行正常
CUDNN_STATUS_INTERNAL_ERROR at [/paddle/paddle/fluid/platform/device_context.cc:218]
PaddlePaddle Call Stacks:
0 0x7f98a2e9cee0p void paddle::platform::EnforceNotMet::Init<char const*>(char const*, char const*, int) + 352
1 0x7f98a2e9d259p paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const*, int) + 137
2 0x7f98a4c0c5c8p paddle::platform::CudnnHolder::CudnnHolder(CUstream_st* const*, paddle::platform::CUDAPlace const&) + 1000
3 0x7f98a4c0c890p
4 0x7f992f5ada99p
5 0x7f98a4c0baebp paddle::platform::CUDADeviceContext::cudnn_holder() const + 91
6 0x7f98a4c0bb19p paddle::platform::CUDADeviceContext::cudnn_handle() const + 9
7 0x7f98a326bc50p paddle::operators::ConvOp::GetExpectedKernelType(paddle::framework::ExecutionContext const&) const + 400
8 0x7f98a4b9602bp paddle::framework::OperatorWithKernel::ChooseKernel(paddle::framework::RuntimeContext const&, paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const + 235
9 0x7f98a4b98101p paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&, paddle::framework::RuntimeContext*) const + 625
10 0x7f98a4b983e1p paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const + 545
11 0x7f98a4b95a0cp paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) + 332
12 0x7f98a49a5d0ap paddle::framework::details::ComputationOpHandle::RunImpl() + 250
13 0x7f98a49986a0p paddle::framework::details::OpHandleBase::Run(bool) + 160
14 0x7f98a4981522p paddle::framework::details::FastThreadedSSAGraphExecutor::RunOp(paddle::framework::details::OpHandleBase*, std::shared_ptr<paddle::framework::BlockingQueue<unsigned long> > const&, unsigned long*) + 50
15 0x7f98a498185fp
16 0x7f98a3c3b7c3p std::_Function_handler<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> (), std::__future_base::_Task_setter<std::unique_ptr<std::__future_base::_Result<void>, std::__future_base::_Result_base::_Deleter>, void> >::_M_invoke(std::_Any_data const&) + 35
17 0x7f98a2f6dd27p std::__future_base::_State_base::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>&, bool&) + 39
18 0x7f992f5ada99p
19 0x7f98a497dd72p
20 0x7f98a2f6f2a4p ThreadPool::ThreadPool(unsigned long)::{lambda()#1}::operator()() const + 404
21 0x7f992a5afc80p
22 0x7f992f5a66bap
23 0x7f992f2dc41dp clone + 109