未验证 提交 109fdf14 编写于 作者: X XiangGao 提交者: GitHub

Add check_kernel_launch flag to gate the kernel launch check (#32692)

上级 6ab43f7f
......@@ -25,7 +25,8 @@ limitations under the License. */
#include <unordered_set>
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "glog/logging.h" // For VLOG()
#include "gflags/gflags.h"
#include "glog/logging.h" // For VLOG()
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/details/op_registry.h"
#include "paddle/fluid/framework/grad_op_desc_maker.h"
......@@ -67,6 +68,8 @@ class Version;
} // namespace framework
} // namespace paddle
DECLARE_bool(check_kernel_launch);
namespace paddle {
namespace framework {
......@@ -135,14 +138,16 @@ class OpRegistry {
};
// Primary template of CheckKernelLaunch: for a generic PlaceType the body is
// empty, so calling it does nothing with op_type.
// NOTE(review): the two definition lines below appear to be the before/after
// forms of the same line in this diff view (the first carries a stray `;`
// after the body) -- confirm against the real header.
template <typename PlaceType>
inline void CheckKernelLaunch(const char* op_type){};
inline void CheckKernelLaunch(const char* op_type) {}
#ifdef PADDLE_WITH_CUDA
// Specialization of CheckKernelLaunch for ::paddle::platform::CUDAPlace,
// compiled only when PADDLE_WITH_CUDA is defined. It forwards op_type to
// PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS.
// NOTE(review): this diff view appears to interleave the old body (an
// unconditional PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS call closed by `};`) with
// the new body, which makes the same call only when FLAGS_check_kernel_launch
// is true -- confirm against the real header.
template <>
inline void CheckKernelLaunch<::paddle::platform::CUDAPlace>(
const char* op_type) {
PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type);
};
if (FLAGS_check_kernel_launch) {
PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(op_type);
}
}
#endif
template <typename PlaceType, bool at_end, size_t I, typename... KernelType>
......
......@@ -578,6 +578,19 @@ DEFINE_string(tracer_mkldnn_ops_on, "",
DEFINE_string(tracer_mkldnn_ops_off, "",
"List of OneDNN operation types to be turned off");
/**
* Debug related FLAG
* Name: check_kernel_launch
* Since Version: 2.1.0
* Value Range: bool, default=false
* Example: FLAGS_check_kernel_launch=true turns the launch check on.
* Note: Check kernel launch status after every kernel compute.
*       The flag is only defined when PADDLE_WITH_CUDA or PADDLE_WITH_HIP is
*       set (see the guard below).
*       NOTE(review): the CheckKernelLaunch specialization that reads this
*       flag is guarded by PADDLE_WITH_CUDA alone -- confirm whether HIP
*       builds are meant to honor the flag.
*/
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
DEFINE_bool(check_kernel_launch, false,
"Check kernel launch status after every kernel compute");
#endif
/**
* CUDNN related FLAG
* Name: conv2d_disable_cudnn
......
......@@ -41,6 +41,7 @@ DECLARE_int32(multiple_of_cupti_buffer_size);
DECLARE_bool(reader_queue_speed_test_mode);
DECLARE_int32(call_stack_level);
DECLARE_bool(sort_sum_gradient);
DECLARE_bool(check_kernel_launch);
// device management
DECLARE_int32(paddle_num_threads);
// executor
......@@ -376,7 +377,7 @@ static void RegisterGlobalVarGetterSetter() {
FLAGS_fraction_of_gpu_memory_to_use, FLAGS_initial_gpu_memory_in_mb,
FLAGS_reallocate_gpu_memory_in_mb, FLAGS_enable_cublas_tensor_op_math,
FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce,
FLAGS_conv2d_disable_cudnn);
FLAGS_conv2d_disable_cudnn, FLAGS_check_kernel_launch);
#endif
#ifdef PADDLE_WITH_XPU
REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_xpus);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册