diff --git a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp index 9b6d6d67770cc1550b629ac637c37eed923ea1d6..515435d4a158c744580c202beda1a4eb713bfa5d 100644 --- a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp +++ b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp @@ -69,6 +69,12 @@ bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available( return false; } + if (args.src_layout->dtype.enumv() == DTypeEnum::Float16 && + args.dst_layout->dtype.enumv() == DTypeEnum::Float16 && + param.format == param::ConvBias::Format::NHWC) { + return false; + } + //! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal //! memory access errors, so we have to disable this kernel here. if (param.format == param::ConvBias::Format::NCHW4_NCHW || diff --git a/dnn/src/cuda/conv_bias/helper.cpp b/dnn/src/cuda/conv_bias/helper.cpp index b98520e27ec98c444938b0d5a826945f098ee822..d4cd291bc609ab288e9e73c77a0f337fd78abeab 100644 --- a/dnn/src/cuda/conv_bias/helper.cpp +++ b/dnn/src/cuda/conv_bias/helper.cpp @@ -151,14 +151,14 @@ bool is_cudnn_supported(const BiasForwardSizeArgs& args) { if (args.handle->is_tegra_k1()) return false; - // TODO: We only support NCHW format now. It seems cuDNN provides support - // for NHWC as well. - if (args.filter_meta.format == param::Convolution::Format::NCHW4) { + if (args.filter_meta.format == param::Convolution::Format::NCHW4 || + args.filter_meta.format == param::Convolution::Format::NCHW32) { if (args.dst_layout->dtype.enumv() != DTypeEnum::Int8 && args.dst_layout->dtype.enumv() != DTypeEnum::QuantizedS8) { return false; } - } else if (args.filter_meta.format != param::Convolution::Format::NCHW) { + } else if (args.filter_meta.format != param::Convolution::Format::NCHW && + args.filter_meta.format != param::Convolution::Format::NHWC) { return false; } auto& fm = args.filter_meta; diff --git a/dnn/test/cuda/conv_bias.cpp b/dnn/test/cuda/conv_bias.cpp index d1fdf8816bf80e17f07be6e702b6c93cd986da59..3435e106c39338e4c83aca9a8e3f4acf887e84ce 100644 --- a/dnn/test/cuda/conv_bias.cpp +++ b/dnn/test/cuda/conv_bias.cpp @@ -216,6 +216,41 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_QS8) { } } +TEST_F(CUDA, CONV_BIAS_FORWARD_FLOAT16) { + require_compute_capability(6, 1); + + Checker checker(handle_cuda()); + ConvBias::Param param; + param.format = ConvBias::Param::Format::NHWC; + param.nonlineMode = ConvBias::Param::NonlineMode::IDENTITY; + + checker.set_epsilon(2e-2) + .set_dtype(0, dtype::Float16()) + .set_dtype(1, dtype::Float16()) + .set_dtype(2, dtype::Float16()) + .set_dtype(3, dtype::Float16()) + .set_dtype(4, dtype::Float16()); + { + auto src_shape = TensorShape{20, 224, 224, 4}; + auto filter_shape = TensorShape{24, 1, 1, 4}; + auto bias_shape = TensorShape{1, 1, 1, 24}; + checker.set_param(param).execs( + {src_shape, filter_shape, bias_shape, {}, {}}); + param.compute_mode = ConvBias::Param::ComputeMode::FLOAT32; + checker.set_param(param).execs( + {src_shape, filter_shape, bias_shape, {}, {}}); + } + + { + param.sparse = ConvBias::Param::Sparse::GROUP; + auto src_shape = TensorShape{20, 224, 224, 16}; + auto filter_shape = TensorShape{4, 4, 1, 1, 4}; + auto bias_shape = TensorShape{1, 1, 1, 16}; + checker.set_param(param).execs( + {src_shape, filter_shape, bias_shape, {}, {}}); + } +} + TEST_F(CUDA, CONV_BIAS_NCHW_QS8) { //! not support NonlineMode::SIGMOID and NonlineMode::H_SWISH require_compute_capability(6, 1);