VGG16训练时候 mini batch 设置为32训练报错
Created by: xiangyubo
- 版本、环境信息: 1)PaddlePaddle版本:1.3.2 3)GPU:v100,16G显存 4)系统环境:AI Studio,python 3.6
- 训练信息 1)单机,单卡
我训练vgg 16的时候,输入尺寸是[3, 224, 224],显卡是v100,16G显存,为什么我的 batch 32张图就会报错,要再调小到24张才正常训练。每个 batch 能训练多少张图,有什么计算的公式之类的说明吗?不然觉得16G显存,每个 batch 才32,有点太小了。。。
以下是训练代码: `
# -*- coding: UTF-8 -*-
"""
训练常用视觉基础网络,用于分类任务
需要将训练图片,类别文件 label_list.txt 放置在同一个文件夹下
程序会先读取 train.txt 文件获取类别数和图片数量
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import time
import math
import paddle
import paddle.fluid as fluid
import codecs
import logging
from paddle.fluid.initializer import MSRA
from paddle.fluid.initializer import Uniform
from paddle.fluid.param_attr import ParamAttr
from PIL import Image
from PIL import ImageEnhance
# Global training configuration. class_dim / image_count / label_dict are
# placeholders here and are filled in by init_train_parameters().
train_parameters = {
    "input_size": [3, 224, 224],
    "class_dim": -1,  # number of classes; filled in when the custom reader is initialized
    "image_count": -1,  # number of training images; filled in when the custom reader is initialized
    "label_dict": {},
    "data_dir": "data/data2815",  # where the training data is stored
    "train_file_list": "train.txt",
    "label_file": "label_list.txt",
    "save_freeze_dir": "./freeze-model",
    "save_persistable_dir": "./persistable-params",
    "continue_train": False,  # resume from the last saved parameters; takes priority over the pretrained model
    "pretrained": False,  # whether to load a pretrained model
    "pretrained_dir": "data/data6462/VGG_pretrained",
    "mode": "train",
    "num_epochs": 120,
    "train_batch_size": 24,
    "mean_rgb": [127.5, 127.5, 127.5],  # per-channel mean; ideally measured on the training data, mid-range value used here
    "use_gpu": True,
    "image_enhance_strategy": {  # image augmentation strategy
        "need_distort": True,  # enable color distortion
        "need_rotate": True,  # enable random rotation
        "need_crop": True,  # enable random cropping
        "need_flip": True,  # enable random horizontal flips
        "hue_prob": 0.5,
        "hue_delta": 18,
        "contrast_prob": 0.5,
        "contrast_delta": 0.5,
        "saturation_prob": 0.5,
        "saturation_delta": 0.5,
        "brightness_prob": 0.5,
        "brightness_delta": 0.125
    },
    "early_stop": {
        "sample_frequency": 50,
        "successive_limit": 3,
        "good_acc1": 0.92
    },
    "rsm_strategy": {
        "learning_rate": 0.005,
        "lr_epochs": [20, 40, 60, 80, 100],
        "lr_decay": [1, 0.5, 0.25, 0.1, 0.01, 0.002]
    },
    "momentum_strategy": {
        "learning_rate": 0.005,
        "lr_epochs": [20, 40, 60, 80, 100],
        "lr_decay": [1, 0.5, 0.25, 0.1, 0.01, 0.002]
    },
    "sgd_strategy": {
        "learning_rate": 0.005,
        "lr_epochs": [20, 40, 60, 80, 100],
        "lr_decay": [1, 0.5, 0.25, 0.1, 0.01, 0.002]
    },
    "adam_strategy": {
        "learning_rate": 0.002
    }
}
class VGGNet(object):
    """
    VGG network builder (paddle.fluid static-graph API).
    """
    def __init__(self, layers=16):
        """
        VGG network constructor.
        :param layers: network depth; one of 11, 13, 16, 19
        """
        self.layers = layers
    def name(self):
        """
        Return the network name.
        :return: the string 'vgg-net'
        """
        return 'vgg-net'
    def net(self, input, class_dim=1000):
        """
        Build the network structure: five conv blocks, two FC-4096 layers
        with dropout, and a softmax classification head.
        :param input: input image variable (NCHW)
        :param class_dim: number of output classes
        :return: softmax output variable of shape [N, class_dim]
        """
        layers = self.layers
        # Number of stacked 3x3 convolutions per block for each VGG depth.
        vgg_spec = {
            11: ([1, 1, 2, 2, 2]),
            13: ([2, 2, 2, 2, 2]),
            16: ([2, 2, 3, 3, 3]),
            19: ([2, 2, 4, 4, 4])
        }
        assert layers in vgg_spec.keys(), \
            "supported layers are {} but input layer is {}".format(vgg_spec.keys(), layers)
        nums = vgg_spec[layers]
        conv1 = self.conv_block(input, 64, nums[0])
        conv2 = self.conv_block(conv1, 128, nums[1])
        conv3 = self.conv_block(conv2, 256, nums[2])
        conv4 = self.conv_block(conv3, 512, nums[3])
        conv5 = self.conv_block(conv4, 512, nums[4])
        fc_dim = 4096
        fc1 = fluid.layers.fc(
            input=conv5,
            size=fc_dim,
            act='relu',
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Normal(scale=0.005)),
            bias_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)))
        fc1 = fluid.layers.dropout(x=fc1, dropout_prob=0.5)
        fc2 = fluid.layers.fc(
            input=fc1,
            size=fc_dim,
            act='relu',
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Normal(scale=0.005)),
            bias_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)))
        fc2 = fluid.layers.dropout(x=fc2, dropout_prob=0.5)
        out = fluid.layers.fc(
            input=fc2,
            size=class_dim,
            act='softmax',
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Normal(scale=0.005)),
            bias_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)))
        return out
    def conv_block(self, input, num_filter, groups):
        """
        Convenience conv block: `groups` 3x3 same-padding convolutions,
        each followed by batch norm, then a 2x2 stride-2 max-pool.
        :param input: input feature-map variable
        :param num_filter: number of output channels for every conv in the block
        :param groups: number of stacked convolutions
        :return: pooled output variable
        """
        conv = input
        for i in range(groups):
            # The last conv of the block gets act=None; the earlier ones 'relu'.
            if i == groups - 1:
                act = None
            else:
                act = 'relu'
            # NOTE(review): the same `act` is passed to BOTH conv2d and
            # batch_norm below, so non-final layers apply relu twice
            # (idempotent, harmless) while the block's final BN output has
            # no activation at all. Reference VGG applies relu after every
            # conv — confirm this asymmetry is intentional.
            conv = fluid.layers.conv2d(
                input=conv,
                num_filters=num_filter,
                filter_size=3,
                stride=1,
                padding=1,
                act=act,
                param_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Normal(scale=0.01)),
                bias_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Constant(value=0.0)))
            conv = fluid.layers.batch_norm(input=conv, act=act)
        return fluid.layers.pool2d(
            input=conv, pool_size=2, pool_type='max', pool_stride=2)
def init_log_config():
    """
    Initialize the global logger: INFO and above to stderr, DEBUG and above
    to logs/train.log (truncated on each run).

    :return: None
    """
    global logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    # FIX: remove handlers left over from a previous call, so re-running
    # this function (common in notebooks such as AI Studio) does not
    # duplicate every log line.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)
    log_path = os.path.join(os.getcwd(), 'logs')
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    log_name = os.path.join(log_path, 'train.log')
    sh = logging.StreamHandler()
    fh = logging.FileHandler(log_name, mode='w')
    fh.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")
    fh.setFormatter(formatter)
    sh.setFormatter(formatter)
    logger.addHandler(sh)
    logger.addHandler(fh)
def init_train_parameters():
"""
初始化训练参数,主要是初始化图片数量,类别数
:return:
"""
train_file_list = os.path.join(train_parameters['data_dir'], train_parameters['train_file_list'])
label_list = os.path.join(train_parameters['data_dir'], train_parameters['label_file'])
index = 0
with codecs.open(label_list, encoding='utf-8') as flist:
lines = [line.strip() for line in flist]
for line in lines:
parts = line.strip().split()
train_parameters['label_dict'][parts[1]] = int(parts[0])
index += 1
train_parameters['class_dim'] = index
with codecs.open(train_file_list, encoding='utf-8') as flist:
lines = [line.strip() for line in flist]
train_parameters['image_count'] = len(lines)
def resize_img(img, target_size):
    """
    Force-resize an image to the requested network input size.

    :param img: PIL image
    :param target_size: [C, H, W] list; indices 1 and 2 give the output size
    :return: resized PIL image
    """
    # BUG FIX: the original overwrote target_size with the undefined global
    # name `input_size`, which raised NameError on every call.
    img = img.resize((target_size[1], target_size[2]), Image.BILINEAR)
    return img
def random_crop(img, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.)):
    """
    Inception-style random crop: pick a random aspect ratio and a random
    area fraction, crop that region, then resize it to the configured
    network input size.

    FIX: default arguments changed from mutable lists to tuples so a caller
    mutating them cannot leak state into later calls (interface-compatible:
    both are only indexed/unpacked here).

    :param img: PIL image
    :param scale: (min, max) fraction of the source area to keep
    :param ratio: (min, max) range of crop aspect ratios
    :return: cropped and resized PIL image
    """
    aspect_ratio = math.sqrt(np.random.uniform(*ratio))
    w = 1. * aspect_ratio
    h = 1. / aspect_ratio
    # Cap the usable area fraction so the crop can never exceed the image.
    bound = min((float(img.size[0]) / img.size[1]) / (w**2),
                (float(img.size[1]) / img.size[0]) / (h**2))
    scale_max = min(scale[1], bound)
    scale_min = min(scale[0], bound)
    target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
                                                                scale_max)
    target_size = math.sqrt(target_area)
    w = int(target_size * w)
    h = int(target_size * h)
    # Random top-left corner such that the crop stays inside the image.
    i = np.random.randint(0, img.size[0] - w + 1)
    j = np.random.randint(0, img.size[1] - h + 1)
    img = img.crop((i, j, i + w, j + h))
    img = img.resize((train_parameters['input_size'][1],
                      train_parameters['input_size'][2]), Image.BILINEAR)
    return img
def rotate_image(img):
    """
    Augmentation: rotate the image by a random angle in [-14, 14] degrees.

    :param img: PIL image
    :return: rotated PIL image
    """
    degrees = np.random.randint(-14, 15)
    return img.rotate(degrees)
def random_brightness(img):
    """
    Augmentation: brightness adjustment.

    With probability ``brightness_prob`` (from the global enhancement
    strategy), scale brightness by a factor drawn uniformly from
    ``[1 - brightness_delta, 1 + brightness_delta]``.

    :param img: PIL image
    :return: possibly brightness-adjusted PIL image
    """
    strategy = train_parameters['image_enhance_strategy']
    if np.random.uniform(0, 1) < strategy['brightness_prob']:
        limit = strategy['brightness_delta']
        factor = 1 + np.random.uniform(-limit, limit)
        img = ImageEnhance.Brightness(img).enhance(factor)
    return img
def random_contrast(img):
    """
    Augmentation: contrast adjustment.

    With probability ``contrast_prob`` (from the global enhancement
    strategy), scale contrast by a factor drawn uniformly from
    ``[1 - contrast_delta, 1 + contrast_delta]``.

    :param img: PIL image
    :return: possibly contrast-adjusted PIL image
    """
    strategy = train_parameters['image_enhance_strategy']
    if np.random.uniform(0, 1) < strategy['contrast_prob']:
        limit = strategy['contrast_delta']
        factor = 1 + np.random.uniform(-limit, limit)
        img = ImageEnhance.Contrast(img).enhance(factor)
    return img
def random_saturation(img):
    """
    Augmentation: saturation adjustment.

    With probability ``saturation_prob`` (from the global enhancement
    strategy), scale color saturation by a factor drawn uniformly from
    ``[1 - saturation_delta, 1 + saturation_delta]``.

    :param img: PIL image
    :return: possibly saturation-adjusted PIL image
    """
    strategy = train_parameters['image_enhance_strategy']
    if np.random.uniform(0, 1) < strategy['saturation_prob']:
        limit = strategy['saturation_delta']
        factor = 1 + np.random.uniform(-limit, limit)
        img = ImageEnhance.Color(img).enhance(factor)
    return img
def random_hue(img):
    """
    Augmentation: hue adjustment.

    With probability ``hue_prob`` (from the global enhancement strategy),
    shift the H channel in HSV space by a uniform offset in
    ``[-hue_delta, hue_delta]`` and convert back to RGB.

    NOTE(review): the H channel is uint8, so the addition wraps around
    modulo 256 — presumably intended, since hue is circular; confirm.

    :param img: PIL image
    :return: possibly hue-shifted PIL image
    """
    strategy = train_parameters['image_enhance_strategy']
    if np.random.uniform(0, 1) < strategy['hue_prob']:
        limit = strategy['hue_delta']
        shift = np.random.uniform(-limit, limit)
        hsv = np.array(img.convert('HSV'))
        hsv[:, :, 0] = hsv[:, :, 0] + shift
        img = Image.fromarray(hsv, mode='HSV').convert('RGB')
    return img
def distort_color(img):
    """
    Probabilistic color jitter: apply the four color transforms in one of
    two random orders, or (with probability ~0.3) leave the image unchanged.
    Each individual transform still applies its own probability internally.

    :param img: PIL image
    :return: possibly color-distorted PIL image
    """
    roll = np.random.uniform(0, 1)
    if roll < 0.35:
        pipeline = (random_brightness, random_contrast,
                    random_saturation, random_hue)
    elif roll < 0.7:
        pipeline = (random_brightness, random_saturation,
                    random_hue, random_contrast)
    else:
        pipeline = ()
    for transform in pipeline:
        img = transform(img)
    return img
def custom_image_reader(file_list, data_dir, mode):
    """
    Build a custom image reader; the sample list is loaded up front and
    shuffled on every pass.

    :param file_list: path of the sample list file
    :param data_dir: root directory of the data set (used in 'test' mode)
    :param mode: 'train', 'val' or 'test'
    :return: a generator function yielding (image, label) in train/val mode
             or image only in test mode; images are float32 CHW, normalized
    """
    with codecs.open(file_list) as flist:
        lines = [line.strip() for line in flist]

    def reader():
        np.random.shuffle(lines)
        for line in lines:
            if mode == 'train' or mode == 'val':
                # NOTE(review): train/val paths are used as-is (not joined
                # with data_dir, unlike the test branch) — assumes train.txt
                # stores resolvable paths; confirm against the data layout.
                img_path, label = line.split()
                img = Image.open(img_path)
                try:
                    if img.mode != 'RGB':
                        img = img.convert('RGB')
                    strategy = train_parameters['image_enhance_strategy']
                    if strategy['need_distort']:
                        img = distort_color(img)
                    if strategy['need_rotate']:
                        img = rotate_image(img)
                    if strategy['need_crop']:
                        # BUG FIX: the second positional parameter of
                        # random_crop() is `scale`, not a target size;
                        # passing input_size ([3, 224, 224]) corrupted the
                        # crop-area sampling range. random_crop() already
                        # resizes to input_size internally.
                        img = random_crop(img)
                    if strategy['need_flip']:
                        if int(np.random.uniform(0, 2)) == 1:
                            img = img.transpose(Image.FLIP_LEFT_RIGHT)
                    # HWC ---> CHW and normalize to roughly [-1, 1]
                    img = np.array(img).astype('float32')
                    img -= train_parameters['mean_rgb']
                    img = img.transpose((2, 0, 1))  # HWC to CHW
                    img *= 0.007843  # pixel normalization (~1/127.5)
                    yield img, int(label)
                except Exception:
                    # Best-effort: silently skip samples that fail to
                    # decode or augment rather than aborting the pass.
                    pass
            elif mode == 'test':
                img_path = os.path.join(data_dir, line)
                img = Image.open(img_path)
                if img.mode != 'RGB':
                    img = img.convert('RGB')
                img = resize_img(img, train_parameters['input_size'])
                # HWC ---> CHW and normalize, same as the train branch
                img = np.array(img).astype('float32')
                img -= train_parameters['mean_rgb']
                img = img.transpose((2, 0, 1))  # HWC to CHW
                img *= 0.007843
                yield img

    return reader
def optimizer_momentum_setting():
    """
    Momentum optimizer with a piecewise step-decay learning rate; the
    staircase schedule suits relatively large training sets.
    """
    strategy = train_parameters['momentum_strategy']
    steps_per_epoch = (train_parameters["image_count"]
                       // train_parameters["train_batch_size"])
    base_lr = strategy['learning_rate']
    boundaries = [epoch * steps_per_epoch for epoch in strategy["lr_epochs"]]
    values = [decay * base_lr for decay in strategy["lr_decay"]]
    return fluid.optimizer.MomentumOptimizer(
        learning_rate=fluid.layers.piecewise_decay(boundaries, values),
        momentum=0.9)
def optimizer_rms_setting():
    """
    RMSProp optimizer with a piecewise step-decay learning rate; the
    staircase schedule suits relatively large training sets.
    """
    strategy = train_parameters['rsm_strategy']
    steps_per_epoch = (train_parameters["image_count"]
                       // train_parameters["train_batch_size"])
    base_lr = strategy['learning_rate']
    boundaries = [epoch * steps_per_epoch for epoch in strategy["lr_epochs"]]
    values = [decay * base_lr for decay in strategy["lr_decay"]]
    return fluid.optimizer.RMSProp(
        learning_rate=fluid.layers.piecewise_decay(boundaries, values))
def optimizer_sgd_setting():
    """
    Plain SGD with a piecewise step-decay learning rate; loss drops
    relatively slowly but the final result is usually good. The staircase
    schedule suits relatively large training sets.
    """
    strategy = train_parameters['sgd_strategy']
    steps_per_epoch = (train_parameters["image_count"]
                       // train_parameters["train_batch_size"])
    base_lr = strategy['learning_rate']
    boundaries = [epoch * steps_per_epoch for epoch in strategy["lr_epochs"]]
    values = [decay * base_lr for decay in strategy["lr_decay"]]
    return fluid.optimizer.SGD(
        learning_rate=fluid.layers.piecewise_decay(boundaries, values))
def optimizer_adam_setting():
    """
    Adam optimizer with a fixed learning rate; drops the loss quickly but
    tends to run out of steam late in training.
    """
    return fluid.optimizer.Adam(
        learning_rate=train_parameters['adam_strategy']['learning_rate'])
def load_params(exe, program):
    """
    Optionally restore parameters into `program`: a previous checkpoint if
    continue_train is set (higher priority), otherwise a pretrained model
    if configured. If neither applies, the startup-program initialization
    stands.

    :param exe: fluid Executor used to load the variables
    :param program: the main program whose parameters are restored
    """
    persist_dir = train_parameters['save_persistable_dir']
    pretrain_dir = train_parameters['pretrained_dir']
    if train_parameters['continue_train'] and os.path.exists(persist_dir):
        logger.info('load params from retrain model')
        fluid.io.load_persistables(executor=exe,
                                   dirname=persist_dir,
                                   main_program=program)
    elif train_parameters['pretrained'] and os.path.exists(pretrain_dir):
        logger.info('load params from pretrained model')
        # Only load variables that exist as files in the pretrained dir.
        fluid.io.load_vars(
            exe, pretrain_dir, main_program=program,
            predicate=lambda var: os.path.exists(
                os.path.join(pretrain_dir, var.name)))
def train():
    """
    Build the VGG-16 training program with the fluid default main/startup
    programs, then run the epoch/batch loop with a simple accuracy-based
    early stop and periodic checkpointing.
    """
    # NOTE(review): train_prog / train_startup are created but never used —
    # everything below builds into fluid's *default* main/startup programs.
    train_prog = fluid.Program()
    train_startup = fluid.Program()
    logger.info("create prog success")
    logger.info("train config: %s", str(train_parameters))
    logger.info("build input custom reader and data feeder")
    file_list = os.path.join(train_parameters['data_dir'], "train.txt")
    mode = train_parameters['mode']
    batch_reader = paddle.batch(custom_image_reader(file_list, train_parameters['data_dir'], mode),
                                batch_size=train_parameters['train_batch_size'],
                                drop_last=True)
    place = fluid.CUDAPlace(0) if train_parameters['use_gpu'] else fluid.CPUPlace()
    # Placeholders for the input data
    img = fluid.layers.data(name='img', shape=train_parameters['input_size'], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
    # Select the network
    logger.info("build newwork")
    model = VGGNet(layers=16)
    out = model.net(input=img, class_dim=train_parameters['class_dim'])
    cost = fluid.layers.cross_entropy(out, label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    # Select the optimizer (alternatives kept for easy experimentation)
    optimizer = optimizer_rms_setting()
    # optimizer = optimizer_momentum_setting()
    # optimizer = optimizer_sgd_setting()
    # optimizer = optimizer_adam_setting()
    optimizer.minimize(avg_cost)
    exe = fluid.Executor(place)
    main_program = fluid.default_main_program()
    exe.run(fluid.default_startup_program())
    train_fetch_list = [avg_cost.name, acc_top1.name, out.name]
    load_params(exe, main_program)
    # Main training loop
    stop_strategy = train_parameters['early_stop']
    successive_limit = stop_strategy['successive_limit']
    sample_freq = stop_strategy['sample_frequency']
    good_acc1 = stop_strategy['good_acc1']
    successive_count = 0
    stop_train = False
    total_batch_count = 0
    for pass_id in range(train_parameters["num_epochs"]):
        logger.info("current pass: %d, start read image", pass_id)
        batch_id = 0
        for step_id, data in enumerate(batch_reader()):
            t1 = time.time()
            loss, acc1, pred_ot = exe.run(main_program,
                                          feed=feeder.feed(data),
                                          fetch_list=train_fetch_list)
            t2 = time.time()
            batch_id += 1
            total_batch_count += 1
            period = t2 - t1
            loss = np.mean(np.array(loss))
            acc1 = np.mean(np.array(acc1))
            if batch_id % 10 == 0:
                logger.info("Pass {0}, trainbatch {1}, loss {2}, acc1 {3}, time {4}".format(pass_id, batch_id, loss, acc1,
                            "%2.2f sec" % period))
            # Simple early-stop strategy: training is considered done once
            # the target accuracy is reached several batches in a row.
            if acc1 >= good_acc1:
                successive_count += 1
                logger.info("current acc1 {0} meets good {1}, successive count {2}".format(acc1, good_acc1, successive_count))
                fluid.io.save_inference_model(dirname=train_parameters['save_freeze_dir'],
                                              feeded_var_names=['img'],
                                              target_vars=[out],
                                              main_program=main_program,
                                              executor=exe)
                if successive_count >= successive_limit:
                    logger.info("end training")
                    stop_train = True
                    break
            else:
                successive_count = 0
            # Generic checkpointing to limit the cost of an unexpected stop
            if total_batch_count % sample_freq == 0:
                logger.info("temp save {0} batch train result, current acc1 {1}".format(total_batch_count, acc1))
                fluid.io.save_persistables(dirname=train_parameters['save_persistable_dir'],
                                           main_program=main_program,
                                           executor=exe)
        if stop_train:
            break
    logger.info("training till last epcho, end training")
    fluid.io.save_persistables(dirname=train_parameters['save_persistable_dir'],
                               main_program=main_program,
                               executor=exe)
    fluid.io.save_inference_model(dirname=train_parameters['save_freeze_dir'],
                                  feeded_var_names=['img'],
                                  target_vars=[out],
                                  main_program=main_program,
                                  executor=exe)
if __name__ == '__main__':
    # Script entry point: configure logging, derive the dataset statistics
    # (class count, image count), then run the training loop.
    init_log_config()
    init_train_parameters()
    train()
`
打印出来的异常栈:
578 init_log_config() 579 init_train_parameters() --> 580 train() in train() 530 loss, acc1, pred_ot = exe.run(main_program, 531 feed=feeder.feed(data), --> 532 fetch_list=train_fetch_list) 533 t2 = time.time() 534 batch_id += 1 /opt/conda/lib/python3.6/site-packages/paddle/fluid/executor.py in run(self, program, feed, fetch_list, feed_var_name, fetch_var_name, scope, return_numpy, use_program_cache) 523 scope=scope, 524 return_numpy=return_numpy, --> 525 use_program_cache=use_program_cache) 526 527 program.compile(scope, self.place) /opt/conda/lib/python3.6/site-packages/paddle/fluid/executor.py in run(self, program, exe, feed, fetch_list, feed_var_name, fetch_var_name, scope, return_numpy, use_program_cache) 589 590 self.feed_data(program, feed, feed_var_name, scope) --> 591 exe.run(program.desc, scope, 0, True, True, fetch_var_name) 592 outs = self.fetch_data(fetch_list, fetch_var_name, scope) 593 if return_numpy: EnforceNotMet: Invoke operator conv2d_grad error. Python Callstacks: File "/opt/conda/lib/python3.6/site-packages/paddle/fluid/framework.py", line 1317, in append_op attrs=kwargs.get("attrs", None)) File "/opt/conda/lib/python3.6/site-packages/paddle/fluid/layer_helper.py", line 56, in append_op return self.main_program.current_block().append_op(*args, **kwargs) File "/opt/conda/lib/python3.6/site-packages/paddle/fluid/layers/nn.py", line 1994, in conv2d 'fuse_relu_before_depthwise_conv': False File "", line 178, in conv_block initializer=fluid.initializer.Constant(value=0.0))) File "", line 118, in net conv1 = self.conv_block(input, 64, nums[0]) File "", line 499, in train out = model.net(input=img, class_dim=train_parameters['class_dim']) File "", line 580, in train() File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes if self.run_code(code, 
result): File "/opt/conda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2728, in run_cell interactivity=interactivity, compiler=compiler, result=result) File "/opt/conda/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs) File "/opt/conda/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute res = shell.run_cell(code, store_history=store_history, silent=silent) File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 397, in execute_request user_expressions, allow_stdin) File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 232, in dispatch_shell handler(stream, idents, msg) File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 281, in dispatcher return self.dispatch_shell(stream, msg) File "/opt/conda/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper return fn(*args, **kwargs) File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 438, in run_callback callback(*args, *kwargs) File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 486, in _handle_recv self._run_callback(callback, msg) File "/opt/conda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 456, in _handle_events self._handle_recv() File "/opt/conda/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper return fn(args, kwargs) File "/opt/conda/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start handler_func(fd_obj, events) File "/opt/conda/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 478, in start self.io_loop.start() File "/opt/conda/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance app.start() File "/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in app.launch_new_instance() File 
"/opt/conda/lib/python3.6/runpy.py", line 85, in _run_code exec(code, run_globals) File "/opt/conda/lib/python3.6/runpy.py", line 193, in _run_module_as_main "main", mod_spec) C++ Callstacks: CUDNN_STATUS_BAD_PARAM at [/paddle/paddle/fluid/operators/conv_cudnn_op.cu.cc:537] PaddlePaddle Call Stacks: 0 0x7faa3a7de37dp void paddle::platform::EnforceNotMet::Initstd::string(std::string, char const, int) + 365 1 0x7faa3a7de6c7p paddle::platform::EnforceNotMet::EnforceNotMet(std::string const&, char const, int) + 87 2 0x7faa3b1b79c2p paddle::operators::CUDNNConvGradOpKernel::Compute(paddle::framework::ExecutionContext const&) const + 7362 3 0x7faa3b1b8343p std::_Function_handler<void (paddle::framework::ExecutionContext const&), paddle::framework::OpKernelRegistrarFunctor<paddle::platform::CUDAPlace, false, 0ul, paddle::operators::CUDNNConvGradOpKernel, paddle::operators::CUDNNConvGradOpKernel, paddle::operators::CUDNNConvGradOpKernelpaddle::platform::float16 >::operator()(char const, char const, int) const::{lambda(paddle::framework::ExecutionContext const&)#1 (closed)}>::M_invoke(std::Any_data const&, paddle::framework::ExecutionContext const&) + 35 4 0x7faa3c2bb903p paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const + 659 5 0x7faa3c2b9175p paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, 
boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) + 341 6 0x7faa3a8fa9d2p paddle::framework::Executor::RunPreparedContext(paddle::framework::ExecutorPrepareContext*, paddle::framework::Scope*, bool, bool, bool) + 226 7 0x7faa3a8fc91fp paddle::framework::Executor::Run(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, std::vector<std::string, std::allocatorstd::string > const&, bool) + 143 8 0x7faa3a7ce2fep 9 0x7faa3a8091eep 10 0x559054857744p _PyCFunction_FastCallDict + 340 11 0x5590548de57ep 12 0x55905490338ap _PyEval_EvalFrameDefault + 778 13 0x5590548d78e4p 14 0x5590548d8771p 15 0x5590548de505p 16 0x559054904147p _PyEval_EvalFrameDefault + 4295 17 0x5590548d78e4p 18 0x5590548d8771p 19 0x5590548de505p 20 0x559054904147p _PyEval_EvalFrameDefault + 4295 21 0x5590548d853bp 22 0x5590548de505p 23 0x55905490338ap _PyEval_EvalFrameDefault + 778 24 0x5590548d9289p PyEval_EvalCodeEx + 809 25 0x5590548da01cp PyEval_EvalCode + 28 26 0x559054900d97p 27 0x559054857681p _PyCFunction_FastCallDict + 145 28 0x5590548de42cp 29 0x55905490338ap _PyEval_EvalFrameDefault + 778 30 0x5590548d78e4p 31 0x5590548d8771p 32 0x5590548de505p 33 0x55905490338ap _PyEval_EvalFrameDefault + 778 34 0x5590548d78e4p 35 0x5590548d8771p 36 0x5590548de505p 37 0x559054904147p _PyEval_EvalFrameDefault + 4295 38 0x5590548d7bfep 39 0x5590548d8e6ap _PyFunction_FastCallDict + 986 40 
0x559054857b0fp _PyObject_FastCallDict + 623 41 0x55905485c6a3p _PyObject_Call_Prepend + 99 42 0x55905485754ep PyObject_Call + 62 43 0x559054904a6cp _PyEval_EvalFrameDefault + 6636 44 0x5590548d7a76p 45 0x5590548d8771p 46 0x5590548de505p 47 0x559054904147p _PyEval_EvalFrameDefault + 4295 48 0x5590548d78e4p 49 0x5590548d8771p 50 0x5590548de505p 51 0x55905490338ap _PyEval_EvalFrameDefault + 778 52 0x5590548d853bp 53 0x5590548de505p 54 0x55905490338ap _PyEval_EvalFrameDefault + 778 55 0x5590548d853bp 56 0x5590548de505p 57 0x55905490338ap _PyEval_EvalFrameDefault + 778 58 0x5590548d98c6p PyEval_EvalCodeEx + 2406 59 0x5590548da1a6p 60 0x55905485754ep PyObject_Call + 62 61 0x559054904a6cp _PyEval_EvalFrameDefault + 6636 62 0x5590548d98c6p PyEval_EvalCodeEx + 2406 63 0x5590548da1a6p 64 0x55905485754ep PyObject_Call + 62 65 0x559054904a6cp _PyEval_EvalFrameDefault + 6636 66 0x5590548d78e4p 67 0x5590548d8771p 68 0x5590548de505p 69 0x55905490338ap _PyEval_EvalFrameDefault + 778 70 0x5590548d853bp 71 0x5590548de505p 72 0x55905490338ap _PyEval_EvalFrameDefault + 778 73 0x5590548d8babp _PyFunction_FastCallDict + 283 74 0x559054857b0fp _PyObject_FastCallDict + 623 75 0x55905485c6a3p _PyObject_Call_Prepend + 99 76 0x55905485754ep PyObject_Call + 62 77 0x559054904a6cp _PyEval_EvalFrameDefault + 6636 78 0x5590548d7bfep 79 0x5590548d8771p 80 0x5590548de505p 81 0x55905490338ap _PyEval_EvalFrameDefault + 778 82 0x5590548d853bp 83 0x5590548de505p 84 0x55905490338ap _PyEval_EvalFrameDefault + 778 85 0x5590548d853bp 86 0x5590548de505p 87 0x55905490338ap _PyEval_EvalFrameDefault + 778 88 0x5590548d78e4p 89 0x5590548d8771p 90 0x5590548de505p 91 0x55905490338ap _PyEval_EvalFrameDefault + 778 92 0x5590548d9289p PyEval_EvalCodeEx + 809 93 0x5590548da01cp PyEval_EvalCode + 28 94 0x559054900d97p 95 0x559054857681p _PyCFunction_FastCallDict + 145 96 0x5590548de42cp 97 0x55905490338ap _PyEval_EvalFrameDefault + 778 98 0x5590548d78e4p 99 0x5590548d8771p