Commit ec2ee103 authored by Shaojie Bai

update for pytorch 1.0 with nograd

Parent a13a6b82
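Editor's note: this commit wraps each evaluation/test routine in `torch.no_grad()` for PyTorch 1.0. `model.eval()` and `torch.no_grad()` are complementary: the former switches layers such as dropout and batch normalization to inference behavior, while the latter stops autograd from recording operations, cutting memory use and compute during evaluation. A minimal sketch of the pattern the hunks below apply (`model`, `X_test`, and `Y_test` are placeholder names for illustration):

import torch
import torch.nn.functional as F

def evaluate(model, X_test, Y_test):
    model.eval()               # inference behavior for dropout/batchnorm layers
    with torch.no_grad():      # no autograd graph is built inside this block
        output = model(X_test)
        test_loss = F.mse_loss(output, Y_test)
    return test_loss.item()    # .item() extracts a Python float from a 0-dim tensor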
@@ -100,10 +100,11 @@ def train(epoch):
 def evaluate():
     model.eval()
-    output = model(X_test)
-    test_loss = F.mse_loss(output, Y_test)
-    print('\nTest set: Average loss: {:.6f}\n'.format(test_loss.item()))
-    return test_loss.item()
+    with torch.no_grad():
+        output = model(X_test)
+        test_loss = F.mse_loss(output, Y_test)
+        print('\nTest set: Average loss: {:.6f}\n'.format(test_loss.item()))
+        return test_loss.item()


 for ep in range(1, epochs+1):
......
@@ -85,14 +85,15 @@ optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr)
 def evaluate():
     model.eval()
-    out = model(test_x.unsqueeze(1).contiguous())
-    loss = criterion(out.view(-1, n_classes), test_y.view(-1))
-    pred = out.view(-1, n_classes).data.max(1, keepdim=True)[1]
-    correct = pred.eq(test_y.data.view_as(pred)).cpu().sum()
-    counter = out.view(-1, n_classes).size(0)
-    print('\nTest set: Average loss: {:.8f} | Accuracy: {:.4f}\n'.format(
-        loss.item(), 100. * correct / counter))
-    return loss.item()
+    with torch.no_grad():
+        out = model(test_x.unsqueeze(1).contiguous())
+        loss = criterion(out.view(-1, n_classes), test_y.view(-1))
+        pred = out.view(-1, n_classes).data.max(1, keepdim=True)[1]
+        correct = pred.eq(test_y.data.view_as(pred)).cpu().sum()
+        counter = out.view(-1, n_classes).size(0)
+        print('\nTest set: Average loss: {:.8f} | Accuracy: {:.4f}\n'.format(
+            loss.item(), 100. * correct / counter))
+        return loss.item()


 def train(ep):
......
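Editor's note on the hunk above: since PyTorch 0.4, `correct` is a zero-dimensional integer tensor, so `100. * correct / counter` can truncate toward an integer depending on version; calling `.item()` first avoids the ambiguity, and the legacy `.data` accessor is unnecessary inside `no_grad()`. A sketch of an equivalent accuracy computation (names mirror the diff; illustrative, not the repository's code):

import torch

def accuracy(out, test_y, n_classes):
    # Flatten logits to (N, n_classes) and targets to (N,), as in the diff.
    logits = out.view(-1, n_classes)
    pred = logits.argmax(dim=1)          # replaces .data.max(1, keepdim=True)[1]
    correct = (pred == test_y.view(-1)).sum().item()  # Python int, no tensor division
    return 100.0 * correct / logits.size(0)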
@@ -88,18 +88,19 @@ def evaluate(data_source):
     total_loss = 0
     processed_data_size = 0
     correct = 0
-    for i in range(len(data_source)):
-        data, targets = torch.LongTensor(data_source[i]).view(1, -1), torch.LongTensor([data_source[i][-1]]).view(1, -1)
-        data, targets = Variable(data), Variable(targets)
-        if args.cuda:
-            data, targets = data.cuda(), targets.cuda()
-        output = model(data)
-        final_output = output[:, -1].contiguous().view(-1, n_words)
-        final_target = targets[:, -1].contiguous().view(-1)
-        loss = criterion(final_output, final_target)
-        total_loss += loss.data
-        processed_data_size += 1
-    return total_loss.item() / processed_data_size
+    with torch.no_grad():
+        for i in range(len(data_source)):
+            data, targets = torch.LongTensor(data_source[i]).view(1, -1), torch.LongTensor([data_source[i][-1]]).view(1, -1)
+            data, targets = Variable(data), Variable(targets)
+            if args.cuda:
+                data, targets = data.cuda(), targets.cuda()
+            output = model(data)
+            final_output = output[:, -1].contiguous().view(-1, n_words)
+            final_target = targets[:, -1].contiguous().view(-1)
+            loss = criterion(final_output, final_target)
+            total_loss += loss.data
+            processed_data_size += 1
+        return total_loss.item() / processed_data_size


 def train():
......
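Editor's note: this hunk evaluates one test sequence at a time with batch size 1, accumulating `loss.data` (a tensor) before the final `.item()` division; the retained `Variable` wrapper is a no-op in PyTorch ≥0.4. Assuming `criterion` is a token-level cross-entropy, the averaged loss returned by `evaluate()` converts directly to perplexity; a small helper under that assumption:

import math

def perplexity(avg_nll):
    # Perplexity is the exponential of the average per-token
    # negative log-likelihood returned by evaluate().
    return math.exp(avg_nll)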
@@ -97,23 +97,24 @@ def test():
     model.eval()
     test_loss = 0
     correct = 0
-    for data, target in test_loader:
-        if args.cuda:
-            data, target = data.cuda(), target.cuda()
-        data = data.view(-1, input_channels, seq_length)
-        if args.permute:
-            data = data[:, :, permute]
-        data, target = Variable(data, volatile=True), Variable(target)
-        output = model(data)
-        test_loss += F.nll_loss(output, target, size_average=False).item()
-        pred = output.data.max(1, keepdim=True)[1]
-        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
-    test_loss /= len(test_loader.dataset)
-    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
-        test_loss, correct, len(test_loader.dataset),
-        100. * correct / len(test_loader.dataset)))
-    return test_loss
+    with torch.no_grad():
+        for data, target in test_loader:
+            if args.cuda:
+                data, target = data.cuda(), target.cuda()
+            data = data.view(-1, input_channels, seq_length)
+            if args.permute:
+                data = data[:, :, permute]
+            data, target = Variable(data, volatile=True), Variable(target)
+            output = model(data)
+            test_loss += F.nll_loss(output, target, size_average=False).item()
+            pred = output.data.max(1, keepdim=True)[1]
+            correct += pred.eq(target.data.view_as(pred)).cpu().sum()
+        test_loss /= len(test_loader.dataset)
+        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
+            test_loss, correct, len(test_loader.dataset),
+            100. * correct / len(test_loader.dataset)))
+        return test_loss


 if __name__ == "__main__":
......
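Editor's note: the new code above still wraps inputs in `Variable(data, volatile=True)`. Since PyTorch 0.4, `volatile` has no effect (it emits a warning) and is subsumed by the surrounding `no_grad()` block, and `size_average=False` was later deprecated in favor of `reduction='sum'`. A sketch of one batch of this loop with the legacy calls removed (illustrative only):

import torch
import torch.nn.functional as F

def test_step(model, data, target):
    # One batch of the evaluation loop above, without Variable/volatile.
    with torch.no_grad():
        output = model(data)
        loss = F.nll_loss(output, target, reduction='sum').item()  # sums per-sample losses,
                                                                   # as size_average=False did
        pred = output.argmax(dim=1, keepdim=True)
        correct = pred.eq(target.view_as(pred)).sum().item()
    return loss, correct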
@@ -68,19 +68,20 @@ def evaluate(X_data, name='Eval'):
     eval_idx_list = np.arange(len(X_data), dtype="int32")
     total_loss = 0.0
     count = 0
-    for idx in eval_idx_list:
-        data_line = X_data[idx]
-        x, y = Variable(data_line[:-1]), Variable(data_line[1:])
-        if args.cuda:
-            x, y = x.cuda(), y.cuda()
-        output = model(x.unsqueeze(0)).squeeze(0)
-        loss = -torch.trace(torch.matmul(y, torch.log(output).float().t()) +
-                            torch.matmul((1-y), torch.log(1-output).float().t()))
-        total_loss += loss.item()
-        count += output.size(0)
-    eval_loss = total_loss / count
-    print(name + " loss: {:.5f}".format(eval_loss))
-    return eval_loss
+    with torch.no_grad():
+        for idx in eval_idx_list:
+            data_line = X_data[idx]
+            x, y = Variable(data_line[:-1]), Variable(data_line[1:])
+            if args.cuda:
+                x, y = x.cuda(), y.cuda()
+            output = model(x.unsqueeze(0)).squeeze(0)
+            loss = -torch.trace(torch.matmul(y, torch.log(output).float().t()) +
+                                torch.matmul((1-y), torch.log(1-output).float().t()))
+            total_loss += loss.item()
+            count += output.size(0)
+        eval_loss = total_loss / count
+        print(name + " loss: {:.5f}".format(eval_loss))
+        return eval_loss


 def train(ep):
......
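Editor's note: the loss in this hunk computes a summed binary cross-entropy over a piano-roll matrix via `torch.trace(torch.matmul(...))`. For `y` and `output` of shape (T, 88), trace(y @ log(output).t()) equals (y * log(output)).sum(), so the matmul builds a (T, T) intermediate only to read its diagonal. An equivalent elementwise form (the `eps` clamp is an added safeguard, not in the original):

import torch

def frame_nll(output, y, eps=1e-8):
    # Elementwise equivalent of
    #   -trace(y @ log(output).t() + (1 - y) @ log(1 - output).t())
    # i.e. a summed binary cross-entropy, without the (T, T) intermediate.
    return -(y * torch.log(output + eps)
             + (1 - y) * torch.log(1 - output + eps)).sum()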
@@ -92,23 +92,24 @@ def evaluate(data_source):
     model.eval()
     total_loss = 0
     processed_data_size = 0
-    for i in range(0, data_source.size(1) - 1, args.validseqlen):
-        if i + args.seq_len - args.validseqlen >= data_source.size(1) - 1:
-            continue
-        data, targets = get_batch(data_source, i, args, evaluation=True)
-        output = model(data)
-        # Discard the effective history, just like in training
-        eff_history = args.seq_len - args.validseqlen
-        final_output = output[:, eff_history:].contiguous().view(-1, n_words)
-        final_target = targets[:, eff_history:].contiguous().view(-1)
-        loss = criterion(final_output, final_target)
-        # Note that we don't add TAR loss here
-        total_loss += (data.size(1) - eff_history) * loss.item()
-        processed_data_size += data.size(1) - eff_history
-    return total_loss / processed_data_size
+    with torch.no_grad():
+        for i in range(0, data_source.size(1) - 1, args.validseqlen):
+            if i + args.seq_len - args.validseqlen >= data_source.size(1) - 1:
+                continue
+            data, targets = get_batch(data_source, i, args, evaluation=True)
+            output = model(data)
+            # Discard the effective history, just like in training
+            eff_history = args.seq_len - args.validseqlen
+            final_output = output[:, eff_history:].contiguous().view(-1, n_words)
+            final_target = targets[:, eff_history:].contiguous().view(-1)
+            loss = criterion(final_output, final_target)
+            # Note that we don't add TAR loss here
+            total_loss += (data.size(1) - eff_history) * loss.item()
+            processed_data_size += data.size(1) - eff_history
+        return total_loss / processed_data_size


 def train():
......
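Editor's note: assuming `criterion` here returns a mean over only the scored tokens (those after the effective history), each batch's mean loss is re-weighted by its token count before the final division, yielding a true per-token average over the corpus. A small numeric illustration of that re-weighting:

# Two hypothetical batches: 80 scored tokens at mean loss 4.0,
# 40 scored tokens at mean loss 5.0.
total_loss = 80 * 4.0 + 40 * 5.0         # weighted sum, as in the loop above
processed_data_size = 80 + 40
print(total_loss / processed_data_size)  # 4.33..., not the naive mean 4.5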