Commit ec2ee103 authored by Shaojie Bai

update for pytorch 1.0 with nograd

Parent a13a6b82
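Editor's note: this commit wraps each evaluation/test routine in `torch.no_grad()` for PyTorch 1.0. `model.eval()` and `torch.no_grad()` are complementary: the former switches layers such as dropout and batch normalization to inference behavior, while the latter stops autograd from recording operations, cutting memory use and compute during evaluation. A minimal sketch of the pattern the hunks below apply (`model`, `X_test`, and `Y_test` are placeholder names for illustration):

import torch
import torch.nn.functional as F

def evaluate(model, X_test, Y_test):
    model.eval()               # inference behavior for dropout/batchnorm layers
    with torch.no_grad():      # no autograd graph is built inside this block
        output = model(X_test)
        test_loss = F.mse_loss(output, Y_test)
    return test_loss.item()    # .item() extracts a Python float from a 0-dim tensor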
@@ -100,10 +100,11 @@ def train(epoch):
 def evaluate():
     model.eval()
-    output = model(X_test)
-    test_loss = F.mse_loss(output, Y_test)
-    print('\nTest set: Average loss: {:.6f}\n'.format(test_loss.item()))
-    return test_loss.item()
+    with torch.no_grad():
+        output = model(X_test)
+        test_loss = F.mse_loss(output, Y_test)
+        print('\nTest set: Average loss: {:.6f}\n'.format(test_loss.item()))
+        return test_loss.item()


 for ep in range(1, epochs+1):
......
@@ -85,14 +85,15 @@ optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr)
 def evaluate():
     model.eval()
-    out = model(test_x.unsqueeze(1).contiguous())
-    loss = criterion(out.view(-1, n_classes), test_y.view(-1))
-    pred = out.view(-1, n_classes).data.max(1, keepdim=True)[1]
-    correct = pred.eq(test_y.data.view_as(pred)).cpu().sum()
-    counter = out.view(-1, n_classes).size(0)
-    print('\nTest set: Average loss: {:.8f} | Accuracy: {:.4f}\n'.format(
-        loss.item(), 100. * correct / counter))
-    return loss.item()
+    with torch.no_grad():
+        out = model(test_x.unsqueeze(1).contiguous())
+        loss = criterion(out.view(-1, n_classes), test_y.view(-1))
+        pred = out.view(-1, n_classes).data.max(1, keepdim=True)[1]
+        correct = pred.eq(test_y.data.view_as(pred)).cpu().sum()
+        counter = out.view(-1, n_classes).size(0)
+        print('\nTest set: Average loss: {:.8f} | Accuracy: {:.4f}\n'.format(
+            loss.item(), 100. * correct / counter))
+        return loss.item()


 def train(ep):
......
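Editor's note on the hunk above: since PyTorch 0.4, `correct` is a zero-dimensional integer tensor, so `100. * correct / counter` can truncate toward an integer depending on version; calling `.item()` first avoids the ambiguity, and the legacy `.data` accessor is unnecessary inside `no_grad()`. A sketch of an equivalent accuracy computation (names mirror the diff; illustrative, not the repository's code):

import torch

def accuracy(out, test_y, n_classes):
    # Flatten logits to (N, n_classes) and targets to (N,), as in the diff.
    logits = out.view(-1, n_classes)
    pred = logits.argmax(dim=1)          # replaces .data.max(1, keepdim=True)[1]
    correct = (pred == test_y.view(-1)).sum().item()  # Python int, no tensor division
    return 100.0 * correct / logits.size(0)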
@@ -88,18 +88,19 @@ def evaluate(data_source):
     total_loss = 0
     processed_data_size = 0
     correct = 0
-    for i in range(len(data_source)):
-        data, targets = torch.LongTensor(data_source[i]).view(1, -1), torch.LongTensor([data_source[i][-1]]).view(1, -1)
-        data, targets = Variable(data), Variable(targets)
-        if args.cuda:
-            data, targets = data.cuda(), targets.cuda()
-        output = model(data)
-        final_output = output[:, -1].contiguous().view(-1, n_words)
-        final_target = targets[:, -1].contiguous().view(-1)
-        loss = criterion(final_output, final_target)
-        total_loss += loss.data
-        processed_data_size += 1
-    return total_loss.item() / processed_data_size
+    with torch.no_grad():
+        for i in range(len(data_source)):
+            data, targets = torch.LongTensor(data_source[i]).view(1, -1), torch.LongTensor([data_source[i][-1]]).view(1, -1)
+            data, targets = Variable(data), Variable(targets)
+            if args.cuda:
+                data, targets = data.cuda(), targets.cuda()
+            output = model(data)
+            final_output = output[:, -1].contiguous().view(-1, n_words)
+            final_target = targets[:, -1].contiguous().view(-1)
+            loss = criterion(final_output, final_target)
+            total_loss += loss.data
+            processed_data_size += 1
+        return total_loss.item() / processed_data_size


 def train():
......
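Editor's note: this hunk evaluates one test sequence at a time with batch size 1, accumulating `loss.data` (a tensor) before the final `.item()` division; the retained `Variable` wrapper is a no-op in PyTorch ≥0.4. Assuming `criterion` is a token-level cross-entropy, the averaged loss returned by `evaluate()` converts directly to perplexity; a small helper under that assumption:

import math

def perplexity(avg_nll):
    # Perplexity is the exponential of the average per-token
    # negative log-likelihood returned by evaluate().
    return math.exp(avg_nll)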
@@ -97,23 +97,24 @@ def test():
     model.eval()
     test_loss = 0
     correct = 0
-    for data, target in test_loader:
-        if args.cuda:
-            data, target = data.cuda(), target.cuda()
-        data = data.view(-1, input_channels, seq_length)
-        if args.permute:
-            data = data[:, :, permute]
-        data, target = Variable(data, volatile=True), Variable(target)
-        output = model(data)
-        test_loss += F.nll_loss(output, target, size_average=False).item()
-        pred = output.data.max(1, keepdim=True)[1]
-        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
-    test_loss /= len(test_loader.dataset)
-    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
-        test_loss, correct, len(test_loader.dataset),
-        100. * correct / len(test_loader.dataset)))
-    return test_loss
+    with torch.no_grad():
+        for data, target in test_loader:
+            if args.cuda:
+                data, target = data.cuda(), target.cuda()
+            data = data.view(-1, input_channels, seq_length)
+            if args.permute:
+                data = data[:, :, permute]
+            data, target = Variable(data, volatile=True), Variable(target)
+            output = model(data)
+            test_loss += F.nll_loss(output, target, size_average=False).item()
+            pred = output.data.max(1, keepdim=True)[1]
+            correct += pred.eq(target.data.view_as(pred)).cpu().sum()
+        test_loss /= len(test_loader.dataset)
+        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
+            test_loss, correct, len(test_loader.dataset),
+            100. * correct / len(test_loader.dataset)))
+        return test_loss


 if __name__ == "__main__":
......
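Editor's note: the new code above still wraps inputs in `Variable(data, volatile=True)`. Since PyTorch 0.4, `volatile` has no effect (it emits a warning) and is subsumed by the surrounding `no_grad()` block, and `size_average=False` was later deprecated in favor of `reduction='sum'`. A sketch of one batch of this loop with the legacy calls removed (illustrative only):

import torch
import torch.nn.functional as F

def test_step(model, data, target):
    # One batch of the evaluation loop above, without Variable/volatile.
    with torch.no_grad():
        output = model(data)
        loss = F.nll_loss(output, target, reduction='sum').item()  # sums per-sample losses,
                                                                   # as size_average=False did
        pred = output.argmax(dim=1, keepdim=True)
        correct = pred.eq(target.view_as(pred)).sum().item()
    return loss, correct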
@@ -68,19 +68,20 @@ def evaluate(X_data, name='Eval'):
     eval_idx_list = np.arange(len(X_data), dtype="int32")
     total_loss = 0.0
     count = 0
-    for idx in eval_idx_list:
-        data_line = X_data[idx]
-        x, y = Variable(data_line[:-1]), Variable(data_line[1:])
-        if args.cuda:
-            x, y = x.cuda(), y.cuda()
-        output = model(x.unsqueeze(0)).squeeze(0)
-        loss = -torch.trace(torch.matmul(y, torch.log(output).float().t()) +
-                            torch.matmul((1-y), torch.log(1-output).float().t()))
-        total_loss += loss.item()
-        count += output.size(0)
-    eval_loss = total_loss / count
-    print(name + " loss: {:.5f}".format(eval_loss))
-    return eval_loss
+    with torch.no_grad():
+        for idx in eval_idx_list:
+            data_line = X_data[idx]
+            x, y = Variable(data_line[:-1]), Variable(data_line[1:])
+            if args.cuda:
+                x, y = x.cuda(), y.cuda()
+            output = model(x.unsqueeze(0)).squeeze(0)
+            loss = -torch.trace(torch.matmul(y, torch.log(output).float().t()) +
+                                torch.matmul((1-y), torch.log(1-output).float().t()))
+            total_loss += loss.item()
+            count += output.size(0)
+        eval_loss = total_loss / count
+        print(name + " loss: {:.5f}".format(eval_loss))
+        return eval_loss


 def train(ep):
......
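Editor's note: the loss in this hunk computes a summed binary cross-entropy over a piano-roll matrix via `torch.trace(torch.matmul(...))`. For `y` and `output` of shape (T, 88), trace(y @ log(output).t()) equals (y * log(output)).sum(), so the matmul builds a (T, T) intermediate only to read its diagonal. An equivalent elementwise form (the `eps` clamp is an added safeguard, not in the original):

import torch

def frame_nll(output, y, eps=1e-8):
    # Elementwise equivalent of
    #   -trace(y @ log(output).t() + (1 - y) @ log(1 - output).t())
    # i.e. a summed binary cross-entropy, without the (T, T) intermediate.
    return -(y * torch.log(output + eps)
             + (1 - y) * torch.log(1 - output + eps)).sum()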
@@ -92,23 +92,24 @@ def evaluate(data_source):
     model.eval()
     total_loss = 0
     processed_data_size = 0
-    for i in range(0, data_source.size(1) - 1, args.validseqlen):
-        if i + args.seq_len - args.validseqlen >= data_source.size(1) - 1:
-            continue
-        data, targets = get_batch(data_source, i, args, evaluation=True)
-        output = model(data)
-        # Discard the effective history, just like in training
-        eff_history = args.seq_len - args.validseqlen
-        final_output = output[:, eff_history:].contiguous().view(-1, n_words)
-        final_target = targets[:, eff_history:].contiguous().view(-1)
-        loss = criterion(final_output, final_target)
-        # Note that we don't add TAR loss here
-        total_loss += (data.size(1) - eff_history) * loss.item()
-        processed_data_size += data.size(1) - eff_history
-    return total_loss / processed_data_size
+    with torch.no_grad():
+        for i in range(0, data_source.size(1) - 1, args.validseqlen):
+            if i + args.seq_len - args.validseqlen >= data_source.size(1) - 1:
+                continue
+            data, targets = get_batch(data_source, i, args, evaluation=True)
+            output = model(data)
+            # Discard the effective history, just like in training
+            eff_history = args.seq_len - args.validseqlen
+            final_output = output[:, eff_history:].contiguous().view(-1, n_words)
+            final_target = targets[:, eff_history:].contiguous().view(-1)
+            loss = criterion(final_output, final_target)
+            # Note that we don't add TAR loss here
+            total_loss += (data.size(1) - eff_history) * loss.item()
+            processed_data_size += data.size(1) - eff_history
+        return total_loss / processed_data_size


 def train():
......
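Editor's note: assuming `criterion` here returns a mean over only the scored tokens (those after the effective history), each batch's mean loss is re-weighted by its token count before the final division, yielding a true per-token average over the corpus. A small numeric illustration of that re-weighting:

# Two hypothetical batches: 80 scored tokens at mean loss 4.0,
# 40 scored tokens at mean loss 5.0.
total_loss = 80 * 4.0 + 40 * 5.0         # weighted sum, as in the loop above
processed_data_size = 80 + 40
print(total_loss / processed_data_size)  # 4.33..., not the naive mean 4.5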