
Thursday 27 April 2017

MNIST Study in PyTorch - Rings and Bells

MNIST is the hello world of the ML world. The MNIST dataset is a collection of images of the digits 0..9 along with their labels. The images are of size 28x28. Let's take a few samples from the dataset to get a feel for what it looks like.

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

class Args:
    pass

args = Args()
args.batch_size = 32
args.cuda = True
args.lr = 0.001
args.momentum = 0.01
args.epochs = 10
args.log_interval = 10

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, 
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
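
Each batch yielded by these loaders is a pair of tensors: images of shape (batch_size, 1, 28, 28) and labels of shape (batch_size,). A quick peek to confirm:

# grab one batch from the training loader and inspect the tensor shapes
data, target = next(iter(train_loader))
print(data.size())    # torch.Size([32, 1, 28, 28])
print(target.size())  # torch.Size([32])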

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
from PIL import Image
import pprint
import numpy 

num_of_samples = 5

fig = plt.figure(1,(8., 8.))
grid = ImageGrid(fig, 111,
                 nrows_ncols=(num_of_samples, num_of_samples),   
                 axes_pad=0.1)

output = numpy.zeros(num_of_samples ** 2)
for i, (data, target) in enumerate(test_loader):
    if i < 1: # dirty trick to take just the first batch
        for j in range(num_of_samples ** 2):
            grid[j].matshow(data[j][0].numpy())
            output[j] = target[j]
    else:
        break


output = output.reshape(num_of_samples, num_of_samples)
print(output)
plt.show()


[[ 6.  9.  9.  5.  4.]
 [ 3.  6.  5.  0.  1.]
 [ 8.  1.  3.  6.  2.]
 [ 9.  4.  8.  8.  6.]
 [ 0.  6.  4.  2.  3.]]


You can see that the image of number <> is associated with the label <>. The dataset is a list of (image of a digit, digit) pairs. As usual, we are going to feed the neural network the image on the left and its label on the right. We will train a series of simple feed-forward networks, starting with Model0.


class Model0(nn.Module):
    def __init__(self):
        super(Model0, self).__init__()
        self.output_layer = nn.Linear(28*28, 10)
       
    def forward(self, x):
        x = self.output_layer(x)
        return F.log_softmax(x)
 
class Model1(nn.Module):
    def __init__(self):
        super(Model1, self).__init__()
        self.input_layer = nn.Linear(28*28, 5)
        self.output_layer = nn.Linear(5, 10)
 
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model2(nn.Module):
    def __init__(self):
        super(Model2, self).__init__()
        self.input_layer = nn.Linear(28*28, 6)
        self.output_layer = nn.Linear(6, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model3(nn.Module):    
    def __init__(self):
        super(Model3, self).__init__()
        self.input_layer = nn.Linear(28*28, 7)
        self.output_layer = nn.Linear(7, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model4(nn.Module):
    def __init__(self):
        super(Model4, self).__init__()
        self.input_layer = nn.Linear(28*28, 8)
        self.output_layer = nn.Linear(8, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)


class Model5(nn.Module):
    def __init__(self):
        super(Model5, self).__init__()
        self.input_layer = nn.Linear(28*28, 9)
        self.output_layer = nn.Linear(9, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model6(nn.Module):    
    def __init__(self):
        super(Model6, self).__init__()
        self.input_layer = nn.Linear(28*28, 10)
        self.output_layer = nn.Linear(10, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model7(nn.Module):
    def __init__(self):
        super(Model7, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.output_layer = nn.Linear(100, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model8(nn.Module):
    def __init__(self):
        super(Model8, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.hidden_layer = nn.Linear(100, 100)
        self.output_layer = nn.Linear(100, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model9(nn.Module):
    def __init__(self):
        super(Model9, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.hidden_layer = nn.Linear(100, 100)
        self.hidden_layer1 = nn.Linear(100, 100)
        self.output_layer = nn.Linear(100, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_layer(x)
        x = self.hidden_layer1(x)
        x = self.output_layer(x)        
        return F.log_softmax(x)

class Model10(nn.Module):
    def __init__(self):
        super(Model10, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.hidden_layer = nn.Linear(100, 100)
        self.hidden_layer1 = nn.Linear(100, 100)
        self.hidden_layer2 = nn.Linear(100, 100)
        self.output_layer = nn.Linear(100, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_layer(x)
        x = self.hidden_layer1(x)
        x = self.hidden_layer2(x)
        x = self.output_layer(x)
        return F.log_softmax(x)
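
The training and evaluation loops below iterate over a models list that isn't defined above; here is a minimal sketch of building it, moving each model to the GPU since args.cuda is set:

# collect one instance of each model; the loops below iterate over this list
models = [Model0(), Model1(), Model2(), Model3(), Model4(), Model5(),
          Model6(), Model7(), Model8(), Model9(), Model10()]
if args.cuda:
    models = [model.cuda() for model in models]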

and let's train them all

def train(epoch, model, print_every=10):
    optimizer = optim.SGD(model.parameters(),
           lr=args.lr, momentum=args.momentum)
    for i in range(epoch):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            if args.cuda:
                data, target = data.cuda(), target.cuda()

            data = data.view(data.size(0), -1)
            data, target = Variable(data), Variable(target)
            optimizer.zero_grad()
            output = model(data)

            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()

        if i % print_every == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    i, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.data[0]))

for model in models:
     train(1000, model)
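
The load_state_dict call below reads checkpoint files that must have been written after training; a sketch of the corresponding save step, assuming the same filename pattern:

# save each trained model so it can be reloaded later
for i, model in enumerate(models):
    torch.save(model.state_dict(), 'mnist_mlp_multiple_model{}.pth'.format(i))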

        
for i, model in enumerate(models):
    model.load_state_dict(torch.load('mnist_mlp_multiple_model{}.pth'.format(i)))
Let's see how our network predicts the images.
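
The code that produced the prediction grid below isn't shown in the post; here is a minimal sketch, assuming we reuse one of the trained models and the first batch of the test loader:

# rough sketch: predict on the first test batch and print a 5x5 grid of predicted labels
model = models[-1]  # any trained model would do; the last one is used here as an example
model.eval()
for data, target in test_loader:
    if args.cuda:
        data = data.cuda()
    data = Variable(data.view(data.size(0), -1), volatile=True)
    output = model(data)
    pred = output.data.max(1)[1].cpu().numpy().reshape(-1)
    print(pred[:num_of_samples ** 2].reshape(num_of_samples, num_of_samples))
    break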
[[ 6.  2.  9.  1.  8.]
 [ 5.  6.  5.  7.  5.]
 [ 4.  8.  6.  3.  0.]
 [ 6.  1.  0.  9.  3.]
 [ 7.  2.  8.  4.  4.]]
Most of the predictions look right. Let's run this over the entire test dataset.
def test(model):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()

        data = data.view(data.size()[0], -1)
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1]
        correct += pred.eq(target.data).cpu().sum()

    test_loss /= len(test_loader)
    print(' Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss,
        correct,
        len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)
    ))
    return 100. * correct / len(test_loader.dataset)



accuracy = []
for model in models:
    accuracy.append(test(model))

pprint.pprint(accuracy)

plt.plot(range(len(accuracy)), accuracy, linewidth=1.0)
plt.axis([0, 10, 0, 100])
plt.show()


plt.plot(range(len(accuracy)), accuracy, linewidth=1.0)
plt.axis([0, 10, 90, 93])
plt.show()
