Showing posts with label deep learning.

Thursday, 27 April 2017

MNIST Study in PyTorch - Rings and Bells

MNIST is the "hello world" of machine learning. The MNIST dataset is a collection of images of handwritten digits 0..9 along with their labels. Each image is 28x28 pixels. Let's take a few samples from the dataset to get a feel for what it looks like.

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

class Args:
    pass

args = Args()
args.batch_size = 32
args.cuda = True
args.lr = 0.001
args.momentum = 0.01
args.epochs = 10
args.log_interval = 10

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, 
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
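
The normalization constants 0.1307 and 0.3081 are the mean and standard deviation of the MNIST training pixels. A quick sanity check (a small sketch, not part of the original post):

# load the raw training images once and compute their mean/std to verify
# the Normalize((0.1307,), (0.3081,)) constants used above
raw = datasets.MNIST('../data', train=True, download=True,
                     transform=transforms.ToTensor())
check_loader = torch.utils.data.DataLoader(raw, batch_size=len(raw))
images, _ = next(iter(check_loader))
print(images.mean(), images.std())  # roughly 0.1307 and 0.3081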

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
from PIL import Image
import pprint
import numpy 

num_of_samples = 5

fig = plt.figure(1,(8., 8.))
grid = ImageGrid(fig, 111,
                 nrows_ncols=(num_of_samples, num_of_samples),   
                 axes_pad=0.1)

output = numpy.zeros(num_of_samples ** 2)
for i, (data, target) in enumerate(test_loader):
    if i < 1:  # dirty trick to take just one batch
        for j in range(num_of_samples ** 2):
            grid[j].matshow(data[j][0].numpy())
            output[j] = target[j]
    else:
        break

output = output.reshape(num_of_samples, num_of_samples)
print(output)
plt.show()


[[ 6.  9.  9.  5.  4.]
 [ 3.  6.  5.  0.  1.]
 [ 8.  1.  3.  6.  2.]
 [ 9.  4.  8.  8.  6.]
 [ 0.  6.  4.  2.  3.]]


You can see that the image of digit <> is paired with the label <>. The dataset is a list of (image, label) pairs. As usual, we feed the network the image and train it to predict the label. We start with a simple feed-forward network, Model0, and then define progressively wider and deeper variants, Model1 through Model10.


class Model0(nn.Module):
    def __init__(self):
        super(Model0, self).__init__()
        self.output_layer = nn.Linear(28*28, 10)
       
    def forward(self, x):
        x = self.output_layer(x)
        return F.log_softmax(x)
 
class Model1(nn.Module):
    def __init__(self):
        super(Model1, self).__init__()
        self.input_layer = nn.Linear(28*28, 5)
        self.output_layer = nn.Linear(5, 10)
 
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model2(nn.Module):
    def __init__(self):
        super(Model2, self).__init__()
        self.input_layer = nn.Linear(28*28, 6)
        self.output_layer = nn.Linear(6, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model3(nn.Module):    
    def __init__(self):
        super(Model3, self).__init__()
        self.input_layer = nn.Linear(28*28, 7)
        self.output_layer = nn.Linear(7, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model4(nn.Module):
    def __init__(self):
        super(Model4, self).__init__()
        self.input_layer = nn.Linear(28*28, 8)
        self.output_layer = nn.Linear(8, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)


class Model5(nn.Module):
    def __init__(self):
        super(Model5, self).__init__()
        self.input_layer = nn.Linear(28*28, 9)
        self.output_layer = nn.Linear(9, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model6(nn.Module):    
    def __init__(self):
        super(Model6, self).__init__()
        self.input_layer = nn.Linear(28*28, 10)
        self.output_layer = nn.Linear(10, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model7(nn.Module):
    def __init__(self):
        super(Model7, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.output_layer = nn.Linear(100, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model8(nn.Module):
    def __init__(self):
        super(Model8, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.hidden_layer = nn.Linear(100, 100)
        self.output_layer = nn.Linear(100, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model9(nn.Module):
    def __init__(self):
        super(Model9, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.hidden_layer = nn.Linear(100, 100)
        self.hidden_layer1 = nn.Linear(100, 100)
        self.output_layer = nn.Linear(100, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_layer(x)
        x = self.hidden_layer1(x)
        x = self.output_layer(x)        
        return F.log_softmax(x)

class Model10(nn.Module):
    def __init__(self):
        super(Model10, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.hidden_layer = nn.Linear(100, 100)
        self.hidden_layer1 = nn.Linear(100, 100)
        self.hidden_layer2 = nn.Linear(100, 100)
        self.output_layer = nn.Linear(100, 10)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_layer(x)
        x = self.hidden_layer1(x)
        x = self.hidden_layer2(x)
        x = self.output_layer(x)
        return F.log_softmax(x)
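
Model1 through Model7 differ only in the width of the single hidden layer, and Model8 through Model10 only in depth. As an aside (a sketch, not from the original post), the width family could be generated programmatically; note that, like the classes above, it deliberately puts no non-linearity between the layers:

def make_two_layer_model(hidden_size):
    # builds a model equivalent to Model1..Model7 for the given hidden width
    class TwoLayer(nn.Module):
        def __init__(self):
            super(TwoLayer, self).__init__()
            self.input_layer = nn.Linear(28*28, hidden_size)
            self.output_layer = nn.Linear(hidden_size, 10)

        def forward(self, x):
            x = self.input_layer(x)
            x = self.output_layer(x)
            return F.log_softmax(x)
    return TwoLayer()

# e.g. make_two_layer_model(5) behaves like Model1, make_two_layer_model(100) like Model7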

And let's train them:

def train(epoch, model, print_every=10):
    optimizer = optim.SGD(model.parameters(),
           lr=args.lr, momentum=args.momentum)
    for i in range(epoch):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            if args.cuda:
                data, target = data.cuda(), target.cuda()
           
            data = data.view(data.size()[0], -1)  # use the actual batch size; the last batch can be smaller
            data, target = Variable(data), Variable(target)
            optimizer.zero_grad()
            output = model(data)
        
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
           
       
        if i % print_every == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    i, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.data[0]))
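
The models end in log_softmax because F.nll_loss expects log-probabilities. As an aside (not from the original post), the same objective can be written with nn.CrossEntropyLoss on raw scores; a tiny sketch with made-up tensors:

# CrossEntropyLoss == log_softmax followed by nll_loss
raw_scores = Variable(torch.randn(4, 10))          # hypothetical unnormalized outputs
labels = Variable(torch.LongTensor([1, 0, 4, 9]))  # hypothetical targets
loss_a = F.nll_loss(F.log_softmax(raw_scores), labels)
loss_b = nn.CrossEntropyLoss()(raw_scores, labels)
# loss_a and loss_b hold the same value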

models = [Model0(), Model1(), Model2(), Model3(), Model4(), Model5(),
          Model6(), Model7(), Model8(), Model9(), Model10()]

for i, model in enumerate(models):
    if args.cuda:
        model.cuda()
    train(1000, model)
    torch.save(model.state_dict(), 'mnist_mlp_multiple_model{}.pth'.format(i))

# the saved weights can be reloaded later without retraining
for i, model in enumerate(models):
    model.load_state_dict(torch.load('mnist_mlp_multiple_model{}.pth'.format(i)))

Let's see how our networks predict the images.
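
The post does not show the snippet that produced the grid below, so here is a minimal sketch of how it could be generated (assumed, not from the original; it takes the last model as an example and reuses the one-batch trick from earlier):

model = models[-1]  # pick any trained model; the last one is used here as an example
model.eval()

fig = plt.figure(2, (8., 8.))
grid = ImageGrid(fig, 111,
                 nrows_ncols=(num_of_samples, num_of_samples),
                 axes_pad=0.1)

predictions = numpy.zeros(num_of_samples ** 2)
for i, (data, target) in enumerate(test_loader):
    if i < 1:  # same dirty trick: take just one batch
        flat = data.view(data.size()[0], -1)
        if args.cuda:
            flat = flat.cuda()
        output = model(Variable(flat, volatile=True))
        pred = output.data.max(1)[1].cpu().numpy().flatten()
        for j in range(num_of_samples ** 2):
            grid[j].matshow(data[j][0].numpy())
            predictions[j] = pred[j]
    else:
        break

print(predictions.reshape(num_of_samples, num_of_samples))
plt.show()
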
[[ 6.  2.  9.  1.  8.]
 [ 5.  6.  5.  7.  5.]
 [ 4.  8.  6.  3.  0.]
 [ 6.  1.  0.  9.  3.]
 [ 7.  2.  8.  4.  4.]]
Most of the predictions look right. Let's run this over the entire test dataset.
def test(model):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()

        data = data.view(data.size()[0], -1)
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1]
        correct += pred.eq(target.data).cpu().sum()

    test_loss /= len(test_loader)
    print('Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss,
        correct,
        len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)



accuracy = []
for model in models:
    accuracy.append(test(model))

pprint.pprint(accuracy)

plt.plot(range(len(accuracy)), accuracy, linewidth=1.0)
plt.axis([0, 10, 0, 100])
plt.show()


plt.plot(range(len(accuracy)), accuracy, linewidth=1.0)
plt.axis([0, 10, 90, 93])
plt.show()

MNIST Study in PyTorch

MNIST is the "hello world" of machine learning. The MNIST dataset is a collection of images of handwritten digits 0..9 along with their labels. Each image is 28x28 pixels. Let's take a few samples from the dataset to get a feel for what it looks like.

[[ 6.  9.  9.  5.  4.]
 [ 3.  6.  5.  0.  1.]
 [ 8.  1.  3.  6.  2.]
 [ 9.  4.  8.  8.  6.]
 [ 0.  6.  4.  2.  3.]]


You can see that the image of digit <> is paired with the label <>. The dataset is a list of (image, label) pairs. As usual, we feed the network the image and train it to predict the label. We will train a simple feed-forward network, which we'll call Model0.

class Model0(nn.Module):
    def __init__(self):
        super(Model0, self).__init__()     
        self.output_layer = nn.Linear(28*28, 10)
   
    def forward(self, x):
        x = self.output_layer(x)
        return F.log_softmax(x)
And let's train it:

def train(model):
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)
    model.train()
    for data, target in train_loader:
        optimizer.zero_grad()
        data = data.view(data.size()[0], -1)
        data, target = Variable(data), Variable(target)
        output = model(data)

        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
 
model = Model0()
train(model)

Let's see how our network predicts the images.

[[ 6.  2.  9.  1.  8.]
 [ 5.  6.  5.  7.  5.]
 [ 4.  8.  6.  3.  0.]
 [ 6.  1.  0.  9.  3.]
 [ 7.  2.  8.  4.  4.]]
Most of the predictions look right. Let's run this over the entire test dataset.
def test_tuts(model):
    model.eval()
    test_loss, correct = 0, 0

    for data, target in test_loader:
        data = data.view(data.size()[0], -1)
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)

        pred = output.data.max(1)[1]
        correct += pred.eq(target.data).cpu().sum()
        test_loss += F.nll_loss(output, target).data[0]

    test_loss /= len(test_loader)
    print(
        'Avg loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
            test_loss,
            correct,
            len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
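
With the Model0 trained above, evaluation is a single call:

test_tuts(model)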

   

Friday, 24 March 2017

[WIP] Deep Learning hardware for the Commons

Deep learning requires a huge amount of processing power. Building that capacity into every household is infeasible in the near future (even if there is a breakthrough in hardware technologies for ML, it will most likely be costlier than already available options).

All ML systems consist of two phases: training and prediction (or classification, or application in general). Fortunately, the actual prediction process requires less processing power than training.

It might be useful to cooperatively design, build and run a powerful machine for training, and then deploy the resulting model on less powerful machines like the Raspberry Pi.

Note that the training process need not end once and for all. We can keep collecting new data samples after deployment; not every application area caters to this, but there are certainly areas where we can harness it.
 
Large System with all the rings and bells.

A compute cluster with heterogeneous nodes: nodes with different combinations of CPUs and GPUs, and nodes with different kinds of FPGA-based accelerators (along with a compiler from TensorFlow model graphs to FPGA).

FPGAs can also be used to run trained models in deployment.