MNIST is the "hello world" of machine learning. The MNIST dataset is a collection of images of handwritten digits 0..9 together with their labels. Each image is 28x28 pixels. Let's take a few samples from the dataset to get a feel for what it looks like.
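Each sample is a single-channel 28x28 tensor paired with an integer label. Before the full loading code below, here is a minimal sketch that inspects one raw sample (the '../data' path and torchvision usage match the loaders that follow):

from torchvision import datasets, transforms

# quick look at one raw sample
mnist = datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.ToTensor())
image, label = mnist[0]
print(image.size())   # torch.Size([1, 28, 28]) -- one channel, 28x28 pixels
print(label)          # the digit this image depicts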
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

class Args:
    pass

args = Args()
args.batch_size = 32
args.cuda = True
args.lr = 0.001
args.momentum = 0.01
args.epochs = 10
args.log_interval = 10

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
from PIL import Image
import pprint
import numpy

# display a 5x5 grid of test images and print their labels
num_of_samples = 5
fig = plt.figure(1, (8., 8.))
grid = ImageGrid(fig, 111, nrows_ncols=(num_of_samples, num_of_samples), axes_pad=0.1)
output = numpy.zeros(num_of_samples ** 2)
for i, (data, target) in enumerate(test_loader):
    if i < 1:  # dirty trick to take just one batch
        for j in range(num_of_samples ** 2):
            grid[j].matshow(Image.fromarray(data[j][0].numpy()))
            output[j] = target[j]
    else:
        break
output = output.reshape(num_of_samples, num_of_samples)
print(output)
plt.show()
[[ 6. 9. 9. 5. 4.] [ 3. 6. 5. 0. 1.] [ 8. 1. 3. 6. 2.] [ 9. 4. 8. 8. 6.] [ 0. 6. 4. 2. 3.]]
You can see that each image of a digit is paired with the label it depicts: the dataset is a list of (image, label) pairs. As usual, we are going to feed the neural network the image and train it to predict the label. We will train a family of simple feed-forward networks, starting with Model0.
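Since these are plain feed-forward networks, each 28x28 image must be flattened into a 784-dimensional vector before it enters the first nn.Linear layer. Here is a minimal sketch of that reshaping, using a batch from the test_loader defined above:

# flatten a batch of [batch_size, 1, 28, 28] images into [batch_size, 784] vectors
data, target = next(iter(test_loader))
print(data.size())                  # torch.Size([32, 1, 28, 28])
flat = data.view(data.size(0), -1)
print(flat.size())                  # torch.Size([32, 784]) -- matches nn.Linear(28*28, ...)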
# Model0 has no hidden layer; Model1-6 add a single hidden layer of width 5-10;
# Model7 widens it to 100; Model8-10 stack one, two and three extra 100-unit layers.
class Model0(nn.Module):
    def __init__(self):
        super(Model0, self).__init__()
        self.output_layer = nn.Linear(28*28, 10)

    def forward(self, x):
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model1(nn.Module):
    def __init__(self):
        super(Model1, self).__init__()
        self.input_layer = nn.Linear(28*28, 5)
        self.output_layer = nn.Linear(5, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model2(nn.Module):
    def __init__(self):
        super(Model2, self).__init__()
        self.input_layer = nn.Linear(28*28, 6)
        self.output_layer = nn.Linear(6, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model3(nn.Module):
    def __init__(self):
        super(Model3, self).__init__()
        self.input_layer = nn.Linear(28*28, 7)
        self.output_layer = nn.Linear(7, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model4(nn.Module):
    def __init__(self):
        super(Model4, self).__init__()
        self.input_layer = nn.Linear(28*28, 8)
        self.output_layer = nn.Linear(8, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model5(nn.Module):
    def __init__(self):
        super(Model5, self).__init__()
        self.input_layer = nn.Linear(28*28, 9)
        self.output_layer = nn.Linear(9, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model6(nn.Module):
    def __init__(self):
        super(Model6, self).__init__()
        self.input_layer = nn.Linear(28*28, 10)
        self.output_layer = nn.Linear(10, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model7(nn.Module):
    def __init__(self):
        super(Model7, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.output_layer = nn.Linear(100, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model8(nn.Module):
    def __init__(self):
        super(Model8, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.hidden_layer = nn.Linear(100, 100)
        self.output_layer = nn.Linear(100, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model9(nn.Module):
    def __init__(self):
        super(Model9, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.hidden_layer = nn.Linear(100, 100)
        self.hidden_layer1 = nn.Linear(100, 100)
        self.output_layer = nn.Linear(100, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_layer(x)
        x = self.hidden_layer1(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model10(nn.Module):
    def __init__(self):
        super(Model10, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.hidden_layer = nn.Linear(100, 100)
        self.hidden_layer1 = nn.Linear(100, 100)
        self.hidden_layer2 = nn.Linear(100, 100)
        self.output_layer = nn.Linear(100, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_layer(x)
        x = self.hidden_layer1(x)
        x = self.hidden_layer2(x)
        x = self.output_layer(x)
        return F.log_softmax(x)
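The models above differ only in the number and width of their layers. As a rough comparison of capacity, here is a small helper (not part of the original code) that counts trainable parameters:

# illustrative helper: count trainable parameters per model
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

for m in [Model0(), Model1(), Model7(), Model10()]:
    print(m.__class__.__name__, count_parameters(m))

Model0 has only 28*28*10 + 10 = 7850 parameters, while Model10 has roughly 110,000.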
Now let's train them.

def train(epoch, model, print_every=10):
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    for i in range(epoch):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            data = data.view(data.size(0), -1)
            data, target = Variable(data), Variable(target)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
        if i % print_every == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                i, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))

# instantiate all eleven models
models = [Model0(), Model1(), Model2(), Model3(), Model4(), Model5(),
          Model6(), Model7(), Model8(), Model9(), Model10()]
if args.cuda:
    models = [model.cuda() for model in models]

for i, model in enumerate(models):
    train(1000, model)
    # save each model so the checkpoints loaded below exist
    torch.save(model.state_dict(), 'mnist_mlp_multiple_model{}.pth'.format(i))

# reload the saved weights (handy when skipping the training step above)
for i, model in enumerate(models):
    model.load_state_dict(torch.load('mnist_mlp_multiple_model{}.pth'.format(i)))

Let's see how our networks predict the images.
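The code that produced the prediction grid below is not in the listing above; here is a rough sketch of how it could be done, reusing the ImageGrid setup from earlier. Which model and which batch were used is an assumption; this sketch takes the last model and the first test batch.

# show one batch of test images and print one model's predictions for them
model = models[-1]
model.eval()
data, target = next(iter(test_loader))
if args.cuda:
    data = data.cuda()
flat = Variable(data.view(data.size(0), -1), volatile=True)
pred = model(flat).data.max(1)[1].cpu().numpy().flatten()

fig = plt.figure(2, (8., 8.))
grid = ImageGrid(fig, 111, nrows_ncols=(num_of_samples, num_of_samples), axes_pad=0.1)
for j in range(num_of_samples ** 2):
    grid[j].matshow(data[j][0].cpu().numpy())
print(pred[:num_of_samples ** 2].reshape(num_of_samples, num_of_samples))
plt.show()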
[[ 6. 2. 9. 1. 8.] [ 5. 6. 5. 7. 5.] [ 4. 8. 6. 3. 0.] [ 6. 1. 0. 9. 3.] [ 7. 2. 8. 4. 4.]]
Most of the predictions look right. Let's run the models over the entire test dataset.
def test(model):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data = data.view(data.size()[0], -1)
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1]  # index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()
    test_loss /= len(test_loader)
    print('Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)

accuracy = []
for model in models:
    accuracy.append(test(model))
pprint.pprint(accuracy)
Let's plot the test accuracy of each model.

plt.plot(range(len(accuracy)), accuracy, linewidth=1.0)
plt.axis([0, 10, 0, 100])
plt.show()
Zooming the y-axis in on the 90-93% range makes the differences between the models easier to see.

plt.plot(range(len(accuracy)), accuracy, linewidth=1.0)
plt.axis([0, 10, 90, 93])
plt.show()
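With the accuracies in hand, one small follow-up (not in the original code) is to pick out the best-performing configuration:

# index of the model with the highest test accuracy
best = int(numpy.argmax(accuracy))
print('Model{} reached {:.2f}% accuracy'.format(best, accuracy[best]))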