MNIST is the hello world of the ML world. The MNIST dataset is a collection of images of the digits 0..9 together with their labels. Each image is 28x28 pixels. Let's take a few samples from the dataset to get a feel for what it looks like.
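Before building the full pipeline, here is a minimal peek at a single (image, label) pair pulled straight from the torchvision dataset. This snippet is my own addition; the path and transform are just for illustration:

from torchvision import datasets, transforms

mnist = datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.ToTensor())
image, label = mnist[0]   # one (image, label) pair
print(image.size())       # torch.Size([1, 28, 28]) -- one channel, 28x28 pixels
print(label)              # the digit shown in the image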
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
class Args:
    pass
args = Args()
args.batch_size = 32
args.cuda = True
args.lr = 0.001
args.momentum = 0.01
args.epochs = 10
args.log_interval = 10
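If you are following along on a machine without a GPU, one small tweak (my addition, not in the original) keeps the rest of the code working:

# fall back to the CPU when CUDA isn't available
args.cuda = args.cuda and torch.cuda.is_available()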
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=False,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=args.batch_size, shuffle=True, **kwargs)
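The Normalize transform standardizes the pixel values using the dataset's mean and standard deviation (0.1307 and 0.3081). Each batch the loaders yield is a pair of tensors; a quick shape check (this inspection snippet is my own addition, not from the original post):

data, target = next(iter(train_loader))
print(data.size())    # torch.Size([32, 1, 28, 28]) -- 32 single-channel 28x28 images
print(target.size())  # torch.Size([32])            -- one digit label per image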
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
from PIL import Image
import pprint
import numpy
num_of_samples = 5
fig = plt.figure(1,(8., 8.))
grid = ImageGrid(fig, 111,
nrows_ncols=(num_of_samples, num_of_samples),
axes_pad=0.1)
output = numpy.zeros(num_of_samples ** 2)
for i, (data, target) in enumerate(test_loader):
    if i < 1:   # dirty trick to take just one batch
        for j in range(num_of_samples ** 2):
            grid[j].matshow(Image.fromarray(data[j][0].numpy()))
            output[j] = target[j]
    else:
        break

output = output.reshape(num_of_samples, num_of_samples)
print(output)
plt.show()
[[ 6.  9.  9.  5.  4.]
 [ 3.  6.  5.  0.  1.]
 [ 8.  1.  3.  6.  2.]
 [ 9.  4.  8.  8.  6.]
 [ 0.  6.  4.  2.  3.]]
You can see that each image in the grid is associated with the corresponding label printed above: the dataset is a list of (image of a digit, digit) pairs. As usual, we are going to feed the neural network the image (the left element of each pair) and use the digit (the right element) as the target. We will start with a simple feed-forward network; call it Model0.
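Every model below starts with nn.Linear(28*28, ...), so each 28x28 image first has to be flattened into a single 784-element vector. A quick illustration of that reshaping (this snippet is my own, not from the original post):

images, labels = next(iter(train_loader))
flat = images.view(images.size(0), -1)   # flatten each 28x28 image into a 784-vector
print(flat.size())                       # torch.Size([32, 784])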
class Model0(nn.Module):
    def __init__(self):
        super(Model0, self).__init__()
        self.output_layer = nn.Linear(28*28, 10)

    def forward(self, x):
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model1(nn.Module):
    def __init__(self):
        super(Model1, self).__init__()
        self.input_layer = nn.Linear(28*28, 5)
        self.output_layer = nn.Linear(5, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model2(nn.Module):
    def __init__(self):
        super(Model2, self).__init__()
        self.input_layer = nn.Linear(28*28, 6)
        self.output_layer = nn.Linear(6, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model3(nn.Module):
    def __init__(self):
        super(Model3, self).__init__()
        self.input_layer = nn.Linear(28*28, 7)
        self.output_layer = nn.Linear(7, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model4(nn.Module):
    def __init__(self):
        super(Model4, self).__init__()
        self.input_layer = nn.Linear(28*28, 8)
        self.output_layer = nn.Linear(8, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model5(nn.Module):
    def __init__(self):
        super(Model5, self).__init__()
        self.input_layer = nn.Linear(28*28, 9)
        self.output_layer = nn.Linear(9, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model6(nn.Module):
    def __init__(self):
        super(Model6, self).__init__()
        self.input_layer = nn.Linear(28*28, 10)
        self.output_layer = nn.Linear(10, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model7(nn.Module):
    def __init__(self):
        super(Model7, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.output_layer = nn.Linear(100, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model8(nn.Module):
    def __init__(self):
        super(Model8, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.hidden_layer = nn.Linear(100, 100)
        self.output_layer = nn.Linear(100, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_layer(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model9(nn.Module):
    def __init__(self):
        super(Model9, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.hidden_layer = nn.Linear(100, 100)
        self.hidden_layer1 = nn.Linear(100, 100)
        self.output_layer = nn.Linear(100, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_layer(x)
        x = self.hidden_layer1(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

class Model10(nn.Module):
    def __init__(self):
        super(Model10, self).__init__()
        self.input_layer = nn.Linear(28*28, 100)
        self.hidden_layer = nn.Linear(100, 100)
        self.hidden_layer1 = nn.Linear(100, 100)
        self.hidden_layer2 = nn.Linear(100, 100)
        self.output_layer = nn.Linear(100, 10)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_layer(x)
        x = self.hidden_layer1(x)
        x = self.hidden_layer2(x)
        x = self.output_layer(x)
        return F.log_softmax(x)

# collect the models so we can train and evaluate them in a loop;
# move them to the GPU since args.cuda is True
models = [Model0(), Model1(), Model2(), Model3(), Model4(), Model5(),
          Model6(), Model7(), Model8(), Model9(), Model10()]
if args.cuda:
    models = [model.cuda() for model in models]

And let's train them.
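Before that, a quick look at how the models compare in size. The count_parameters helper below is my own addition, not part of the original code:

def count_parameters(model):
    # total number of trainable parameters in a model
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

for i, model in enumerate(models):
    print('Model{}: {} parameters'.format(i, count_parameters(model)))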
def train(epoch, model, print_every=10):
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr, momentum=args.momentum)
    for i in range(epoch):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            data = data.view(args.batch_size, -1)
            data, target = Variable(data), Variable(target)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
        if i % print_every == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                i, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))
for model in models:
    train(1000, model)
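The loading step below assumes that each model's weights were written to disk after training. That saving code isn't shown in the original; a sketch of how it might have been done (the filename pattern matches the files loaded next):

# save each trained model's weights so they can be reloaded later
for i, model in enumerate(models):
    torch.save(model.state_dict(), 'mnist_mlp_multiple_model{}.pth'.format(i))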
# load the weights saved after training each model
for i, model in enumerate(models):
    model.load_state_dict(torch.load('mnist_mlp_multiple_model{}.pth'.format(i)))
Let's see how one of our trained networks predicts the images.
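The code that produced the prediction grid below isn't shown in the original post; here is a sketch of how such a grid might be generated, reusing the visualization setup above (the exact digits will differ because the loader is shuffled):

# a sketch (my own) of producing a 5x5 grid of predicted digits for one test batch
model = models[10]                        # one of the trained models
data, target = next(iter(test_loader))    # one batch of test images
if args.cuda:
    data = data.cuda()
data = data.view(data.size()[0], -1)      # flatten each 28x28 image into a 784-vector
output = model(Variable(data, volatile=True))
pred = output.data.max(1)[1]              # predicted digit for each image
print(pred[:num_of_samples ** 2].cpu().view(num_of_samples, num_of_samples))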
[[ 6.  2.  9.  1.  8.]
 [ 5.  6.  5.  7.  5.]
 [ 4.  8.  6.  3.  0.]
 [ 6.  1.  0.  9.  3.]
 [ 7.  2.  8.  4.  4.]]
Most of the predictions look right. Let's run this over the entire test dataset.
def test(model):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data = data.view(data.size()[0], -1)
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1]   # index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()
    test_loss /= len(test_loader)      # average loss over batches
    print('Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)
accuracy = []
for model in models:
    accuracy.append(test(model))
pprint.pprint(accuracy)
plt.plot(range(len(accuracy)), accuracy, linewidth=1.0)
plt.axis([0, 10, 0, 100])
plt.show()
# same plot with the y-axis restricted to 90-93% to highlight the differences
plt.plot(range(len(accuracy)), accuracy, linewidth=1.0)
plt.axis([0, 10, 90, 93])
plt.show()


