- Writing a convolutional neural network (CNN) with Chainer
- Imitating AlexNet, the top CNN of ILSVRC 2012
- Actually classifying images with it
AlexNet is the neural network proposed in the paper below, and it achieved the top performance in ILSVRC 2012. Later, VGG from the University of Oxford, which deepened AlexNet, appeared and improved on that performance. (As of 2016 there is also GoogLeNet, which performs even better; its accuracy is said to exceed human level.)
↓ There is also a Chainer sample implementation of AlexNet.
This time I wrote a scaled-down version of AlexNet and used it to actually classify images. The only reason for scaling it down is to reduce the dimensionality of the network so that it fits in memory.
The network created this time is as follows.
class cnn(Chain):

    def __init__(self):
        super(cnn, self).__init__(
            conv1=L.Convolution2D(3, 48, 3, stride=1),
            bn1=L.BatchNormalization(48),
            conv2=L.Convolution2D(48, 128, 3, pad=1),
            bn2=L.BatchNormalization(128),
            conv3=L.Convolution2D(128, 192, 3, pad=1),
            conv4=L.Convolution2D(192, 192, 3, pad=1),
            conv5=L.Convolution2D(192, 128, 3, pad=1),
            fc6=L.Linear(2048, 1024),
            fc7=L.Linear(1024, 1024),
            fc8=L.Linear(1024, 10),
        )
        self.train = True

    def clear(self):
        self.loss = None
        self.accuracy = None

    def forward(self, x_data, t_data):
        self.clear()
        x, t = chainer.Variable(x_data, volatile=False), chainer.Variable(t_data, volatile=False)
        # conv -> batch normalization -> ReLU -> max pooling (twice)
        h = self.bn1(self.conv1(x), test=not self.train)
        h = F.max_pooling_2d(F.relu(h), 3, stride=2)
        h = self.bn2(self.conv2(h), test=not self.train)
        h = F.max_pooling_2d(F.relu(h), 3, stride=2)
        # three convolution layers without pooling in between
        h = F.relu(self.conv3(h))
        h = F.relu(self.conv4(h))
        h = F.relu(self.conv5(h))
        h = F.max_pooling_2d(F.relu(h), 2, stride=2)
        # fully connected layers with dropout
        h = F.dropout(F.relu(self.fc6(h)), train=self.train)
        h = F.dropout(F.relu(self.fc7(h)), train=self.train)
        h = self.fc8(h)
        self.loss = F.softmax_cross_entropy(h, t)
        self.accuracy = F.accuracy(h, t)
        if self.train:
            return self.loss, self.accuracy
        else:
            return h.data, self.accuracy
As you can see by comparing it with the Chainer sample, the number of kernels and the filter size are reduced.
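As a quick sanity check on the fc6=L.Linear(2048, 1024) input size, the spatial dimensions of a $32 \times 32$ CIFAR-10 image can be traced through the layers by hand. The following is a small illustrative sketch (not part of the training script), assuming Chainer's default cover_all=True behavior for max_pooling_2d:

# Sketch: trace the feature-map size of a 32x32 input through the network above,
# assuming Chainer's default cover_all=True for max pooling.

def conv_out(size, ksize, stride=1, pad=0):
    # standard convolution output size
    return (size + 2 * pad - ksize) // stride + 1

def pool_out(size, ksize, stride, pad=0):
    # max_pooling_2d with cover_all=True rounds the output size up
    return (size + 2 * pad - ksize + stride - 1) // stride + 1

s = 32                      # input: 32x32
s = conv_out(s, 3)          # conv1 (3x3, no pad)   -> 30
s = pool_out(s, 3, 2)       # pool1 (3x3, stride 2) -> 15
s = conv_out(s, 3, pad=1)   # conv2 (3x3, pad 1)    -> 15
s = pool_out(s, 3, 2)       # pool2 (3x3, stride 2) -> 7
s = conv_out(s, 3, pad=1)   # conv3                 -> 7
s = conv_out(s, 3, pad=1)   # conv4                 -> 7
s = conv_out(s, 3, pad=1)   # conv5                 -> 7
s = pool_out(s, 2, 2)       # pool3 (2x2, stride 2) -> 4
print(128 * s * s)          # 128 channels * 4 * 4 = 2048, the fc6 input size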
For the data, I used the CIFAR-10 dataset (↓).
This is a dataset of 10 classes such as airplanes and animals, with 6,000 images per class. Each image is $32 \times 32$ pixels with 3 RGB channels. Conveniently, there is also a sample that reads the data with Python and stores it in an array. However, with that method each image is read as a flat array whose first $1024 (= 32 \times 32)$ elements are R, the next $1024$ are G, and the last $1024$ are B, so it needs to be converted into a form that Chainer's Convolution2D() can accept.
I converted it as follows with reference to the following site.
x_train = x_train.reshape((len(x_train), 3, 32, 32))
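Because each flattened CIFAR-10 row stores all 1024 R values first, then the G values, then the B values, a plain reshape to (3, 32, 32) already gives the channel-first (channel, height, width) layout that Convolution2D() expects. A small illustrative check (the variable names are just for this example):

import numpy as np

# one flat CIFAR-10 row: 3072 values = 1024 R, then 1024 G, then 1024 B
row = np.arange(3072, dtype=np.float32)            # dummy data for illustration

img = row.reshape(3, 32, 32)                       # channel-first (CHW), as Convolution2D expects
assert np.array_equal(img[0].ravel(), row[:1024])  # channel 0 is the R block
assert np.array_equal(img[2].ravel(), row[2048:])  # channel 2 is the B block

# for plotting with e.g. matplotlib you would transpose to HWC instead
img_hwc = img.transpose(1, 2, 0)                   # shape (32, 32, 3)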
Also, out of the $60000$ images in total ($6000$ images $\times 10$ classes), this time $10000$ were used as training data and $10000$ as test data. Since each CIFAR-10 training batch file contains $10000$ images, passing --n_data 1 to the script below loads exactly one batch. The accuracy could likely be improved further by using more training data.
Here is the result of running the training loop for 20 epochs: the accuracy was about 56%.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__version__ = '0.0.1'
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import re
import logging
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)
import pprint
def pp(obj):
    pp = pprint.PrettyPrinter(indent=1, width=160)
    str = pp.pformat(obj)
    print re.sub(r"\\u([0-9a-f]{4})", lambda x: unichr(int("0x"+x.group(1),16)), str)
import os, math, time
import numpy as np
import cPickle as pickle
import copy
import chainer
from chainer import cuda, Function, gradient_check, Variable, optimizers, serializers, utils, Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
import subprocess
def unpickle(f):
    import cPickle
    fo = open(f, 'rb')
    d = cPickle.load(fo)
    fo.close()
    return d
def load_cifar10(datadir, n_batch):
    train_data = []
    train_target = []
    if n_batch > 5:
        return -1
    # load train data
    for i in range(1, n_batch+1):
        d = unpickle("%s/data_batch_%d" % (datadir, i))
        train_data.extend(d["data"])
        train_target.extend(d["labels"])
    # load test data
    d = unpickle("%s/test_batch" % (datadir))
    test_data = d["data"]
    test_target = d["labels"]
    train_data = np.array(train_data, dtype=np.float32)
    train_target = np.array(train_target, dtype=np.int32)
    test_data = np.array(test_data, dtype=np.float32)
    test_target = np.array(test_target, dtype=np.int32)
    # normalization
    train_data /= 255.0
    test_data /= 255.0
    return train_data, test_data, train_target, test_target
"""
CNN with Batch-Normalization
"""
class cnn(Chain):

    def __init__(self):
        super(cnn, self).__init__(
            conv1=L.Convolution2D(3, 48, 3, stride=1),
            bn1=L.BatchNormalization(48),
            conv2=L.Convolution2D(48, 128, 3, pad=1),
            bn2=L.BatchNormalization(128),
            conv3=L.Convolution2D(128, 192, 3, pad=1),
            conv4=L.Convolution2D(192, 192, 3, pad=1),
            conv5=L.Convolution2D(192, 128, 3, pad=1),
            fc6=L.Linear(2048, 1024),
            fc7=L.Linear(1024, 1024),
            fc8=L.Linear(1024, 10),
        )
        self.train = True

    def clear(self):
        self.loss = None
        self.accuracy = None

    def forward(self, x_data, t_data):
        self.clear()
        x, t = chainer.Variable(x_data, volatile=False), chainer.Variable(t_data, volatile=False)
        h = self.bn1(self.conv1(x), test=not self.train)
        h = F.max_pooling_2d(F.relu(h), 3, stride=2)
        h = self.bn2(self.conv2(h), test=not self.train)
        h = F.max_pooling_2d(F.relu(h), 3, stride=2)
        h = F.relu(self.conv3(h))
        h = F.relu(self.conv4(h))
        h = F.relu(self.conv5(h))
        h = F.max_pooling_2d(F.relu(h), 2, stride=2)
        h = F.dropout(F.relu(self.fc6(h)), train=self.train)
        h = F.dropout(F.relu(self.fc7(h)), train=self.train)
        h = self.fc8(h)
        self.loss = F.softmax_cross_entropy(h, t)
        self.accuracy = F.accuracy(h, t)
        if self.train:
            return self.loss, self.accuracy
        else:
            return h.data, self.accuracy
if __name__ == '__main__':
    from argparse import ArgumentParser
    parser = ArgumentParser(description='RAE')
    parser.add_argument('--data_dir', type=unicode, default='', help='directory path to the cifar10 data')
    parser.add_argument('--n_data', type=int, default=1, help='# of data')
    parser.add_argument('--n_epoch', type=int, default=20, help='# of epochs')
    parser.add_argument('--batchsize', type=int, default=1, help='size of mini-batch')
    parser.add_argument('--gpu', type=int, default=-1, help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    """
    Get params
    """
    # the number of training iterations
    n_epoch = args.n_epoch
    # the size of mini batch
    batchsize = args.batchsize
    # define cupy
    xp = cuda.cupy if args.gpu >= 0 else np

    """
    Load data
    """
    print 'load CIFAR-10 dataset'
    x_train, x_test, y_train, y_test = load_cifar10(args.data_dir, args.n_data)
    N_train = y_train.size
    N_test = y_test.size
    print N_train, N_test
    # reshape flat rows (R block, G block, B block) into (channel, height, width)
    x_train = x_train.reshape((len(x_train), 3, 32, 32))
    x_test = x_test.reshape((len(x_test), 3, 32, 32))

    """
    Prepare CNN model
    """
    model = cnn()

    """
    Setup optimizer
    """
    optimizer = optimizers.SGD()
    optimizer.setup(model)

    """
    Setup GPU
    """
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    """
    Training Loop
    """
    for epoch in range(1, n_epoch + 1):
        print "epoch: %d" % epoch

        # training phase
        model.train = True
        perm = np.random.permutation(N_train)
        sum_loss = 0
        for i in range(0, N_train, batchsize):
            x_batch = xp.asarray(x_train[perm[i:i + batchsize]])
            y_batch = xp.asarray(y_train[perm[i:i + batchsize]])

            optimizer.zero_grads()
            loss, acc = model.forward(x_batch, y_batch)
            loss.backward()
            optimizer.update()

            sum_loss += float(loss.data) * len(y_batch)

        print "train mean loss: %f" % (sum_loss / N_train)

        # evaluation phase (disable dropout, use BatchNormalization in test mode)
        model.train = False
        sum_accuracy = 0
        for i in range(0, N_test, batchsize):
            x_batch = xp.asarray(x_test[i:i + batchsize])
            y_batch = xp.asarray(y_test[i:i + batchsize])

            _, acc = model.forward(x_batch, y_batch)
            sum_accuracy += float(acc.data) * len(y_batch)

        print "test accuracy: %f" % (sum_accuracy / N_test)