**Click here for the sequel, which uses Keras (http://qiita.com/hogefugabar/items/afb4f6c9a93a4bbda51a).**
This is my first post. The hot days continue, but I would like to classify anime face images using deep learning. I am not very familiar with anime: I can recognize Hatsune Miku, but otherwise I have no idea who is who.
The dataset can be obtained from animeface-character-dataset. Reference: I tried to extract the features of the anime face with Denoising AutoEncoder
This time, we will implement a convolutional neural network using Chainer. First, define the model. Its structure is Convolution → Max Pooling → Convolution → Max Pooling → Fully-Connected → Softmax. Please forgive the messy code.
Reference: https://github.com/mitmul/chainer-cifar10/blob/master/models/Cifar10.py
CNN.py
import time
import six.moves.cPickle as pickle
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
from chainer import cuda, Variable, FunctionSet, optimizers
import chainer.functions as F
class ImageNet(FunctionSet):
    """Small CNN: (conv -> ReLU -> max-pool) x 2 -> linear -> dropout -> linear.

    Args:
        n_outputs: Number of output units of the last linear layer, i.e. the
            number of classes.
    """

    def __init__(self, n_outputs):
        super(ImageNet, self).__init__(
            conv1=F.Convolution2D(3, 32, 5),
            conv2=F.Convolution2D(32, 32, 5),
            # NOTE(review): 512 is presumably 32 channels x 4 x 4 positions
            # for 32x32 inputs -- re-derive it if the input size changes.
            l3=F.Linear(512, 512),
            l4=F.Linear(512, n_outputs),
        )

    def _scores(self, x_data, train, gpu):
        """Run the layer stack and return the output of the last linear layer.

        Args:
            x_data: Input array for one mini-batch.
            train: Forwarded to ``F.dropout``; pass False for evaluation.
            gpu: When >= 0 the input is transferred to the GPU first.
        """
        if gpu >= 0:
            x_data = cuda.to_gpu(x_data)
        x = Variable(x_data)
        h = F.max_pooling_2d(F.relu(self.conv1(x)), ksize=2, stride=2)
        h = F.max_pooling_2d(F.relu(self.conv2(h)), ksize=3, stride=3)
        h = F.dropout(F.relu(self.l3(h)), train=train)
        return self.l4(h)

    def forward(self, x_data, y_data, train=True, gpu=-1):
        """Compute loss and accuracy for one labelled mini-batch.

        Args:
            x_data: Input array for one mini-batch.
            y_data: Label array matching ``x_data``.
            train: Forwarded to ``F.dropout``; pass False for evaluation.
            gpu: When >= 0 both arrays are transferred to the GPU first.

        Returns:
            Tuple ``(softmax cross entropy, accuracy)``.
        """
        y = self._scores(x_data, train, gpu)
        if gpu >= 0:
            y_data = cuda.to_gpu(y_data)
        t = Variable(y_data)
        return F.softmax_cross_entropy(y, t), F.accuracy(y, t)

    def predict(self, x_data, gpu=-1):
        """Return the softmax of the network output for ``x_data``.

        Dropout is run with ``train=False``.

        Args:
            x_data: Input array for one mini-batch.
            gpu: When >= 0 the input is transferred to the GPU first.
        """
        return F.softmax(self._scores(x_data, train=False, gpu=gpu))
Next, we wrap the defined model in a class so that it can be trained and evaluated.
CNN.py
class CNN:
    """Train, evaluate, save and restore an ``ImageNet`` model.

    The samples are split 90% / 10% into a training and a test set with
    ``train_test_split`` and the model is optimized with Adam.

    Args:
        data: Input samples; indexed along the first axis.
        target: Labels matching ``data``.
        n_outputs: Number of classes, passed to ``ImageNet``.
        gpu: When >= 0 the model is moved to the GPU and batches are sent
            there as well; -1 keeps everything on the CPU.
    """

    def __init__(self, data, target, n_outputs, gpu=-1):
        self.model = ImageNet(n_outputs)
        self.model_name = 'cnn_model'

        if gpu >= 0:
            self.model.to_gpu()
        self.gpu = gpu

        (self.x_train, self.x_test,
         self.y_train, self.y_test) = train_test_split(
            data, target, test_size=0.1)
        self.n_train = len(self.y_train)
        self.n_test = len(self.y_test)

        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model.collect_parameters())

    def predict(self, x_data, gpu=-1):
        """Delegate to ``self.model.predict``; the model must provide it."""
        return self.model.predict(x_data, gpu)

    def _run_batch(self, x_batch, y_batch, train):
        """Forward one mini-batch and return ``(loss, accuracy)`` as floats.

        When ``train`` is true the gradients are reset, back-propagated and
        the optimizer performs one update.
        """
        if train:
            self.optimizer.zero_grads()
        loss, acc = self.model.forward(
            x_batch, y_batch, train=train, gpu=self.gpu)
        if train:
            loss.backward()
            self.optimizer.update()
        return float(cuda.to_cpu(loss.data)), float(cuda.to_cpu(acc.data))

    def train_and_test(self, n_epoch=100, batchsize=100):
        """Train for ``n_epoch`` epochs, evaluating on the test set each time.

        Per epoch, the mean training and test loss/accuracy are printed.
        Means are weighted by the real batch size because the last batch of
        a pass may be shorter than ``batchsize``.

        Args:
            n_epoch: Number of passes over the training set.
            batchsize: Number of samples per mini-batch.
        """
        for epoch in range(1, n_epoch + 1):
            print('epoch {}'.format(epoch))

            # Training pass over a freshly shuffled training set.
            perm = np.random.permutation(self.n_train)
            sum_train_loss = 0
            sum_train_accuracy = 0
            for i in range(0, self.n_train, batchsize):
                idx = perm[i:i + batchsize]
                loss, acc = self._run_batch(
                    self.x_train[idx], self.y_train[idx], train=True)
                sum_train_loss += loss * len(idx)
                sum_train_accuracy += acc * len(idx)
            print('train mean loss={}, accuracy={}'.format(
                sum_train_loss / self.n_train,
                sum_train_accuracy / self.n_train))

            # Evaluation pass: no parameter update, dropout disabled.
            sum_test_loss = 0
            sum_test_accuracy = 0
            for i in range(0, self.n_test, batchsize):
                x_batch = self.x_test[i:i + batchsize]
                y_batch = self.y_test[i:i + batchsize]
                loss, acc = self._run_batch(x_batch, y_batch, train=False)
                sum_test_loss += loss * len(x_batch)
                sum_test_accuracy += acc * len(x_batch)
            print('test mean loss={}, accuracy={}'.format(
                sum_test_loss / self.n_test,
                sum_test_accuracy / self.n_test))

    def dump_model(self):
        """Move the model to the CPU and pickle it to ``self.model_name``."""
        # NOTE(review): the model stays on the CPU afterwards; if training
        # is meant to continue on the GPU it presumably has to be moved back
        # and the optimizer set up again, as load_model() does -- confirm.
        self.model.to_cpu()
        with open(self.model_name, 'wb') as f:
            pickle.dump(self.model, f, -1)

    def load_model(self):
        """Restore the model from ``self.model_name`` and rebind the optimizer.

        WARNING: unpickling executes arbitrary code; only load files you
        created yourself.
        """
        with open(self.model_name, 'rb') as f:
            self.model = pickle.load(f)
        if self.gpu >= 0:
            self.model.to_gpu()
        self.optimizer.setup(self.model.collect_parameters())
All images are resized to 32 × 32 to reduce the amount of processing. Also, **delete any directory that does not contain images** in advance.
animeface.py
#! -*- coding: utf-8 -*-
import os
import six.moves.cPickle as pickle
import numpy as np
import cv2 as cv
class AnimeFaceDataset:
    """Loader for the animeface-character-dataset thumbnails.

    Every sub-directory of ``data_dir_path`` is one class; its position in
    the sorted directory list is the class id. Loaded arrays are cached in a
    pickle file named ``dump_name`` so the images are only decoded once.
    """

    def __init__(self):
        self.data_dir_path = u"./animeface-character-dataset/thumb/"
        self.data = None
        self.target = None
        # -1 means "not set"; any other value overrides the computed count.
        self.n_types_target = -1
        self.dump_name = u'dataset'
        self.image_size = 32
        # class id -> directory name; filled by load_data_target().
        self.index2name = {}

    def get_dir_list(self):
        """Return the sorted names of the sub-directories of the data dir."""
        return sorted(
            name for name in os.listdir(self.data_dir_path)
            if os.path.isdir(os.path.join(self.data_dir_path, name))
        )

    def get_class_id(self, fname):
        """Return the class id (index in ``get_dir_list()``) for a file path.

        The directory that directly contains the file decides the class. If
        that directory is not a known class, the first class directory whose
        name occurs anywhere in ``fname`` is used instead.

        Raises:
            IndexError: If no class directory matches ``fname``.
        """
        dir_list = self.get_dir_list()
        # Exact match on the parent directory avoids picking "foo" for a
        # file stored under "foo_bar".
        parent = os.path.basename(os.path.dirname(fname))
        if parent in dir_list:
            return dir_list.index(parent)
        matches = [name for name in dir_list if name in fname]
        if not matches:
            raise IndexError('no class directory matches %r' % (fname,))
        return dir_list.index(matches[0])

    def load_data_target(self):
        """Populate ``data``, ``target`` and ``index2name``.

        An existing dump file is loaded first. Only if that leaves
        ``target`` unset are the ``.png`` files read, resized to
        ``image_size`` x ``image_size``, scaled by 1/255 and the result
        dumped for the next run.

        Raises:
            IOError: If an image file cannot be decoded.
        """
        if os.path.exists(self.dump_name):
            self.load_dataset()
        if self.target is not None:
            return

        data = []
        target = []
        target_name = []
        for class_id, dir_name in enumerate(self.get_dir_list()):
            dir_path = os.path.join(self.data_dir_path, dir_name)
            for file_name in os.listdir(dir_path):
                if os.path.splitext(file_name)[1] != u'.png':
                    continue
                abs_name = os.path.join(dir_path, file_name)
                image = cv.imread(abs_name)
                if image is None:
                    # imread signals failure by returning None, not raising.
                    raise IOError('cannot read image: %r' % (abs_name,))
                image = cv.resize(image, (self.image_size, self.image_size))
                # Move the channel axis to the front and scale by 1/255.
                data.append(image.transpose(2, 0, 1) / 255.)
                target.append(class_id)
                target_name.append(str(dir_name))

        self.index2name = dict(zip(target, target_name))
        self.data = np.array(data, np.float32)
        self.target = np.array(target, np.int32)
        self.dump_dataset()

    def get_n_types_target(self):
        """Return the number of distinct class ids in ``target``.

        Loads the data first if necessary. If ``n_types_target`` has been
        set to something other than -1, that value is returned instead.
        """
        if self.target is None:
            self.load_data_target()
        if self.n_types_target != -1:
            return self.n_types_target
        return len(np.unique(self.target))

    def dump_dataset(self):
        """Pickle ``(data, target, index2name)`` to ``dump_name``."""
        with open(self.dump_name, 'wb') as f:
            pickle.dump((self.data, self.target, self.index2name), f, -1)

    def load_dataset(self):
        """Restore ``data``, ``target`` and ``index2name`` from ``dump_name``.

        WARNING: unpickling executes arbitrary code; only load files you
        created yourself.
        """
        with open(self.dump_name, 'rb') as f:
            self.data, self.target, self.index2name = pickle.load(f)
Let's actually read the data.
In [1]: from animeface import AnimeFaceDataset
In [2]: dataset = AnimeFaceDataset()
In [3]: dataset.load_data_target()
In [5]: dataset.get_n_types_target()
Out[5]: 176
In [6]: len(dataset.target)
Out[6]: 14490
Therefore, it is a classification problem with 14,490 samples and 176 classes (characters). I wonder if there really are 176 characters. Let's actually train the model with the following code.
from CNN import CNN
from animeface import AnimeFaceDataset
from chainer import cuda

# Train on the first GPU.
GPU_ID = 0
cuda.init(GPU_ID)

print('load AnimeFace dataset')
dataset = AnimeFaceDataset()
# AnimeFaceDataset defines load_data_target(); there is no read_data_target().
dataset.load_data_target()
data = dataset.data
target = dataset.target
n_outputs = dataset.get_n_types_target()

cnn = CNN(data=data,
          target=target,
          gpu=GPU_ID,
          n_outputs=n_outputs)

cnn.train_and_test(n_epoch=100)
The execution result is as follows.
C:\Python27\lib\site-packages\skcuda\cublas.py:273: UserWarning: creating CUBLAS
context to get version number
warnings.warn('creating CUBLAS context to get version number')
load AnimeFace dataset
epoch 1
train mean loss=4.77383880182, accuracy=0.0361935423276
test mean loss=3.88453409868, accuracy=0.116632157313
epoch 2
train mean loss=3.52874370272, accuracy=0.158193386024
test mean loss=3.00467933286, accuracy=0.247066933423
epoch 3
train mean loss=2.95961939461, accuracy=0.254735058687
test mean loss=2.6362867278, accuracy=0.327122144303
epoch 4
train mean loss=2.634737659, accuracy=0.319607384265
test mean loss=2.38959699009, accuracy=0.395445127233
----
Omission
----
epoch 96
train mean loss=0.227027994983, accuracy=0.925159092696
test mean loss=2.70711887911, accuracy=0.589371965415
epoch 97
train mean loss=0.216873285405, accuracy=0.927382851637
test mean loss=2.6218228118, accuracy=0.594893018034
epoch 98
train mean loss=0.209225204521, accuracy=0.930220058136
test mean loss=2.68379376295, accuracy=0.5935127585
epoch 99
train mean loss=0.209071503231, accuracy=0.928072985573
test mean loss=2.62009712151, accuracy=0.593512752658
epoch 100
train mean loss=0.210750763214, accuracy=0.92999001446
test mean loss=2.75891605618, accuracy=0.589371977427
Early Stopping was not executed.
The result was an accuracy of about 60%. I suspect it could be improved by changing the CNN architecture, and shrinking the images to 32 × 32 may have been too aggressive. Next time I plan to publish the code on GitHub. I would appreciate it if you could point out anything strange, including variable names and function names.
Recommended Posts