Hello! I'm @eve_yk, an engineer at Liaro. Please see Part 1 for how this all started (sob). Last time, we collected and preprocessed the data for training the classifier. This time, we describe the classifier model, then actually train and evaluate it (face recognition)!
Next, we describe the model of the face image classifier in Chainer. I referred to the following code (for the dataset creation in the previous part as well). https://github.com/mitmul/chainer-cifar10
YTNet
class YTNet(chainer.Chain):

    def __init__(self):
        """
        Model definition
        """
        super(YTNet, self).__init__(
            conv1=L.Convolution2D(3, 32, 5, stride=1, pad=2),
            bn1  =L.BatchNormalization(32),  # defined but not used in __call__
            conv2=L.Convolution2D(32, 32, 5, stride=1, pad=2),
            bn2  =L.BatchNormalization(32),  # defined but not used in __call__
            conv3=L.Convolution2D(32, 64, 5, stride=1, pad=2),
            fc4=L.Linear(16384, 4096),
            fc5=L.Linear(4096, 2),
        )
        self.train = True

    def __call__(self, x, t):
        """
        Forward processing
        """
        h = F.max_pooling_2d(F.relu(self.conv1(x)), 3, stride=2)
        h = F.max_pooling_2d(F.relu(self.conv2(h)), 3, stride=2)
        h = F.relu(self.conv3(h))
        h = F.dropout(F.relu(self.fc4(h)), ratio=0.5, train=self.train)
        h = self.fc5(h)

        self.loss = F.softmax_cross_entropy(h, t)
        self.accuracy = F.accuracy(h, t)

        if self.train:
            return self.loss
        else:
            self.pred = F.softmax(h)
            return self.pred
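Incidentally, fc4's input size of 16384 implies 64x64 input images: each of the two 3x3 max-pooling layers with stride 2 halves the resolution (64 → 32 → 16), and conv3 outputs 64 channels, so 64 × 16 × 16 = 16384. Here is a minimal shape check of my own (not from the original article), assuming the class above is in scope:

# My own sanity check: feed a dummy 64x64 image through the model.
# A shape mismatch at fc4 would raise an exception here.
import numpy as np
from chainer import Variable

model = YTNet()
model.train = True
x = Variable(np.zeros((1, 3, 64, 64), dtype=np.float32))
t = Variable(np.zeros((1,), dtype=np.int32))
loss = model(x, t)
print loss.data  # scalar cross-entropy loss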
Based on the model code above, we now write the code to train it. This time training finishes relatively quickly, but when working with a larger dataset, showing the progress of training is good for your mental health.
train.py
# -*- coding: utf-8 -*-
import argparse
import os

import six
import chainer
import chainer.functions as F
import chainer.links as L
import numpy as np
from chainer import optimizers
from chainer import cuda
from chainer import serializers
from chainer import Variable
from progressbar import ProgressBar


class YTNet(chainer.Chain):

    def __init__(self):
        """
        Model definition
        """
        super(YTNet, self).__init__(
            conv1=L.Convolution2D(3, 32, 5, stride=1, pad=2),
            bn1  =L.BatchNormalization(32),  # defined but not used in __call__
            conv2=L.Convolution2D(32, 32, 5, stride=1, pad=2),
            bn2  =L.BatchNormalization(32),  # defined but not used in __call__
            conv3=L.Convolution2D(32, 64, 5, stride=1, pad=2),
            fc4=L.Linear(16384, 4096),
            fc5=L.Linear(4096, 2),
        )
        self.train = True

    def __call__(self, x, t):
        """
        Forward processing
        """
        h = F.max_pooling_2d(F.relu(self.conv1(x)), 3, stride=2)
        h = F.max_pooling_2d(F.relu(self.conv2(h)), 3, stride=2)
        h = F.relu(self.conv3(h))
        h = F.dropout(F.relu(self.fc4(h)), ratio=0.5, train=self.train)
        h = self.fc5(h)

        self.loss = F.softmax_cross_entropy(h, t)
        self.accuracy = F.accuracy(h, t)

        if self.train:
            return self.loss
        else:
            self.pred = F.softmax(h)
            return self.pred


def one_epoch(args, model, optimizer, data, label, epoch, train):
    """
    Run one epoch of training or evaluation
    """
    model.train = train
    xp = cuda.cupy if args.gpu >= 0 else np

    sum_accuracy = 0
    sum_loss = 0

    p = ProgressBar(min_value=0, max_value=data.shape[0])  # for checking progress
    perm = np.random.permutation(data.shape[0])
    for i in six.moves.range(0, data.shape[0], args.batchsize):
        p.update(i)

        # create a minibatch
        target = perm[i:i + args.batchsize]
        x = xp.array(data[target], dtype=xp.float32)
        t = xp.array(label[target], dtype=xp.int32)

        # create Variables
        volatile = 'off' if train else 'on'
        x = Variable(x, volatile=volatile)
        t = Variable(t, volatile=volatile)

        # update the parameters or predict labels
        if train:
            optimizer.update(model, x, t)
        else:
            pred = model(x, t).data

        sum_loss += float(model.loss.data) * t.data.shape[0]
        sum_accuracy += float(model.accuracy.data) * t.data.shape[0]

        del x, t

    print ""  # line break after the progress bar
    if train:
        print "train epoch " + str(epoch)
        print "   train loss : " + str(sum_loss / data.shape[0])
        print "   train acc  : " + str(sum_accuracy / data.shape[0])
    else:
        print "test epoch " + str(epoch)
        print "   test loss : " + str(sum_loss / data.shape[0])
        print "   test acc  : " + str(sum_accuracy / data.shape[0])


def load_dataset(datadir):
    """
    Load the dataset
    """
    train_data = np.load('%s/train_data.npy' % datadir)
    train_labels = np.load('%s/train_label.npy' % datadir)
    test_data = np.load('%s/test_data.npy' % datadir)
    test_labels = np.load('%s/test_label.npy' % datadir)

    return train_data, train_labels, test_data, test_labels


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--gpu", type=int, default=-1)
    parser.add_argument("--batchsize", type=int, default=10)
    parser.add_argument('--data_dir', type=str, default='dataset')
    parser.add_argument('--output_dir', type=str, default='result')
    args = parser.parse_args()

    # create the model and optimizer
    model = YTNet()
    optimizer = optimizers.Adam(alpha=0.00005)
    optimizer.setup(model)

    # move the model to the GPU if one is specified
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    # load the dataset
    dataset = load_dataset(args.data_dir)
    tr_data, tr_labels, te_data, te_labels = dataset

    # main loop
    for epoch in range(1, 20):
        # training
        one_epoch(args, model, optimizer, tr_data, tr_labels, epoch, True)
        # evaluation
        one_epoch(args, model, optimizer, te_data, te_labels, epoch, False)

    # save the model and optimizer state
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    serializers.save_npz(os.path.join(args.output_dir, "YTNet.chainermodel"), model)
    serializers.save_npz(os.path.join(args.output_dir, "YTNet.state"), optimizer)
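With the dataset from Part 1 saved under dataset/ and the defaults above, training runs with:

python train.py --data_dir dataset --output_dir result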
After training for about 20 epochs, accuracy on the training data exceeded 99%.
Finally, let's evaluate the performance. We test with the following 10 images: five of @eve_yk and five of Tanaka of Super Maradona. Sample faces appear in the previous article, so take a look and try to guess who is who.
It's a little unsettling, but you can tell them apart by looking, for example by the color of the glasses. The correct answer is eve_yk for 1, 2, 4, 7, 10 and Mr. Tanaka for 3, 5, 6, 8, 9.
Now, how does the CNN we trained this time fare? We test it with the following code.
test.py
# coding:utf-8
import argparse
import glob
import os

import cv2
import numpy as np
from chainer import Variable
from chainer import serializers

from train import YTNet


def transpose_opencv2chainer(x):
    """
    Convert from the OpenCV npy layout to the Chainer npy layout
    OpenCV  => (height, width, channel)
    Chainer => (channel, height, width)
    """
    return x.transpose(2, 0, 1)

file2labels = {"01.jpg": "eve_yk", "02.jpg": "eve_yk", "03.jpg": "tanaka",
               "04.jpg": "eve_yk", "05.jpg": "tanaka", "06.jpg": "tanaka",
               "07.jpg": "eve_yk", "08.jpg": "tanaka", "09.jpg": "tanaka",
               "10.jpg": "eve_yk"}

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Evaluate the trained CNN')
    parser.add_argument('--input_path', required=True, type=str)
    parser.add_argument('--model_path', required=True, type=str)
    args = parser.parse_args()

    # get the list of jpg files
    test_files = glob.glob(args.input_path + "/*.jpg")

    # load the model and switch it to prediction mode
    model = YTNet()
    serializers.load_npz(args.model_path, model)
    model.train = False

    # evaluate one image at a time
    correct_count = 0.0
    test_count = 0.0
    for file_path in test_files:
        image = cv2.imread(file_path)
        if image is None:
            # failed to read the image
            continue
        test_count += 1.0

        # get the file name from the path
        file_name = os.path.basename(file_path)
        print file_name + "(" + file2labels[file_name] + ") :",

        # convert to the chainer layout
        image = transpose_opencv2chainer(image)
        x = Variable(np.asarray([image], dtype=np.float32), volatile="on")
        t = Variable(np.asarray([0], dtype=np.int32), volatile="on")  # dummy label

        # predict the label
        pred = model(x, t).data
        if np.argmax(pred) == 0:  # tanaka
            print u"Identification result \"tanaka\""
            if file2labels[file_name] == u"tanaka":
                correct_count += 1.0
        else:  # eve_yk
            print u"Identification result \"eve_yk\""
            if file2labels[file_name] == u"eve_yk":
                correct_count += 1.0

    print u"total:{}/{}".format(correct_count, int(test_count))
The result is as follows.
python test.py --input_path test/ --model_path result/YTNet.chainermodel
08.jpg(tanaka) : Identification result "tanaka"
09.jpg(tanaka) : Identification result "eve_yk"
07.jpg(eve_yk) : Identification result "eve_yk"
01.jpg(eve_yk) : Identification result "eve_yk"
03.jpg(tanaka) : Identification result "tanaka"
06.jpg(tanaka) : Identification result "eve_yk"
02.jpg(eve_yk) : Identification result "eve_yk"
05.jpg(tanaka) : Identification result "tanaka"
04.jpg(eve_yk) : Identification result "eve_yk"
10.jpg(eve_yk) : Identification result "eve_yk"
total:8.0/10
It mistook Mr. Tanaka in images 6 and 9 for eve_yk. Perhaps the prediction is biased by the difference in the amount of training data for each class? That said, I feel it got wrong exactly the images that are ambiguous even to the human eye. After all, it seems difficult to tell doppelgangers apart with a half-finished system.
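If the class imbalance really is the culprit, one cheap countermeasure is to oversample the smaller class so the model sees both classes equally often during training. A minimal sketch of my own (the helper name oversample_balance is hypothetical, not part of the original code):

import numpy as np

def oversample_balance(data, labels):
    """Duplicate random minority-class samples until both classes have equal counts."""
    counts = np.bincount(labels)
    minority = int(np.argmin(counts))
    shortfall = int(counts.max() - counts.min())
    if shortfall == 0:
        return data, labels
    idx = np.where(labels == minority)[0]
    extra = np.random.choice(idx, size=shortfall, replace=True)
    data = np.concatenate([data, data[extra]], axis=0)
    labels = np.concatenate([labels, labels[extra]], axis=0)
    return data, labels

# usage: tr_data, tr_labels = oversample_balance(tr_data, tr_labels)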
I created a face classifier that identifies my look-alike. The accuracy is passable, given the many compromises in, for example, the amount of data and the preprocessing. It would be interesting to collect more data and classify more people, or to put more effort into preprocessing to improve the accuracy!
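For example, faces are roughly symmetric, so horizontally mirroring every training image doubles the dataset at zero collection cost. Another sketch of my own (augment_with_flips is hypothetical), for data in the Chainer (N, channel, height, width) layout:

import numpy as np

def augment_with_flips(data, labels):
    # append a horizontally mirrored copy of every image
    flipped = data[:, :, :, ::-1]
    return (np.concatenate([data, flipped], axis=0),
            np.concatenate([labels, labels], axis=0))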
Liaro and eve_yk are rooting for Super Maradona!
https://github.com/mitmul/chainer-cifar10
Yoshimoto Kogyo Co., Ltd. Entertainer Profile | Super Maradona