- Let's implement colorization of monochrome images easily with Chainer.
- Recent Chainer can be written quite abstractly, so the implementation uses Trainer and friends.
- Let's compare how the finish differs between a fully connected NN and a convolutional NN, and between activation functions.
CIFAR-100 is used for the dataset. Chainer ships with several ready-to-use datasets, such as MNIST and CIFAR-100. I chose CIFAR-100 because it quickly provides a large number of color images, although a dataset containing a wider variety of images would really be more appropriate.
Loading CIFAR-100 in Chainer takes a single line:
train, test = chainer.datasets.get_cifar100(withlabel=False)
A supervised dataset in Chainer is implemented by passing an array of (training data, teacher data) pairs to an iterator, so learning to colorize monochrome images needs an array of (monochrome image, color image) pairs. chainer.datasets.get_cifar100() normally yields a (tuple-like) array of (image, label) pairs, but since the labels are not needed here, withlabel=False is passed so that only the image arrays are returned.
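As a quick sanity check (a sketch; the shapes follow the CIFAR-100 spec, and Chainer scales pixel values to [0, 1] by default):
train, test = chainer.datasets.get_cifar100(withlabel=False)
print(len(train), len(test))           # 50000 10000
print(train[0].shape, train[0].dtype)  # (3, 32, 32) float32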
Next, turn the color image dataset into (monochrome image, color image) pairs. When building a custom dataset in Chainer, one often reaches for chainer.datasets.TupleDataset(training data array, teacher data array). Here, however, a monochrome image is easy to derive from a color image, so instead we inherit chainer.dataset.DatasetMixin and generate the monochrome image just before each mini-batch is created (see the TupleDataset sketch after the class for comparison). This sample will be helpful. This style also seems to make it easy to apply processing such as cropping images or adding noise.
class PreprocessedDataset(chainer.dataset.DatasetMixin):
    def __init__(self, base_image_dataset):
        self.base = base_image_dataset

    def __len__(self):
        return len(self.base)

    def get_example(self, i):
        color_image = self.base[i]
        # Compute the luminance from the RGB channels to make the monochrome image
        gray_image = (
            0.298912*color_image[0]
            + 0.586611*color_image[1]
            + 0.114478*color_image[2]
        ).astype(np.float32)
        return gray_image, color_image
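For comparison, a minimal sketch of the TupleDataset route, which would precompute every monochrome image up front (assuming train is the array obtained above):
# Precompute all monochrome images once and pair them with the color originals
gray_all = (0.298912*train[:, 0]
            + 0.586611*train[:, 1]
            + 0.114478*train[:, 2]).astype(np.float32)
tuple_dataset = chainer.datasets.TupleDataset(gray_all, train)  # (gray, color) pairs
The DatasetMixin version instead derives the monochrome image per example at batch-creation time, which leaves room for per-example randomness such as crops or noise.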
This time I will try two types of NN: one that simply stacks fully connected layers, and one that follows Convolution layers with Deconvolution layers.
class AIC_FC(chainer.Chain):
    def __init__(self, n_units):
        initializer = chainer.initializers.HeNormal()
        super(AIC_FC, self).__init__(
            fc_in=L.Linear(None, n_units),
            bn1=L.BatchNormalization(n_units),
            fc2=L.Linear(None, n_units),
            bn2=L.BatchNormalization(n_units),
            fc_out=L.Linear(None, 32*32*3)
        )

    def __call__(self, x, t):
        y = self.colorize(x)
        loss = F.mean_squared_error(y, t)
        chainer.reporter.report({
            'loss': loss
        })
        return loss

    def colorize(self, x, test=False):
        h = F.elu(self.bn1(self.fc_in(x), test=test))
        h = F.elu(self.bn2(self.fc2(h), test=test))
        y = F.reshape(self.fc_out(h), (h.shape[0], 3, 32, 32))
        return y
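A quick shape check for this model (a sketch, assuming the Chainer v1-style API that the test= keyword implies; L.Linear(None, ...) flattens the input and infers its size on the first call):
model = AIC_FC(n_units=2048)
x = np.zeros((8, 32, 32), dtype=np.float32)  # a dummy batch of monochrome images
y = model.colorize(x, test=True)
print(y.data.shape)  # (8, 3, 32, 32)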
class AIC_DC(chainer.Chain):
    def __init__(self, n_ch):
        initializer = chainer.initializers.HeNormal()
        super(AIC_DC, self).__init__(
            cv_in=L.Convolution2D(1, n_ch//4, 4, 2, 1),
            bn1=L.BatchNormalization(n_ch//4),
            cv1=L.Convolution2D(n_ch//4, n_ch//2, 4, 2, 1),
            bn2=L.BatchNormalization(n_ch//2),
            cv2=L.Convolution2D(n_ch//2, n_ch, 4, 2, 1),
            bn3=L.BatchNormalization(n_ch),
            cv3=L.Convolution2D(n_ch, n_ch, 4, 2, 1),
            bn4=L.BatchNormalization(n_ch),
            dc1=L.Deconvolution2D(n_ch, n_ch, 4, 2, 1),
            bn5=L.BatchNormalization(n_ch),
            dc2=L.Deconvolution2D(n_ch, n_ch//2, 4, 2, 1),
            bn6=L.BatchNormalization(n_ch//2),
            dc3=L.Deconvolution2D(n_ch//2, n_ch//4, 4, 2, 1),
            bn7=L.BatchNormalization(n_ch//4),
            dc_out=L.Deconvolution2D(n_ch//4, 3, 4, 2, 1, outsize=(32, 32))
        )

    def __call__(self, x, t):
        y = self.colorize(x)
        loss = F.mean_squared_error(y, t)
        chainer.reporter.report({
            'loss': loss
        })
        return loss

    def colorize(self, x, test=False):
        # Reshape so that ndim is 4 for input to the Convolution layer
        h = F.reshape(x, (x.shape[0], 1, 32, 32))
        h = F.elu(self.bn1(self.cv_in(h), test=test))
        h = F.elu(self.bn2(self.cv1(h), test=test))
        h = F.elu(self.bn3(self.cv2(h), test=test))
        h = F.elu(self.bn4(self.cv3(h), test=test))
        h = F.elu(self.bn5(self.dc1(h), test=test))
        h = F.elu(self.bn6(self.dc2(h), test=test))
        h = F.elu(self.bn7(self.dc3(h), test=test))
        y = self.dc_out(h)
        return y
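With kernel size 4, stride 2, and padding 1, every Convolution2D halves the spatial size and every Deconvolution2D doubles it, so the 32×32 input is squeezed to 2×2 and expanded back (a worked check):
# conv:   out = (in + 2*pad - k)//stride + 1   e.g. (32 + 2 - 4)//2 + 1 = 16
# deconv: out = stride*(in - 1) + k - 2*pad    e.g. 2*(2 - 1) + 4 - 2   = 4
# encoder: 32 -> 16 -> 8 -> 4 -> 2,  decoder: 2 -> 4 -> 8 -> 16 -> 32
# outsize=(32, 32) on dc_out resolves the one-pixel ambiguity of stride-2 deconvolution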
When I tried making the fully connected NN deeper, it only produced blurry images (slow convergence?), so I kept it shallow.
Incidentally, real-world colorization NNs seem to use considerably more elaborate designs. (Reference)
The flow of a training implementation in Chainer is dataset → iterator → optimizer → updater → trainer. Besides Adam, the bundled Optimizers cover the basics such as SGD and MomentumSGD, and StandardUpdater seems sufficient for problems that, unlike a GAN, need no complicated loss computation. Compared with the days when I wrote everything myself, having these stock components is a great relief; since only the necessary parts need customizing, the code is also easier for other people to follow, which I appreciate.
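A minimal sketch of that flow using the classes defined above (hyperparameters are illustrative):
model = AIC_DC(512)
dataset = PreprocessedDataset(train)
opt = chainer.optimizers.Adam()
opt.setup(model)                                          # Optimizer wraps the model
iterator = chainer.iterators.SerialIterator(dataset, 64)  # Iterator serves mini-batches
updater = chainer.training.StandardUpdater(iterator, opt)
trainer = chainer.training.Trainer(updater, (30, 'epoch'))
trainer.run()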
Problems like this one are hard to judge from the loss value alone, so I want to visualize the output; I therefore implement the model test as a Trainer Extension. Extensions are created either by inheriting from chainer.training.Extension or by using chainer.training.make_extension(). The model test and image saving are implemented with chainer.training.make_extension() as follows (imsave is imported from scipy.misc). The test images are prepared separately from the training data.
@chainer.training.make_extension(trigger=(1, 'epoch'))
def test_model(trainer):
    # Pass the output through clipped_relu so the pixel values fall in the range 0-1
    colorized_img = chainer.cuda.to_cpu(
        F.clipped_relu(model.colorize(test_img, test=True), z=1.0).data)
    # Tile the 64 colorized images into one 8x8 sheet and save it
    imsave(
        'test_colorized{}.png'.format(trainer.updater.epoch),
        colorized_img
        .transpose(0, 2, 3, 1)
        .reshape((8, 8, 32, 32, 3))
        .transpose(1, 2, 0, 3, 4)
        .reshape(8*32, 8*32, 3)
    )
trainer.extend(test_model)
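For reference, a minimal sketch of the inheritance route mentioned above (the class name is my own; the body would be identical to the function version):
class SaveColorized(chainer.training.Extension):
    trigger = (1, 'epoch')  # same trigger as the decorator version

    def __call__(self, trainer):
        # same body as test_model above: colorize test_img and save the tiling
        ...

trainer.extend(SaveColorized())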
Now for the results. Since I tried two NNs, fully connected and convolutional, I compare them at the 30-epoch mark. Training used n_units = 2048 for the fully connected NN and n_ch = 512 for the convolutional NN.
Fully connected NN
Convolutional NN
The fully connected NN seems to tend toward grainy images overall. I was surprised that even such a simple NN places this many colors, but the convolutional NN appears to output more vivid and attractive images.
Looking at individual images, sky and sea tend to be recognized well and colored cleanly, though telling a blue sky from a sunset seems difficult. In the lower-right image, which appears to show a small animal, the ground is recognized and the browns and grass colors are reproduced, albeit a little too grassy compared with the ground truth.
The images above use elu as the activation function. I also investigated how the finish changes with relu or leaky_relu. Only the convolutional NN results are shown, at 30 epochs with n_ch = 512.
relu
leaky_relu
elu
relu finishes vividly but looks a little blurry; elu and leaky_relu seem superior to relu in image sharpness. leaky_relu gives an impression somewhere between elu and relu.
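The swap itself is mechanical: each F.elu call in colorize() is replaced. A hypothetical sketch of making the activation selectable (AIC_DC_Act is my own addition, not in the original script):
class AIC_DC_Act(AIC_DC):
    def __init__(self, n_ch, activation=F.elu):
        super(AIC_DC_Act, self).__init__(n_ch)
        self.activation = activation  # F.relu, F.leaky_relu (slope 0.2 by default), or F.elu

    def colorize(self, x, test=False):
        a = self.activation
        h = F.reshape(x, (x.shape[0], 1, 32, 32))
        h = a(self.bn1(self.cv_in(h), test=test))
        h = a(self.bn2(self.cv1(h), test=test))
        h = a(self.bn3(self.cv2(h), test=test))
        h = a(self.bn4(self.cv3(h), test=test))
        h = a(self.bn5(self.dc1(h), test=test))
        h = a(self.bn6(self.dc2(h), test=test))
        h = a(self.bn7(self.dc3(h), test=test))
        return self.dc_out(h)
The full script follows.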
#!/usr/bin/env python
# coding: utf-8
import argparse
import numpy as np
from scipy.misc import imsave
import chainer
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions
class PreprocessedDataset(chainer.dataset.DatasetMixin):
    def __init__(self, base_image_dataset):
        self.base = base_image_dataset

    def __len__(self):
        return len(self.base)

    def get_example(self, i):
        color_image = self.base[i]
        # Calculate the luminance and create the monochrome image
        gray_image = (
            0.298912*color_image[0]
            + 0.586611*color_image[1]
            + 0.114478*color_image[2]
        ).astype(np.float32)
        return gray_image, color_image
class AIC_FC(chainer.Chain):
    def __init__(self, n_units):
        initializer = chainer.initializers.HeNormal()
        super(AIC_FC, self).__init__(
            fc_in=L.Linear(None, n_units),
            bn1=L.BatchNormalization(n_units),
            fc2=L.Linear(None, n_units),
            bn2=L.BatchNormalization(n_units),
            fc_out=L.Linear(None, 32*32*3)
        )

    def __call__(self, x, t):
        y = self.colorize(x)
        loss = F.mean_squared_error(y, t)
        chainer.reporter.report({
            'loss': loss
        })
        return loss

    def colorize(self, x, test=False):
        h = F.elu(self.bn1(self.fc_in(x), test=test))
        h = F.elu(self.bn2(self.fc2(h), test=test))
        y = F.reshape(self.fc_out(h), (h.shape[0], 3, 32, 32))
        return y
class AIC_DC(chainer.Chain):
    def __init__(self, n_ch):
        initializer = chainer.initializers.HeNormal()
        super(AIC_DC, self).__init__(
            cv_in=L.Convolution2D(1, n_ch//4, 4, 2, 1),
            bn1=L.BatchNormalization(n_ch//4),
            cv1=L.Convolution2D(n_ch//4, n_ch//2, 4, 2, 1),
            bn2=L.BatchNormalization(n_ch//2),
            cv2=L.Convolution2D(n_ch//2, n_ch, 4, 2, 1),
            bn3=L.BatchNormalization(n_ch),
            cv3=L.Convolution2D(n_ch, n_ch, 4, 2, 1),
            bn4=L.BatchNormalization(n_ch),
            dc1=L.Deconvolution2D(n_ch, n_ch, 4, 2, 1),
            bn5=L.BatchNormalization(n_ch),
            dc2=L.Deconvolution2D(n_ch, n_ch//2, 4, 2, 1),
            bn6=L.BatchNormalization(n_ch//2),
            dc3=L.Deconvolution2D(n_ch//2, n_ch//4, 4, 2, 1),
            bn7=L.BatchNormalization(n_ch//4),
            dc_out=L.Deconvolution2D(n_ch//4, 3, 4, 2, 1, outsize=(32, 32))
        )

    def __call__(self, x, t):
        y = self.colorize(x)
        loss = F.mean_squared_error(y, t)
        chainer.reporter.report({
            'loss': loss
        })
        return loss

    def colorize(self, x, test=False):
        # Reshape so that ndim is 4 for input to the Convolution layer
        h = F.reshape(x, (x.shape[0], 1, 32, 32))
        h = F.elu(self.bn1(self.cv_in(h), test=test))
        h = F.elu(self.bn2(self.cv1(h), test=test))
        h = F.elu(self.bn3(self.cv2(h), test=test))
        h = F.elu(self.bn4(self.cv3(h), test=test))
        h = F.elu(self.bn5(self.dc1(h), test=test))
        h = F.elu(self.bn6(self.dc2(h), test=test))
        h = F.elu(self.bn7(self.dc3(h), test=test))
        y = self.dc_out(h)
        return y
def main():
    parser = argparse.ArgumentParser(description='Automatic Image Colorization')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=30,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--n_ch', '-nc', type=int, default=1024,
                        help='Number of channels')
    parser.add_argument('--n_units', '-nu', type=int, default=0,
                        help='Number of units')
    args = parser.parse_args()

    print('# GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    if args.n_units > 0:
        print('# n_units: {}\n'.format(args.n_units))
        model = AIC_FC(args.n_units)
    else:
        print('# n_ch: {}\n'.format(args.n_ch))
        model = AIC_DC(args.n_ch)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    opt = chainer.optimizers.Adam()
    opt.setup(model)

    train, test = chainer.datasets.get_cifar100(withlabel=False)
    test_img = (
        0.298912*test[:64, 0]
        + 0.586611*test[:64, 1]
        + 0.114478*test[:64, 2]
    )
    # Save the 64 test images as a single 8x8 tiled image
    imsave(
        'test.png',
        test[:64]
        .transpose(0, 2, 3, 1)
        .reshape((8, 8, 32, 32, 3))
        .transpose(1, 2, 0, 3, 4)
        .reshape(8*32, 8*32, 3)
    )
    imsave(
        'test_gray.png',
        test_img
        .reshape((8, 8, 32, 32))
        .transpose(1, 2, 0, 3)
        .reshape(8*32, 8*32)
    )
    if args.gpu >= 0:
        test_img = chainer.cuda.to_gpu(test_img)

    dataset = PreprocessedDataset(train)
    iterator = chainer.iterators.MultiprocessIterator(dataset, args.batchsize)
    updater = chainer.training.StandardUpdater(iterator, opt, device=args.gpu)
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport([
        'epoch', 'loss', 'elapsed_time'
    ]))

    @chainer.training.make_extension(trigger=(10, 'epoch'))
    def test_model(trainer):
        # Pass the output through clipped_relu so the pixel values fall in the range 0-1
        colorized_img = chainer.cuda.to_cpu(
            F.clipped_relu(model.colorize(test_img, test=True), z=1.0).data)
        # Tile the 64 colorized images into one 8x8 sheet and save it
        imsave(
            'test_colorized{}.png'.format(trainer.updater.epoch),
            colorized_img
            .transpose(0, 2, 3, 1)
            .reshape((8, 8, 32, 32, 3))
            .transpose(1, 2, 0, 3, 4)
            .reshape(8*32, 8*32, 3)
        )
    trainer.extend(test_model)
    trainer.extend(extensions.ProgressBar(update_interval=100))
    trainer.run()


if __name__ == '__main__':
    main()
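Typical invocations, assuming the script is saved as colorize.py (the filename is my own; the flags are defined in main() above):
python colorize.py --n_ch 512            # convolutional NN (the default branch)
python colorize.py --n_units 2048        # fully connected NN
python colorize.py --gpu -1 --n_ch 512   # run on CPU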