I tried out kaokore, a recently released dataset of faces from classical Japanese artwork.
https://github.com/rois-codh/kaokore
The dataset consists of classical face images with two labeling tasks, gender and status. This time, I tried two-class gender classification with a PyTorch implementation of EfficientNet.
First, clone the repository locally:
```
$ git clone https://github.com/rois-codh/kaokore.git
```
Then run the following command to download the image data:
```
$ python download.py
```
Afterwards, the directory structure looks like this. The images are in images_256, and the image labels are in labels.csv.
```
kaokore --- kaokore --- images_256 --- XXXXXX.jpg
                     |- labels.csv
```
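As a quick sanity check on the labels (a sketch; the relative path is an assumption, and the column layout follows the repository README, where gender is 0 = male and 1 = female):

```python
import pandas as pd

# Peek at the label file (path is an assumption; adjust to your setup).
# Per the kaokore README: image (file name), gender (0 = male, 1 = female),
# status, and set (the split label used below).
df = pd.read_csv('kaokore/kaokore/labels.csv')
print(df.head())
print(df['set'].value_counts())
```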
EfficientNet
Next, let's try EfficientNet.
There are various implementations of EfficientNet, and they are summarized here:
https://github.com/yoyoyo-yo/DeepLearningMugenKnock
This time I use EfficientNet-B0.
```python
import torch
import torch.nn.functional as F
import argparse
import cv2
import numpy as np
from glob import glob
import copy
from collections import OrderedDict
from tqdm import tqdm
import pandas as pd
```
Define the image size and other settings.
```python
class_label = ['male', 'female']  # class names
class_N = len(class_label)        # number of classes
img_height, img_width = 128, 128  # image size
channel = 3                       # channel size

# GPU
GPU = True  # set to False if no GPU is available
device = torch.device("cuda" if GPU else "cpu")
torch.manual_seed(0)
```
EfficientNet
Define the model.
The original paper is https://arxiv.org/abs/1905.11946
EfficientNet was a hot topic as the state-of-the-art image-classification model at the time. Here I reimplemented it in PyTorch, using https://github.com/keras-team/keras-applications/blob/master/keras_applications/efficientnet.py as a reference.
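As background for the round_filters and round_repeats helpers in the definition below: EfficientNet scales channel counts by a per-variant width coefficient and snaps them to multiples of 8. A standalone sketch of that rounding rule (for B0 both coefficients are 1.0, so it is effectively a no-op):

```python
# Standalone illustration of the channel-rounding rule used in the model below.
# For B0 width_coefficient = 1.0, so filter counts pass through unchanged;
# wider variants (e.g. width_coefficient = 1.1 for B2) snap to multiples of 8.
def demo_round_filters(filters, width_coefficient, divisor=8):
    filters *= width_coefficient
    new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)
    if new_filters < 0.9 * filters:  # never round down by more than 10%
        new_filters += divisor
    return int(new_filters)

print(demo_round_filters(32, 1.0))  # 32 (B0: unchanged)
print(demo_round_filters(40, 1.1))  # 48 (B2-style width scaling)
```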
```python
class EfficientNetB0(torch.nn.Module):
    def __init__(self):
        super(EfficientNetB0, self).__init__()

        # Net config (B0: width and depth coefficients are both 1)
        width_coefficient = 1
        depth_coefficient = 1
        dropout_ratio = 0.2
        depth_divisor = 8
        drop_connect_rate = 0.2

        DEFAULT_BLOCKS_ARGS = [
            # block 1
            {'kernel_size': 3, 'repeats': 1, 'filters_in': 32, 'filters_out': 16,
             'expand_ratio': 1, 'id_skip': True, 'stride': 1, 'se_ratio': 0.25},
            # block 2
            {'kernel_size': 3, 'repeats': 2, 'filters_in': 16, 'filters_out': 24,
             'expand_ratio': 6, 'id_skip': True, 'stride': 2, 'se_ratio': 0.25},
            # block 3
            {'kernel_size': 5, 'repeats': 2, 'filters_in': 24, 'filters_out': 40,
             'expand_ratio': 6, 'id_skip': True, 'stride': 2, 'se_ratio': 0.25},
            # block 4
            {'kernel_size': 3, 'repeats': 3, 'filters_in': 40, 'filters_out': 80,
             'expand_ratio': 6, 'id_skip': True, 'stride': 2, 'se_ratio': 0.25},
            # block 5
            {'kernel_size': 5, 'repeats': 3, 'filters_in': 80, 'filters_out': 112,
             'expand_ratio': 6, 'id_skip': True, 'stride': 1, 'se_ratio': 0.25},
            # block 6
            {'kernel_size': 5, 'repeats': 4, 'filters_in': 112, 'filters_out': 192,
             'expand_ratio': 6, 'id_skip': True, 'stride': 2, 'se_ratio': 0.25},
            # block 7
            {'kernel_size': 3, 'repeats': 1, 'filters_in': 192, 'filters_out': 320,
             'expand_ratio': 6, 'id_skip': True, 'stride': 1, 'se_ratio': 0.25}
        ]

        def round_filters(filters, divisor=depth_divisor):
            """Round number of filters based on width multiplier."""
            filters *= width_coefficient
            new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)
            # Make sure that rounding down does not go down by more than 10%.
            if new_filters < 0.9 * filters:
                new_filters += divisor
            return int(new_filters)

        def round_repeats(repeats):
            """Round number of repeats based on depth multiplier."""
            return int(np.ceil(depth_coefficient * repeats))

        class Reshape(torch.nn.Module):
            def __init__(self, c, h, w):
                super(Reshape, self).__init__()
                self.c = c
                self.h = h
                self.w = w

            def forward(self, x):
                return x.view(x.size()[0], self.c, self.h, self.w)

        class Flatten(torch.nn.Module):
            def forward(self, x):
                return x.view(x.size()[0], -1)

        # activation
        class Swish(torch.nn.Module):
            def forward(self, x):
                return x * torch.sigmoid(x)

        # EfficientNet block (MBConv)
        class Block(torch.nn.Module):
            def __init__(self, activation_fn=Swish(), drop_rate=0., name='',
                         filters_in=32, filters_out=16, kernel_size=3, stride=1,
                         expand_ratio=1, se_ratio=0., id_skip=True):
                super(Block, self).__init__()

                # Expansion phase
                filters = filters_in * expand_ratio
                if expand_ratio != 1:
                    _modules = OrderedDict()
                    _modules[name + 'expand_conv'] = torch.nn.Conv2d(filters_in, filters, kernel_size=1, padding=0, bias=False)
                    _modules[name + 'expand_bn'] = torch.nn.BatchNorm2d(filters)
                    _modules[name + 'expand_activation'] = activation_fn
                    self.expansion = torch.nn.Sequential(_modules)

                # Depthwise convolution (groups=filters is what makes the conv depthwise)
                _modules = OrderedDict()
                conv_pad = kernel_size // 2
                _modules[name + 'dw_conv'] = torch.nn.Conv2d(filters, filters, kernel_size, stride=stride, padding=conv_pad, bias=False, groups=filters)
                _modules[name + 'dw_bn'] = torch.nn.BatchNorm2d(filters)
                _modules[name + 'dw_activation'] = activation_fn
                self.DW_conv = torch.nn.Sequential(_modules)

                # Squeeze-and-Excitation phase
                if 0 < se_ratio <= 1:
                    filters_se = max(1, int(filters_in * se_ratio))
                    _modules = OrderedDict()
                    # global average pooling, as in the reference Keras implementation
                    _modules[name + 'se_squeeze'] = torch.nn.AdaptiveAvgPool2d((1, 1))
                    _modules[name + 'se_reshape'] = Reshape(c=filters, h=1, w=1)
                    _modules[name + 'se_reduce_conv'] = torch.nn.Conv2d(filters, filters_se, kernel_size=1, padding=0)
                    _modules[name + 'se_reduce_activation'] = activation_fn
                    _modules[name + 'se_expand_conv'] = torch.nn.Conv2d(filters_se, filters, kernel_size=1, padding=0)
                    _modules[name + 'se_expand_activation'] = torch.nn.Sigmoid()
                    self.SE_phase = torch.nn.Sequential(_modules)

                # Output phase
                _modules = OrderedDict()
                _modules[name + 'project_conv'] = torch.nn.Conv2d(filters, filters_out, kernel_size=1, padding=0, bias=False)
                _modules[name + 'project_bn'] = torch.nn.BatchNorm2d(filters_out)
                self.output_phase = torch.nn.Sequential(_modules)

                # identity skip connection, only when input and output shapes match
                self.last_add = False
                if id_skip is True and stride == 1 and filters_in == filters_out:
                    if drop_rate > 0:
                        self.output_phase_Dropout = torch.nn.Dropout2d(p=drop_rate)
                    self.last_add = True

            def forward(self, input_x):
                # expansion phase
                if hasattr(self, 'expansion'):
                    x = self.expansion(input_x)
                else:
                    x = input_x

                x = self.DW_conv(x)

                # Squeeze-and-Excitation: rescale channels by learned attention weights
                if hasattr(self, 'SE_phase'):
                    x_SE_phase = self.SE_phase(x)
                    x = x * x_SE_phase

                # Output phase
                x = self.output_phase(x)
                if hasattr(self, 'output_phase_Dropout'):
                    x = self.output_phase_Dropout(x)
                if self.last_add:
                    x = x + input_x
                return x

        # stem
        _modules = OrderedDict()
        _modules['stem_conv'] = torch.nn.Conv2d(channel, 32, kernel_size=3, padding=1, stride=2, bias=False)
        _modules['stem_bn'] = torch.nn.BatchNorm2d(32)
        _modules['stem_activation'] = Swish()
        self.stem = torch.nn.Sequential(_modules)

        # blocks
        _modules = []
        b = 0
        block_Num = float(sum(args['repeats'] for args in DEFAULT_BLOCKS_ARGS))
        for (i, args) in enumerate(DEFAULT_BLOCKS_ARGS):
            # Update block input and output filters based on width multiplier.
            args['filters_in'] = round_filters(args['filters_in'])
            args['filters_out'] = round_filters(args['filters_out'])
            for j in range(round_repeats(args.pop('repeats'))):
                # Only the first block of a stage applies the stride and filter increase.
                if j > 0:
                    args['stride'] = 1
                    args['filters_in'] = args['filters_out']
                _modules.append(
                    Block(activation_fn=Swish(), drop_rate=drop_connect_rate * b / block_Num,
                          name='block{}{}_'.format(i + 1, chr(j + 97)), **args))
                b += 1
        self.block = torch.nn.Sequential(*_modules)

        # top
        _modules = OrderedDict()
        _modules['top_conv'] = torch.nn.Conv2d(DEFAULT_BLOCKS_ARGS[-1]['filters_out'], round_filters(1280), kernel_size=1, padding=0, bias=False)
        _modules['top_bn'] = torch.nn.BatchNorm2d(round_filters(1280))
        _modules['top_activation'] = Swish()
        self.top = torch.nn.Sequential(_modules)

        _modules = OrderedDict()
        _modules['top_class_GAP'] = torch.nn.AdaptiveAvgPool2d((1, 1))  # global average pooling
        if dropout_ratio > 0:
            _modules['top_class_dropout'] = torch.nn.Dropout2d(p=dropout_ratio)
        _modules['top_class_flatten'] = Flatten()
        _modules['top_class_linear'] = torch.nn.Linear(round_filters(1280), class_N)
        self.top_class = torch.nn.Sequential(_modules)

    def forward(self, x):
        # stem
        x = self.stem(x)
        # blocks
        x = self.block(x)
        # top
        x = self.top(x)
        x = self.top_class(x)
        # Return raw logits: CrossEntropyLoss applies log-softmax internally,
        # so an extra softmax here would squash the gradients.
        return x
```
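A quick shape check (a sketch, not from the original post) to confirm the model builds and produces one score per class:

```python
# Sanity check: a random batch of 2 images should yield a (2, class_N) output.
model = EfficientNetB0()
dummy = torch.zeros(2, channel, img_height, img_width)
print(model(dummy).shape)  # torch.Size([2, 2])
```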
Define a function to read the data. It returns the image file paths together with data-augmentation flags as arrays. The train argument selects whether the training or the test split is loaded.
It reads labels.csv and appends each image path and its gender label to the lists.
```python
# get train data
def data_load(path, hf=False, vf=False, rot=False, train=True):
    paths = []
    ts = []

    df = pd.read_csv(path + 'labels.csv')

    if train:
        _df = df.query('set == "train"')
    else:
        _df = df.query('set == "test"')

    data_num = len(_df)
    pbar = tqdm(total=data_num)

    for i, row in _df.iterrows():
        name = row['image']
        gender = row['gender']

        # [image path, horizontal flip flag, vertical flip flag]
        paths.append([path + 'images_256/' + name, False, False])
        ts.append(gender)

        if hf:
            paths.append([path + 'images_256/' + name, True, False])
            ts.append(gender)
        if vf:
            paths.append([path + 'images_256/' + name, False, True])
            ts.append(gender)
        if hf and vf:
            paths.append([path + 'images_256/' + name, True, True])
            ts.append(gender)

        pbar.update(1)

    pbar.close()
    print()

    # dtype=object keeps the flip flags as booleans; a plain np.array would
    # convert everything to strings, making the string 'False' truthy later.
    return np.array(paths, dtype=object), np.array(ts)
```
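An illustrative call (the path is an assumption; point it at the directory that contains labels.csv and images_256):

```python
# With hf=True each image is added twice (original + horizontal flip),
# so paths holds twice the number of training rows.
paths, ts = data_load('kaokore/kaokore/', hf=True, vf=False, train=True)
print(len(paths), len(ts))
```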
Next, define a function that reads and preprocesses the actual image data.
```python
def get_image(paths):
    xs = []

    for info in paths:
        path, hf, vf = info

        x = cv2.imread(path)

        if channel == 1:
            x = cv2.cvtColor(x, cv2.COLOR_BGR2GRAY)

        x = cv2.resize(x, (img_width, img_height)).astype(np.float32)
        x = x / 127.5 - 1  # scale pixel values to [-1, 1]

        if channel == 3:
            x = x[..., ::-1]  # BGR -> RGB

        if hf:
            x = x[:, ::-1]  # horizontal flip
        if vf:
            x = x[::-1]     # vertical flip

        xs.append(x)

    xs = np.array(xs, dtype=np.float32)

    if channel == 1:
        xs = np.expand_dims(xs, axis=-1)

    xs = np.transpose(xs, (0, 3, 1, 2))  # NHWC -> NCHW for PyTorch

    return xs
```
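A quick check of the output shape (a sketch, reusing the paths array loaded above):

```python
# The first two entries come back as an NCHW float32 batch ready for PyTorch.
batch = get_image(paths[:2])
print(batch.shape, batch.dtype)  # (2, 3, 128, 128) float32
```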
```python
# train
def train():
    # model
    model = EfficientNetB0().to(device)
    opt = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    model.train()

    paths, ts = data_load('drive/My Drive/Colab Notebooks/', hf=True, vf=True, rot=False, train=True)

    # training
    mb = 32   # minibatch size
    mbi = 0   # current position in the shuffled index
    data_N = len(paths)
    train_ind = np.arange(len(paths))
    np.random.seed(0)
    np.random.shuffle(train_ind)

    # CrossEntropyLoss applies log-softmax internally, so the model outputs raw logits
    loss_fn = torch.nn.CrossEntropyLoss()

    # start training
    for i in range(5000):
        # get minibatch indices, reshuffling and wrapping around at the end of an epoch
        if mbi + mb > data_N:
            mb_ind = copy.copy(train_ind)[mbi:]
            np.random.shuffle(train_ind)
            mb_ind = np.hstack((mb_ind, train_ind[:(mb - (data_N - mbi))]))
            mbi = mb - (data_N - mbi)
        else:
            mb_ind = train_ind[mbi: mbi + mb]
            mbi += mb

        # get X and t
        x = torch.tensor(get_image(paths[mb_ind]), dtype=torch.float).to(device)
        t = torch.tensor(ts[mb_ind], dtype=torch.long).to(device)

        opt.zero_grad()
        y = model(x)
        loss = loss_fn(y, t)
        loss.backward()
        opt.step()

        pred = y.argmax(dim=1, keepdim=True)
        acc = pred.eq(t.view_as(pred)).sum().item() / mb

        if (i + 1) % 10 == 0:
            print("iter >>", i + 1, ', loss >>', loss.item(), ', accuracy >>', acc)

    torch.save(model.state_dict(), 'drive/My Drive/Colab Notebooks/kaokore_gender_efficientnetB0.pt')
```
```python
# test
def test():
    model = EfficientNetB0().to(device)
    # load the weights first, then switch to evaluation mode
    model.load_state_dict(torch.load('drive/My Drive/Colab Notebooks/kaokore_gender_efficientnetB0.pt'))
    model.eval()

    paths, ts = data_load('drive/My Drive/Colab Notebooks/', hf=False, vf=False, rot=False, train=False)

    accuracy = 0.

    with torch.no_grad():
        for i in range(len(paths)):
            x = torch.tensor(get_image([paths[i]]), dtype=torch.float).to(device)

            pred = model(x)
            pred = pred.detach().cpu().numpy()[0]

            if pred.argmax() == ts[i]:
                accuracy += 1

    Accuracy = accuracy / len(paths)
    print('Accuracy = {:.2f} ({} / {})'.format(Accuracy, int(accuracy), len(paths)))
```
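The post does not show how train() and test() are invoked. Since argparse is already imported, a minimal entry point might look like this (a sketch; the flag names are my own, not from the original):

```python
# Minimal entry point (sketch; the --train / --test flag names are assumptions).
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='EfficientNet-B0 gender classification on kaokore')
    parser.add_argument('--train', action='store_true', help='run training')
    parser.add_argument('--test', action='store_true', help='run evaluation')
    args = parser.parse_args()

    if args.train:
        train()
    if args.test:
        test()
```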
I ran the above code on a GPU in Google Colaboratory.
However, the learning rate was chosen fairly arbitrarily, so the result is not the best possible.
Aim for 100%!