- I tried to do what the title says. I got it all working, so I am writing it down as a memo. - For certain reasons I cannot go into the contents in depth, so some parts are a little rough. - For example, there may be variables that make you ask "where was that declared?" - The original was cut and pasted from a notebook, so there are magic commands. - There are reference links here and there.
- Multi-class, multi-label classification of images. - The idea is: "This image is class A. That image corresponds to both A and B." - I wanted to use PyTorch, so I implemented it with PyTorch. - If you find places that make you think "That's obvious!" or "Do you need such a comment?", it is because I am still inexperienced.
I did the following. Honestly, this may not be the best approach; it ended up this way because I started working after only a moderate amount of investigation.
Folder structure
.
├── data
│ ├── labels //Image and label combination json storage
│ │ ├── A.json
│ │ ├── B.json
│ │ └── and many other json
│ └── images //jpg Image storage. Mixed for learning and verification
│ ├── A.jpg
│ ├── B.jpg
│ └── Many other jpgs
├── model //Model save destination
└── predict //Installed as an image storage area that you want to predict
By the way, the contents of the JSON files under labels look like this. The key is the image name and the value is the class information (1 or 0) for each label.
sample
# A.json
{
"A": {
"Label A": 1,
"Label B": 1,
"Label C": 0
}
}
# B.json
{
"B": {
"Label A": 0,
"Label B": 0,
"Label C": 1
}
}
# ref: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py
from PIL import Image
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np
import pathlib
import random
# Use the GPU if there is one, otherwise fall back to the CPU
def check_cuda():
    """Return the device string: 'cuda:0' when CUDA is available, else 'cpu'."""
    if torch.cuda.is_available():
        return 'cuda:0'
    return 'cpu'


device = torch.device(check_cuda())
# Training data, test data division
import json

# Names (file stems) of the images that actually exist on disk.
image_set = {pathlib.Path(i).stem for i in pathlib.Path('data/images').glob('*.jpg')}

# Build {image name: {label name: 0 or 1}} from the label files.
# The previous code called .items() on the set of names, which raises
# AttributeError and never loaded any labels.
# Assumes every JSON file is shaped like the article's sample,
# {"<image name>": {"<label>": 0 or 1, ...}} - TODO confirm.
labels_by_image = {}
for label_path in sorted(pathlib.Path('data/labels').glob('*.json')):
    with open(label_path, encoding='utf-8') as fp:
        for image_name, label_flags in json.load(fp).items():
            # Skip labels whose image file is missing.
            if image_name in image_set:
                labels_by_image[image_name] = label_flags

n_data = len(labels_by_image)
traindata_rate = 0.7
train_idx = random.sample(range(n_data), int(n_data * traindata_rate))
_train_positions = set(train_idx)  # O(1) membership test inside the loop

_trainset = {}
_testset = {}
for i, (k, v) in enumerate(labels_by_image.items()):
    if i in _train_positions:
        _trainset[k] = v
    else:
        _testset[k] = v
Transform
# ref: https://qiita.com/takurooo/items/e4c91c5d78059f92e76d
# Preprocessing steps, applied to every image in this order.
_resize = transforms.Resize((100, 100))  # image size --> (100, 100)
_to_tensor = transforms.ToTensor()
_normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
trfm = transforms.Compose([_resize, _to_tensor, _normalize])
Dataset
class MultiLabelDataSet(torch.utils.data.Dataset):
    """Dataset pairing each image file with its multi-label target vector.

    Args:
        labels: Mapping of image name (file name without extension) to a
            mapping whose values are the per-label flags.
        image_dir: Directory that holds the image files.
        ext: File extension, including the dot, appended to each image name.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, labels, image_dir='./data/images', ext='.jpg', transform=None):
        self.labels = labels
        self.image_dir = image_dir
        self.ext = ext
        self.transform = transform
        # Snapshot keys and values once so an index always maps to the same pair.
        self.keys = list(labels.keys())
        self.vals = list(labels.values())

    def __len__(self):
        """Return the number of (image, label) pairs."""
        return len(self.keys)

    def __getitem__(self, idx):
        """Return {'image': tensor, 'label': tensor} for the item at *idx*."""
        image_path = f'{self.image_dir}/{self.keys[idx]}{self.ext}'
        # Force three channels: grayscale or RGBA files would otherwise break
        # the 3-channel Normalize and the (2, 0, 1) transpose below.
        # convert() loads the pixel data, so the file can be closed right away.
        with Image.open(image_path) as raw_image:
            image_array = raw_image.convert('RGB')
        if self.transform:
            image = self.transform(image_array)
        else:
            # HWC -> CHW, then scale to 0~1
            image = torch.Tensor(np.transpose(np.asarray(image_array), (2, 0, 1))) / 255
        label = torch.Tensor(list(self.vals[idx].values()))
        return {'image': image, 'label': label}
DataLoader
batch_size = 8

trainset = MultiLabelDataSet(_trainset, transform=trfm)
# Reshuffle the training data every epoch; with shuffle=False every epoch
# would feed SGD exactly the same batches in the same order.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = MultiLabelDataSet(_testset, transform=trfm)
# Evaluation order does not affect the metrics, so keep it fixed.
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)
Define `classes` as the list of label names, something like classes = ['A', 'B', 'C', ...].
import matplotlib.pyplot as plt
%matplotlib inline
# functions to show an image
def imshow(img, unnormalize=True):
    """Display a channel-first image with matplotlib.

    Args:
        img: Image in (channels, height, width) layout, e.g. the grid
            returned by torchvision.utils.make_grid.
        unnormalize: When True (default), map values from [-1, 1] back to
            [0, 1], undoing the mean 0.5 / std 0.5 normalisation used by the
            transform in this file. Without it matplotlib clips every
            negative value. Pass False for images already in [0, 1].
    """
    npimg = np.asarray(img)
    if unnormalize:
        npimg = npimg / 2 + 0.5
    # matplotlib wants (height, width, channels)
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
# sample data
dataiter = iter(trainloader)
# Use the built-in next(): DataLoader iterators no longer have a .next() method.
tmp = next(dataiter)
images = tmp['image']
labels = tmp['label']
# print images
imshow(torchvision.utils.make_grid(images))
The number of layers and channels was chosen arbitrarily ...
I use BCEWithLogitsLoss,
so the model does not apply a sigmoid at the end (that is what I found when I googled).
class MultiClassifier(nn.Module):
    """CNN that outputs one raw logit per label for multi-label classification.

    The output has no sigmoid applied; it is meant to be fed to
    BCEWithLogitsLoss. Input must have 3 channels, and the flattened size of
    256 * 4 * 4 corresponds to 100x100 inputs (four rounds of 3x3 convolution
    followed by 2x2 max-pooling: 100 -> 49 -> 23 -> 10 -> 4).

    Args:
        n_classes: Number of output logits. Defaults to ``len(classes)``,
            read from the module-level ``classes`` list.
    """

    def __init__(self, n_classes=None):
        super().__init__()
        if n_classes is None:
            n_classes = len(classes)
        self.ConvLayer1 = nn.Sequential(
            # ref(H_out & W_out): https://pytorch.org/docs/stable/nn.html#conv2d
            nn.Conv2d(3, 32, 3),
            nn.MaxPool2d(2),
            nn.ReLU(),
        )
        self.ConvLayer2 = nn.Sequential(
            nn.Conv2d(32, 64, 3),
            nn.MaxPool2d(2),
            nn.ReLU(),
        )
        self.ConvLayer3 = nn.Sequential(
            nn.Conv2d(64, 128, 3),
            nn.MaxPool2d(2),
            nn.ReLU(),
        )
        self.ConvLayer4 = nn.Sequential(
            nn.Conv2d(128, 256, 3),
            nn.MaxPool2d(2),
            nn.ReLU(),
            nn.Dropout(0.2, inplace=True),
        )
        self.Linear1 = nn.Linear(256 * 4 * 4, 2048)
        self.Linear2 = nn.Linear(2048, 1024)
        self.Linear3 = nn.Linear(1024, 512)
        self.Linear4 = nn.Linear(512, n_classes)

    def forward(self, x):
        """Return the logits, one row per input sample."""
        x = self.ConvLayer1(x)
        x = self.ConvLayer2(x)
        x = self.ConvLayer3(x)
        x = self.ConvLayer4(x)
        # Flatten per sample. Keeping the batch dimension fixed makes a wrong
        # input size fail loudly in Linear1 instead of silently changing the
        # batch size, as view(-1, 256 * 4 * 4) would.
        x = x.view(x.size(0), -1)
        # NOTE(review): there is no activation between the Linear layers, so
        # together they act as a single affine map. Left unchanged so existing
        # checkpoints keep producing the same outputs.
        x = self.Linear1(x)
        x = self.Linear2(x)
        x = self.Linear3(x)
        x = self.Linear4(x)
        return x
def try_gpu(target):
    """Move *target* to the device chosen by check_cuda() and return it.

    The old ``if check_cuda():`` guard was always true, because check_cuda()
    returns a non-empty string on both paths, so it is dropped. The result of
    ``.to()`` is returned because only nn.Module objects are moved in place;
    for a tensor the caller must use the returned object.
    """
    return target.to(torch.device(check_cuda()))


model = try_gpu(MultiClassifier())
A variable called pos_weight
suddenly appears in the criterion; it is the weight applied to the positive examples of each label when the loss is computed.
https://pytorch.org/docs/stable/nn.html#torch.nn.BCEWithLogitsLoss
If you don't need such weighting, you don't need to specify it. I specified it because I wanted to give more weight to the positive (correct-answer) labels. The details are in the links given as ref, so please look there ~~I am dodging the explanation~~.
# ref: https://medium.com/@thevatsalsaglani/training-and-deploying-a-multi-label-image-classifier-using-pytorch-flask-reactjs-and-firebase-c39c96f9c427
import numpy as np
from pprint import pprint
from torch.autograd import Variable
import torch.optim as optim
# ref: https://discuss.pytorch.org/t/bceloss-vs-bcewithlogitsloss/33586
# ref: https://discuss.pytorch.org/t/weights-in-bcewithlogitsloss/27452
# NOTE(review): `pos_weight` is not defined anywhere in the visible source and
# must exist before this line runs. Presumably it is a tensor holding one
# positive-class weight per label - confirm against the BCEWithLogitsLoss docs.
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
# Move the loss module to the same device as the model.
try_gpu(criterion)
optimizer = optim.SGD(model.parameters(), lr = 0.005, momentum = 0.9)
def pred_acc(original, predicted):
    """Return the fraction of labels predicted correctly for one sample.

    Args:
        original: Tensor of ground-truth flags (0 or 1), one per label.
        predicted: Tensor of raw model outputs (logits), one per label.

    Returns:
        Number of matching labels divided by ``len(original)``.
    """
    # ref: https://pytorch.org/docs/stable/torch.html#module-torch
    # The model emits logits (it is trained with BCEWithLogitsLoss), so turn
    # them into probabilities before rounding to 0/1. Rounding the logits
    # themselves gives values such as -2 or 3 that never match a 0/1 flag.
    decided = torch.round(torch.sigmoid(predicted))
    return decided.eq(original).sum().item() / len(original)
def fit_model(epochs, model, dataloader, phase='training', volatile = False):
    """Run one pass over *dataloader* and return (mean loss, mean accuracy).

    Relies on the module-level ``criterion``, ``optimizer``, ``device`` and
    ``pred_acc``.

    Args:
        epochs: Current epoch number; metrics are printed when it is a
            multiple of 10.
        model: Network to train or evaluate.
        dataloader: Yields dicts with 'image' and 'label' batches.
        phase: 'training' updates the weights. Any other value (for example
            'validation') evaluates only: the model is put in eval mode and
            no gradients are tracked. The old check compared against the
            misspelled 'validataion', so eval mode was never enabled.
        volatile: Ignored; kept only so existing calls keep working.

    Returns:
        Tuple of the per-batch mean loss and the per-batch mean accuracy.
    """
    is_training = phase == 'training'
    model.train(is_training)  # train(False) is the same as eval()

    running_loss = []
    running_acc = []
    with torch.set_grad_enabled(is_training):
        for data in dataloader:
            # .to() is a no-op when the batch is already on the right device.
            inputs = data['image'].to(device)
            target = data['label'].to(device)

            if is_training:
                optimizer.zero_grad()  # Gradient initialization
            ops = model(inputs)
            loss = criterion(ops, target)

            # Per-sample accuracy, computed on detached CPU copies.
            acc_ = [
                pred_acc(truth, logits)
                for truth, logits in zip(target.detach().cpu(), ops.detach().cpu())
            ]
            running_loss.append(loss.item())
            running_acc.append(np.asarray(acc_).mean())

            if is_training:
                loss.backward()  # Backpropagation of error
                optimizer.step()  # Parameter update

    total_batch_loss = np.asarray(running_loss).mean()
    total_batch_acc = np.asarray(running_acc).mean()
    if epochs % 10 == 0:
        pprint(f"[{phase}] Epoch: {epochs}, loss: {total_batch_loss}.")
        pprint(f"[{phase}] Epoch: {epochs}, accuracy: {total_batch_acc}.")
    return total_batch_loss, total_batch_acc
from tqdm import tqdm
num = 50
# Start from infinity so the first epoch always counts as an improvement.
# The previous sentinel of 99 would never save a checkpoint if the
# validation loss stayed above it.
best_val = float('inf')
best_idx = None  # epoch of the best checkpoint; None until one is saved
trn_losses = []; trn_acc = []
val_losses = []; val_acc = []

# torch.save does not create missing directories.
pathlib.Path('model').mkdir(parents=True, exist_ok=True)

for idx in tqdm(range(1, num+1)):
    trn_l, trn_a = fit_model(idx, model, trainloader)
    val_l, val_a = fit_model(idx, model, testloader, phase='validation')
    trn_losses.append(trn_l); trn_acc.append(trn_a)
    val_losses.append(val_l); val_acc.append(val_a)
    # Keep only the weights with the lowest validation loss so far.
    if val_l < best_val:
        torch.save(model.state_dict(), 'model/best_model.pth')
        best_val = val_l
        best_idx = idx
def get_tensor(img):
    """Load the image file at *img* and return it as a model-ready tensor.

    Applies the same resize and normalisation values as the training
    transform in this file, then adds a leading batch dimension of size 1.

    Args:
        img: Path of the image file to open.
    """
    tfms = transforms.Compose([
        transforms.Resize((100, 100)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    # Force three channels so grayscale or RGBA files match the 3-channel
    # Normalize and the network's first convolution. convert() loads the
    # pixel data, so the file can be closed right away.
    with Image.open(img) as raw_image:
        rgb_image = raw_image.convert('RGB')
    return tfms(rgb_image).unsqueeze(0)
def predict(img, label_lst, model):
    """Predict the labels of a single image file.

    Args:
        img: Path of the image file.
        label_lst: Label names, indexed like the model outputs.
        model: Network that returns one raw logit per label.

    Returns:
        Tuple ``(label, scores)``. ``label`` lists the names whose predicted
        probability is above 0.5. ``scores`` maps every label name to its
        probability in percent (rounded), ordered from highest to lowest.
    """
    tnsr = get_tensor(img)
    with torch.no_grad():  # inference only, no autograd graph needed
        op = model(tnsr)  # Predict result (raw logits)
    # The model outputs logits, so convert to probabilities first. Rounding
    # or scaling the logits directly yields neither 0/1 decisions nor
    # percentages. [0] picks the single image of the batch.
    probs = torch.sigmoid(op).cpu().numpy()[0]
    preds = np.where(probs > 0.5)[0]  # indices of the labels judged positive
    sigs_op = np.round(probs * 100)  # probability in percent
    o_p = np.argsort(probs)[::-1]  # label index order by score desc
    # answer label
    label = [label_lst[i] for i in preds]
    # all result
    arg_s = {label_lst[int(j)]: float(sigs_op[int(j)]) for j in o_p}
    return label, arg_s
# Rebuild the network and restore the saved weights onto the CPU.
model = MultiClassifier()
state_dict = torch.load(f'model/best_model.pth', map_location=torch.device('cpu'))
model.load_state_dict(state_dict)
model = model.eval() #Switch to inference mode

# Name (without extension) of the image under predict/ to classify.
target = 'XXXXXX'
image_path = f'predict/{target}.jpg'

# Show the image at the size the network receives.
img = Image.open(image_path).resize((100, 100))
plt.imshow(img)

_, all_result = predict(image_path, classes, model)
top5 = sorted(all_result.items(), key=lambda x: x[1], reverse=True)[:5]
print('predict top5: ', *top5)
That's all for implementation.
I think there is still room to improve accuracy through data augmentation (which seems easy to implement), refining the model, and setting appropriate weights during evaluation.
I was satisfied because I was able to make it for the time being.
…For your reference. I made something to predict like this.
Recommended Posts