This is the second post in the series. Last time, we implemented DCGAN with PyTorch and made it possible to save the output images one by one.
This time, we will implement a conditional GAN so that the output of the GAN can be controlled. As last time, we will also make it possible to save the output images one by one.
Implement conditional GAN and save output one by one
conditional GAN
A conditional GAN allows you to explicitly control the class of the generated images. This is made possible by using the label information of the training data during training. The paper is here.
As described in the paper, the model learns by adding class label information to both the Generator and Discriminator inputs. The input format changes a little, but the basic structure of the GAN does not change.
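As a minimal sketch of what this conditioning looks like in PyTorch (shapes assumed for 32x32 MNIST with 10 classes; the variable names are just for illustration), the label is one-hot encoded, fed to the Generator as extra 1x1 feature maps next to the noise, and broadcast to image size for the Discriminator:
import torch

batch_size, latent_dim, n_classes, img_size = 64, 100, 10, 32

labels = torch.randint(0, n_classes, (batch_size,))
# one-hot encode the labels: (batch_size, n_classes)
one_hot = torch.zeros(batch_size, n_classes).scatter_(1, labels.view(-1, 1), 1)

# Generator side: noise plus the one-hot label as 1x1 feature maps
noise = torch.randn(batch_size, latent_dim, 1, 1)
g_label = one_hot.view(batch_size, n_classes, 1, 1)

# Discriminator side: the one-hot label broadcast to image size
d_label = g_label.expand(-1, -1, img_size, img_size)
print(g_label.shape, d_label.shape)  # (64, 10, 1, 1) (64, 10, 32, 32)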
Let's move on to the implementation. This time, we will build the conditional GAN on top of the DCGAN implemented last time.
Google Colaboratory
First, the module imports.
import argparse
import os
import numpy as np
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from torchvision import datasets
import torch.nn as nn
import torch.nn.functional as F
import torch
img_save_path = 'images-C_dcgan'
os.makedirs(img_save_path, exist_ok=True)
The arguments are almost the same as last time. The one subtle change is that the generated image size is 32x32 instead of the MNIST default of 28x28.
parser = argparse.ArgumentParser()
parser.add_argument('--n_epochs', type=int, default=200, help='number of epochs of training')
parser.add_argument('--batch_size', type=int, default=64, help='size of the batches')
parser.add_argument('--lr', type=float, default=0.0002, help='adam: learning rate')
parser.add_argument('--beta1', type=float, default=0.5, help='adam: decay of first order momentum of gradient')
parser.add_argument('--beta2', type=float, default=0.999, help='adam: decay of second order momentum of gradient')
parser.add_argument('--n_cpu', type=int, default=8, help='number of cpu threads to use during batch generation')
parser.add_argument('--latent_dim', type=int, default=100, help='dimensionality of the latent space')
parser.add_argument('--n_classes', type=int, default=10, help='number of classes for dataset')
parser.add_argument('--img_size', type=int, default=32, help='size of each image dimension')
parser.add_argument('--channels', type=int, default=1, help='number of image channels')
parser.add_argument('--sample_interval', type=int, default=400, help='interval between image sampling')
args = parser.parse_args()
# on Google Colab, use instead: args = parser.parse_args(args=[])
print(args)
C,H,W = args.channels, args.img_size, args.img_size
def weights_init_normal(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        # DCGAN-style initialization (torch.nn.init.normal/constant are
        # deprecated; use the in-place normal_/constant_ variants)
        torch.nn.init.normal_(m.weight, 0.0, 0.02)
    elif classname.find('BatchNorm2d') != -1:
        torch.nn.init.normal_(m.weight, 1.0, 0.02)
        torch.nn.init.constant_(m.bias, 0.0)
Generator
Let's define the Generator. The noise and the label information are combined with cat during generation.
class Generator(nn.Module):
    # initializers
    def __init__(self, d=128):
        super(Generator, self).__init__()
        # noise branch: (latent_dim, 1, 1) -> (d*2, 4, 4)
        self.deconv1_1 = nn.ConvTranspose2d(100, d*2, 4, 1, 0)
        self.deconv1_1_bn = nn.BatchNorm2d(d*2)
        # label branch: (n_classes, 1, 1) -> (d*2, 4, 4)
        self.deconv1_2 = nn.ConvTranspose2d(10, d*2, 4, 1, 0)
        self.deconv1_2_bn = nn.BatchNorm2d(d*2)
        self.deconv2 = nn.ConvTranspose2d(d*4, d*2, 4, 2, 1)  # 4x4 -> 8x8
        self.deconv2_bn = nn.BatchNorm2d(d*2)
        self.deconv3 = nn.ConvTranspose2d(d*2, d, 4, 2, 1)    # 8x8 -> 16x16
        self.deconv3_bn = nn.BatchNorm2d(d)
        self.deconv4 = nn.ConvTranspose2d(d, C, 4, 2, 1)      # 16x16 -> 32x32

    # forward method
    def forward(self, input, label):
        x = F.relu(self.deconv1_1_bn(self.deconv1_1(input)))
        y = F.relu(self.deconv1_2_bn(self.deconv1_2(label)))
        x = torch.cat([x, y], 1)  # concatenate noise and label features along channels
        x = F.relu(self.deconv2_bn(self.deconv2(x)))
        x = F.relu(self.deconv3_bn(self.deconv3(x)))
        x = torch.tanh(self.deconv4(x))
        return x
Last time, I implemented the Generator with Upsampling + Conv2d. This time, we use ConvTranspose2d instead. This difference is summarized in this article, so please have a look if you are interested.
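For reference, here is a minimal sketch of the two approaches side by side (the input shape is assumed for illustration); both double the spatial resolution, but ConvTranspose2d learns the upsampling itself:
import torch
import torch.nn as nn

x = torch.randn(1, 128, 8, 8)

# last time's approach: Upsample followed by a stride-1 convolution
up_conv = nn.Sequential(
    nn.Upsample(scale_factor=2),
    nn.Conv2d(128, 64, 3, stride=1, padding=1),
)

# this time's approach: a single transposed convolution
deconv = nn.ConvTranspose2d(128, 64, 4, stride=2, padding=1)

print(up_conv(x).shape)  # torch.Size([1, 64, 16, 16])
print(deconv(x).shape)   # torch.Size([1, 64, 16, 16])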
Discriminator
The definition of the Discriminator. The label information is again attached with cat here.
class Discriminator(nn.Module):
    # initializers
    def __init__(self, d=128):
        super(Discriminator, self).__init__()
        # image branch and label branch, each producing d//2 feature maps
        self.conv1_1 = nn.Conv2d(C, d//2, 4, 2, 1)
        self.conv1_2 = nn.Conv2d(10, d//2, 4, 2, 1)
        self.conv2 = nn.Conv2d(d, d*2, 4, 2, 1)
        self.conv2_bn = nn.BatchNorm2d(d*2)
        self.conv3 = nn.Conv2d(d*2, d*4, 4, 2, 1)
        self.conv3_bn = nn.BatchNorm2d(d*4)
        self.conv4 = nn.Conv2d(d*4, 1, 4, 1, 0)

    def forward(self, input, label):
        x = F.leaky_relu(self.conv1_1(input), 0.2)
        y = F.leaky_relu(self.conv1_2(label), 0.2)
        x = torch.cat([x, y], 1)  # concatenate image and label features along channels
        x = F.leaky_relu(self.conv2_bn(self.conv2(x)), 0.2)
        x = F.leaky_relu(self.conv3_bn(self.conv3(x)), 0.2)
        x = torch.sigmoid(self.conv4(x))  # F.sigmoid is deprecated
        return x
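As a quick shape check, a sketch like the following (assuming C = 1 and 32x32 images, run on CPU for simplicity) pushes a fake batch through both networks:
G, D = Generator(), Discriminator()
z = torch.randn(8, 100, 1, 1)
y_g = torch.zeros(8, 10, 1, 1)    # label input with the one-hot shape (zeros suffice for a shape check)
y_d = y_g.expand(-1, -1, 32, 32)  # same labels broadcast to image size for D
fake = G(z, y_g)
print(fake.shape)          # torch.Size([8, 1, 32, 32])
print(D(fake, y_d).shape)  # torch.Size([8, 1, 1, 1])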
Define the Loss function, initialize the weights, initialize the Generator / Discriminator, and set the Optimizer.
# Loss function
adversarial_loss = torch.nn.BCELoss()
# Initialize Generator and discriminator
generator = Generator()
discriminator = Discriminator()
if torch.cuda.is_available():
    generator.cuda()
    discriminator.cuda()
    adversarial_loss.cuda()
# Initialize weights
generator.apply(weights_init_normal)
discriminator.apply(weights_init_normal)
# Optimizers
optimizer_G = torch.optim.Adam(generator.parameters(), lr=args.lr, betas=(args.beta1, args.beta2))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=args.lr, betas=(args.beta1, args.beta2))
We will create a DataLoader. This time the images are generated at a size of 32x32, so the MNIST images are resized in the pre-processing step.
# Configure data loader
os.makedirs('./data', exist_ok=True)
dataloader = torch.utils.data.DataLoader(
    datasets.MNIST('./data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.Resize(args.img_size),
                       transforms.ToTensor(),
                       transforms.Normalize([0.5], [0.5])
                   ])),
    batch_size=args.batch_size, shuffle=True, drop_last=True)
print('the data is ok')
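A quick check (a sketch) that the resizing works as intended:
imgs, labels = next(iter(dataloader))
print(imgs.shape)    # torch.Size([64, 1, 32, 32])
print(labels.shape)  # torch.Size([64])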
Training
Now let's train the GAN.
for epoch in range(1, args.n_epochs+1):
    for i, (imgs, labels) in enumerate(dataloader):
        Batch_Size = args.batch_size
        N_Class = args.n_classes
        img_size = args.img_size

        # Adversarial ground truths
        valid = torch.ones(Batch_Size).cuda()
        fake = torch.zeros(Batch_Size).cuda()

        # Configure input
        real_imgs = imgs.type(torch.FloatTensor).cuda()
        # one-hot encode the real labels and broadcast them to image size for D
        real_y = torch.zeros(Batch_Size, N_Class)
        real_y = real_y.scatter_(1, labels.view(Batch_Size, 1), 1).view(Batch_Size, N_Class, 1, 1).contiguous()
        real_y = real_y.expand(-1, -1, img_size, img_size).cuda()

        # Sample noise and labels as generator input
        noise = torch.randn((Batch_Size, args.latent_dim, 1, 1)).cuda()
        gen_labels = (torch.rand(Batch_Size, 1) * N_Class).type(torch.LongTensor)
        gen_y = torch.zeros(Batch_Size, N_Class)
        gen_y = gen_y.scatter_(1, gen_labels.view(Batch_Size, 1), 1).view(Batch_Size, N_Class, 1, 1).cuda()

        # ---------------------
        #  Train Discriminator
        # ---------------------
        optimizer_D.zero_grad()
        # Loss for real images
        d_real_loss = adversarial_loss(discriminator(real_imgs, real_y).squeeze(), valid)
        # Loss for fake images
        gen_imgs = generator(noise, gen_y)
        gen_y_for_D = gen_y.view(Batch_Size, N_Class, 1, 1).contiguous().expand(-1, -1, img_size, img_size)
        d_fake_loss = adversarial_loss(discriminator(gen_imgs.detach(), gen_y_for_D).squeeze(), fake)
        # Total discriminator loss
        d_loss = (d_real_loss + d_fake_loss)
        d_loss.backward()
        optimizer_D.step()

        # -----------------
        #  Train Generator
        # -----------------
        optimizer_G.zero_grad()
        g_loss = adversarial_loss(discriminator(gen_imgs, gen_y_for_D).squeeze(), valid)
        g_loss.backward()
        optimizer_G.step()

        print("[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"
              % (epoch, args.n_epochs, i, len(dataloader), d_loss.data.cpu(), g_loss.data.cpu()))

    # save a 10x10 grid of fixed-label samples every 20 epochs
    if epoch % 20 == 0:
        noise = torch.FloatTensor(np.random.normal(0, 1, (N_Class**2, args.latent_dim, 1, 1))).cuda()
        # fixed labels: each row of the grid corresponds to one class 0-9
        y_ = torch.LongTensor(np.array([num for num in range(N_Class)])).view(N_Class, 1).expand(-1, N_Class).contiguous()
        y_fixed = torch.zeros(N_Class**2, N_Class)
        y_fixed = y_fixed.scatter_(1, y_.view(N_Class**2, 1), 1).view(N_Class**2, N_Class, 1, 1).cuda()
        with torch.no_grad():
            gen_imgs = generator(noise, y_fixed).view(-1, C, H, W)
        save_image(gen_imgs.data, img_save_path + '/epoch:%d.png' % epoch, nrow=N_Class, normalize=True)
The execution result is as follows. You can see that the generated images are neatly arranged by class. Conditional GAN allows you to control the generated images in this way.
As last time, let's now make it possible to save the images one by one.
if epoch % 20 == 0:
    noise = torch.FloatTensor(np.random.normal(0, 1, (N_Class**2, args.latent_dim, 1, 1))).cuda()
    # fixed labels
    y_ = torch.LongTensor(np.array([num for num in range(N_Class)])).view(N_Class, 1).expand(-1, N_Class).contiguous()
    y_fixed = torch.zeros(N_Class**2, N_Class)
    y_fixed = y_fixed.scatter_(1, y_.view(N_Class**2, 1), 1).view(N_Class**2, N_Class, 1, 1).cuda()
    with torch.no_grad():
        gen_imgs = generator(noise, y_fixed).view(-1, C, H, W)
    save_image(gen_imgs.data, img_save_path + '/epoch:%d.png' % epoch, nrow=N_Class, normalize=True)
We change this part as follows:
if epoch % 20 == 0:
    noise = torch.FloatTensor(np.random.normal(0, 1, (N_Class**2, args.latent_dim, 1, 1))).cuda()
    # fixed labels: image index 10*l + m corresponds to class l
    y_ = torch.LongTensor(np.array([num for num in range(N_Class)])).view(N_Class, 1).expand(-1, N_Class).contiguous()
    y_fixed = torch.zeros(N_Class**2, N_Class)
    y_fixed = y_fixed.scatter_(1, y_.view(N_Class**2, 1), 1).view(N_Class**2, N_Class, 1, 1).cuda()
    with torch.no_grad():
        gen_imgs = generator(noise, y_fixed).view(-1, C, H, W)
    for l in range(N_Class):        # class label 0-9
        for m in range(N_Class):    # save 10 images for each class
            save_gen_imgs = gen_imgs[N_Class * l + m]
            save_image(save_gen_imgs,
                       img_save_path + '/epochs:%d/%d/epoch:%d-%d_%d.png' % (epoch, l, epoch, l, m),
                       normalize=True)
To make this work, you also need to change the directory structure used for saving the images.
images-C_dcgan
├── epochs:20
│ ├── 0
│ ├── 1
│ ├── 2
│ ├── 3
│ ├── 4
│ ├── 5
│ ├── 6
│ ├── 7
│ ├── 8
│ └── 9
│ .
│ .
│ .
│
└── epochs:200
├── 0
├── 1
├── 2
├── 3
├── 4
├── 5
├── 6
├── 7
├── 8
└── 9
There are directories 0 to 9 for every 20th epoch. It's easiest to create them all at once using `os.makedirs`. Images are now saved separately for each class.
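For example, a minimal sketch that creates the whole tree in advance (assuming 200 epochs with saving every 20, as above):
import os

img_save_path = 'images-C_dcgan'
for epoch in range(20, 201, 20):  # every 20th epoch up to 200
    for label in range(10):       # one sub-directory per class
        os.makedirs(os.path.join(img_save_path, 'epochs:%d' % epoch, str(label)), exist_ok=True)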
This time, we implemented a conditional GAN on top of the DCGAN from last time, so that the generated images can be saved one by one. We used the simplest form of conditional GAN, which adds label information to both the Generator and Discriminator inputs. Currently, the de facto standard for implementing conditional GANs involves techniques such as the Projection Discriminator and Conditional Batch Normalization. I don't understand these techniques well yet, so if I get the chance, I'd like to study them by implementing them.