See the contents of Kumantic Segumantion

"Kumantic Segumantion" to get information about bears from images showing bears. Following Last time, this time I will try to check what the network defined in Kumantic Segumantion is looking at. I did.

A network that recognizes the silhouette of a bear

This is the same network as the one defined Last time. Hereinafter it will be referred to as the "bear network".

import torch
from torch import nn, optim
from torch.nn import functional as F
class Kuma(nn.Module):
    def __init__(self):
        super(Kuma, self).__init__()
        #Encoder part
        self.encode1 = nn.Sequential(
            *[
              nn.Conv2d(
                  in_channels = 1, out_channels = 6, kernel_size = 3, padding = 1),
              nn.BatchNorm2d(6)
              ])
        self.encode2 = nn.Sequential(
            *[
              nn.Conv2d(
                  in_channels = 6, out_channels = 16, kernel_size = 3, padding = 1),
              nn.BatchNorm2d(16)
              ])
        self.encode3 = nn.Sequential(
            *[
              nn.Conv2d(
                  in_channels = 16, out_channels = 32, kernel_size = 3, padding = 1),
              nn.BatchNorm2d(32)
              ])

        self.encode4 = nn.Sequential(
            *[
              nn.Conv2d(
                  in_channels = 32, out_channels = 64, kernel_size = 3, padding = 1),
              nn.BatchNorm2d(64)
              ])

        #Decoder part
        self.decode4 = nn.Sequential(
            *[
              nn.ConvTranspose2d(
                  in_channels = 64, out_channels = 32, kernel_size = 3, padding = 1),
              nn.BatchNorm2d(32)
              ])
        self.decode3 = nn.Sequential(
            *[
              nn.ConvTranspose2d(
                  in_channels = 32, out_channels = 16, kernel_size = 3, padding = 1),
              nn.BatchNorm2d(16)
              ])
        self.decode2 = nn.Sequential(
            *[
              nn.ConvTranspose2d(
                  in_channels = 16, out_channels = 6, kernel_size = 3, padding = 1),
              nn.BatchNorm2d(6)
              ])
        self.decode1 = nn.Sequential(
            *[
              nn.ConvTranspose2d(
                  in_channels = 6, out_channels = 1, kernel_size = 3, padding = 1),
              ])

    def forward(self, x):
        #Encoder part
        dim_0 = x.size()      
        x = F.relu(self.encode1(x))
        x, idx_1 = F.max_pool2d(x, kernel_size = 2, stride = 2, return_indices = True)

        dim_1 = x.size() 
        x = F.relu(self.encode2(x))
        x, idx_2 = F.max_pool2d(x, kernel_size = 2, stride = 2, return_indices = True)

        dim_2 = x.size()
        x = F.relu(self.encode3(x)) 
        x, idx_3 = F.max_pool2d(x, kernel_size = 2, stride = 2, return_indices = True)

        dim_3 = x.size()
        x = F.relu(self.encode4(x)) 
        x, idx_4 = F.max_pool2d(x, kernel_size = 2, stride = 2, return_indices = True)

        #Decoder part
        x = F.max_unpool2d(x, idx_4, kernel_size = 2, stride = 2, output_size = dim_3)
        x = F.relu(self.decode4(x))

        x = F.max_unpool2d(x, idx_3, kernel_size = 2, stride = 2, output_size = dim_2)
        x = F.relu(self.decode3(x))

        x = F.max_unpool2d(x, idx_2, kernel_size = 2, stride = 2, output_size = dim_1)           
        x = F.relu(self.decode2(x))

        x = F.max_unpool2d(x, idx_1, kernel_size = 2, stride = 2, output_size = dim_0)           
        x = F.relu(self.decode1(x))

        x = torch.sigmoid(x)                                     

        return x
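As a quick sanity check (a sketch of my own, not part of the original post), we can feed a dummy grayscale image through the network and confirm that the four pool/unpool stages bring the output back to the input's spatial size:

#Sanity check (my own addition): output should keep the input's spatial size
kuma_check = Kuma()
dummy = torch.rand(1, 1, 400, 400) #(batch, channel, height, width)
print(kuma_check(dummy).shape) #expected: torch.Size([1, 1, 400, 400])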

Downloading the trained network

Download the trained bear network created Last time.

url = "https://github.com/maskot1977/PythonCourse2019/blob/master/kuma_050_20200226.pytorch?raw=true"
import urllib.request
urllib.request.urlretrieve(url, 'kuma_050_20200226.pytorch') #Download data
('kuma_050_20200226.pytorch', <http.client.HTTPMessage at 0x7f73177ebef0>)

Load

Load the trained weights into the bear network defined above.

kuma = Kuma()
kuma.load_state_dict(torch.load("kuma_050_20200226.pytorch"))
<All keys matched successfully>
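Note that the bear network contains BatchNorm2d layers, which behave differently during training and inference. Before making predictions it is safer to switch the network to evaluation mode so that it uses its stored running statistics (this line is my own addition, not in the original code):

kuma.eval() #Use the BatchNorm running statistics and stop updating them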

Check the contents

kuma
Kuma(
  (encode1): Sequential(
    (0): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (encode2): Sequential(
    (0): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (encode3): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (encode4): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (decode4): Sequential(
    (0): ConvTranspose2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (decode3): Sequential(
    (0): ConvTranspose2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (decode2): Sequential(
    (0): ConvTranspose2d(16, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (decode1): Sequential(
    (0): ConvTranspose2d(6, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
)

Looking at the parameters one by one:

for name, param in kuma.named_parameters():
    print(name, param.shape)
encode1.0.weight torch.Size([6, 1, 3, 3])
encode1.0.bias torch.Size([6])
encode1.1.weight torch.Size([6])
encode1.1.bias torch.Size([6])
encode2.0.weight torch.Size([16, 6, 3, 3])
encode2.0.bias torch.Size([16])
encode2.1.weight torch.Size([16])
encode2.1.bias torch.Size([16])
encode3.0.weight torch.Size([32, 16, 3, 3])
encode3.0.bias torch.Size([32])
encode3.1.weight torch.Size([32])
encode3.1.bias torch.Size([32])
encode4.0.weight torch.Size([64, 32, 3, 3])
encode4.0.bias torch.Size([64])
encode4.1.weight torch.Size([64])
encode4.1.bias torch.Size([64])
decode4.0.weight torch.Size([64, 32, 3, 3])
decode4.0.bias torch.Size([32])
decode4.1.weight torch.Size([32])
decode4.1.bias torch.Size([32])
decode3.0.weight torch.Size([32, 16, 3, 3])
decode3.0.bias torch.Size([16])
decode3.1.weight torch.Size([16])
decode3.1.bias torch.Size([16])
decode2.0.weight torch.Size([16, 6, 3, 3])
decode2.0.bias torch.Size([6])
decode2.1.weight torch.Size([6])
decode2.1.bias torch.Size([6])
decode1.0.weight torch.Size([6, 1, 3, 3])
decode1.0.bias torch.Size([1])

Ah, I see, I completely understand ← I don't understand at all
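At the very least we can total them up (a one-liner of my own, not in the original):

#Count all trainable parameters in the bear network
print(sum(p.numel() for p in kuma.parameters()))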

Kernel (or filter)

Apparently, the weight matrices shown above are called kernels (or filters) in the world of deep learning. Their shapes are as listed above, but let's visualize what numbers they actually contain.

import matplotlib.pyplot as plt
for name, param in kuma.named_parameters():
    print(name)
    print(param.shape)
    if len(param.shape) == 4:
        x, y, z, w = param.shape
        idx = 0
        fig = plt.figure(figsize=(x, y))
        for para in param:
            for par in para:
                idx += 1
                ax = fig.add_subplot(y, x, idx)
                im = ax.imshow(par.detach().numpy(), cmap="gray")
                ax.axis('off')
                #fig.colorbar(im)
        plt.show()
    #break
encode1.0.weight
torch.Size([6, 1, 3, 3])

(Figures: output_7_1.png through output_7_15.png — grayscale plots of the learned 3 x 3 kernels for each layer)

At the risk of exposing my lack of knowledge: this bear network scans the image with these "3 x 3" kernels (or filters) to extract features in the image such as "lines".
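To make "scanning with a 3 x 3 kernel" concrete, here is a minimal sketch of my own using a hand-made vertical-edge (Sobel-like) kernel. This kernel is for illustration only and is not one of the learned kernels above.

#A hand-made 3x3 vertical-edge kernel, shape (out_channels, in_channels, 3, 3)
edge_kernel = torch.tensor([[[[-1., 0., 1.],
                              [-2., 0., 2.],
                              [-1., 0., 1.]]]])
img = torch.rand(1, 1, 8, 8) #dummy single-channel image
edges = F.conv2d(img, edge_kernel, padding = 1) #padding=1 keeps the spatial size
print(edges.shape) #torch.Size([1, 1, 8, 8])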

Still, I'm not sure how this lets the network recognize Kuma-san.

Pass the bear image through the bear network

Staring at the kernels (or filters) by themselves doesn't tell me much, so I tried passing a bear image through the bear network to see what the image looks like at each layer.

Bear image generation

It is the same as Last time.

import numpy as np
import random
from PIL import Image, ImageDraw, ImageFilter
from itertools import product

def draw_bear(n_bear=1): #Randomly generate an image of a bear
    r = g = b = 250
    im = Image.new('RGB', (400, 400), (r, g, b))
    draw = ImageDraw.Draw(im)

    for _ in range(random.randint(-1, 0)): #Note: randint(-1, 0) is -1 or 0, so this decoy-ellipse loop never executes
        r = random.randint(10, 200)
        g = random.randint(10, 200)
        b = random.randint(10, 200)
        x1 = random.randint(0, 400)
        y1 = random.randint(0, 400)
        dx = random.randint(10, 50)
        dy = random.randint(10, 50)
        draw.ellipse((x1, y1, x1+dx, y1+dy), fill=(r, g, b))

    for _ in range(n_bear):
        r = g = b = 1
        center_x = 200
        center_y = 200
        wx = 60
        wy = 50
        dx1 = 90
        dx2 = 20
        dy1 = 90
        dy2 = 20
        dx3 = 15
        dy3 = 100
        dy4 = 60
        shape1 = (center_x - wx, center_y - wy, center_x + wx, center_y + wy)
        shape2 = (center_x - dx1, center_y - dy1, center_x - dx2, center_y - dy2)
        shape3 = (center_x + dx2, center_y - dy1, center_x + dx1, center_y - dy2)
        shape4 = (center_x - dx3, center_y - dy3, center_x + dx3, center_y - dy4)

        zoom = 0.2 + random.random() * 0.4
        center_x = random.randint(-30, 250)
        center_y = random.randint(-30, 250)

        shape1 = modify(shape1, zoom=zoom, center_x=center_x, center_y=center_y)
        shape2= modify(shape2, zoom=zoom, center_x=center_x, center_y=center_y)
        shape3 = modify(shape3, zoom=zoom, center_x=center_x, center_y=center_y)
        shape4 = modify(shape4, zoom=zoom, center_x=center_x, center_y=center_y)

        draw.ellipse(shape1, fill=(r, g, b))
        draw.ellipse(shape2, fill=(r, g, b))
        draw.ellipse(shape3, fill=(r, g, b))
        #draw.ellipse(shape4, fill=(r, g, b))

    return im

def modify(shape, zoom=1, center_x=0, center_y=0):
    x1, y1, x2, y2 = np.array(shape) * zoom
    return (x1 + center_x, y1 + center_y, x2 + center_x, y2 + center_y)

class Noise: #Add noise to the bear's image
    def __init__(self, input_image):
        self.input_image = input_image
        self.input_pix = self.input_image.load()
        self.w, self.h = self.input_image.size

    def saltpepper(self, salt=0.05, pepper=0.05):
        output_image = Image.new("RGB", self.input_image.size)
        output_pix = output_image.load()

        for x, y in product(*map(range, (self.w, self.h))):
            r = random.random()
            if r < salt:
                output_pix[x, y] = (255, 255, 255)
            elif r > 1 - pepper:
                output_pix[x, y] = (  0,   0,   0)
            else:
                output_pix[x, y] = self.input_pix[x, y]
        return output_image

##Process the bear image into training data for semantic segmentation
def getdata_for_semantic_segmentation(im): 
    x_im = im.filter(ImageFilter.CONTOUR)
    im2 = Noise(input_image=x_im)
    x_im = im2.saltpepper()
    a_im = np.asarray(im)
    y_im = Image.fromarray(np.where(a_im == 1, 255, 0).astype(dtype='uint8'))
    return x_im, y_im
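To see what this produces, here is a quick look at one generated pair (a snippet of my own; matplotlib was already imported above). The noisy contour image is the network input and the silhouette mask is the target.

x_im, y_im = getdata_for_semantic_segmentation(draw_bear(3))
fig = plt.figure(figsize=(8, 4))
fig.add_subplot(1, 2, 1).imshow(x_im) #input: noisy contour image
fig.add_subplot(1, 2, 2).imshow(y_im) #target: bear silhouette mask
plt.show()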

Slightly modified bear network

I modified it to visualize the intermediate results along the way.

import torch
from torch import nn, optim
from torch.nn import functional as F
class Kuma(nn.Module):
    def __init__(self):
        super(Kuma, self).__init__()
        #Encoder part
        self.encode1 = nn.Sequential(
            *[
              nn.Conv2d(
                  in_channels = 1, out_channels = 6, kernel_size = 3, padding = 1),
              nn.BatchNorm2d(6)
              ])
        self.encode2 = nn.Sequential(
            *[
              nn.Conv2d(
                  in_channels = 6, out_channels = 16, kernel_size = 3, padding = 1),
              nn.BatchNorm2d(16)
              ])
        self.encode3 = nn.Sequential(
            *[
              nn.Conv2d(
                  in_channels = 16, out_channels = 32, kernel_size = 3, padding = 1),
              nn.BatchNorm2d(32)
              ])

        self.encode4 = nn.Sequential(
            *[
              nn.Conv2d(
                  in_channels = 32, out_channels = 64, kernel_size = 3, padding = 1),
              nn.BatchNorm2d(64)
              ])

        #Decoder part
        self.decode4 = nn.Sequential(
            *[
              nn.ConvTranspose2d(
                  in_channels = 64, out_channels = 32, kernel_size = 3, padding = 1),
              nn.BatchNorm2d(32)
              ])
        self.decode3 = nn.Sequential(
            *[
              nn.ConvTranspose2d(
                  in_channels = 32, out_channels = 16, kernel_size = 3, padding = 1),
              nn.BatchNorm2d(16)
              ])
        self.decode2 = nn.Sequential(
            *[
              nn.ConvTranspose2d(
                  in_channels = 16, out_channels = 6, kernel_size = 3, padding = 1),
              nn.BatchNorm2d(6)
              ])
        self.decode1 = nn.Sequential(
            *[
              nn.ConvTranspose2d(
                  in_channels = 6, out_channels = 1, kernel_size = 3, padding = 1),
              ])

    def forward(self, x):
        print("forward input:", x.shape)
        draw_layer(x)
        #Encoder part
        dim_0 = x.size()      
        x = F.relu(self.encode1(x))
        x, idx_1 = F.max_pool2d(x, kernel_size = 2, stride = 2, return_indices = True)
        print("after encode1:", x.shape)
        draw_layer(x)

        dim_1 = x.size() 
        x = F.relu(self.encode2(x))
        x, idx_2 = F.max_pool2d(x, kernel_size = 2, stride = 2, return_indices = True)
        print("after encode2:", x.shape)
        draw_layer(x)

        dim_2 = x.size()
        x = F.relu(self.encode3(x)) 
        x, idx_3 = F.max_pool2d(x, kernel_size = 2, stride = 2, return_indices = True)
        print("after encode3:", x.shape)
        draw_layer(x)

        dim_3 = x.size()
        x = F.relu(self.encode4(x)) 
        x, idx_4 = F.max_pool2d(x, kernel_size = 2, stride = 2, return_indices = True)
        print("after encode4:", x.shape)
        draw_layer(x)

        #Decoder part
        x = F.max_unpool2d(x, idx_4, kernel_size = 2, stride = 2, output_size = dim_3)
        x = F.relu(self.decode4(x))
        print("after decode4:", x.shape)
        draw_layer(x)

        x = F.max_unpool2d(x, idx_3, kernel_size = 2, stride = 2, output_size = dim_2)
        x = F.relu(self.decode3(x))
        print("after decode3:", x.shape)
        draw_layer(x)

        x = F.max_unpool2d(x, idx_2, kernel_size = 2, stride = 2, output_size = dim_1)           
        x = F.relu(self.decode2(x))
        print("after decode2:", x.shape)
        draw_layer(x)

        x = F.max_unpool2d(x, idx_1, kernel_size = 2, stride = 2, output_size = dim_0)           
        x = F.relu(self.decode1(x))
        x = torch.sigmoid(x)  
        print("after decode1:", x.shape) 
        draw_layer(x)                                  

        return x

def draw_layer(param):
    if len(param.shape) == 4:
        x, y, z, w = param.shape
        idx = 0
        fig = plt.figure(figsize=(y*2, x*2))
        for para in param:
            for par in para:
                idx += 1
                ax = fig.add_subplot(x, y, idx)
                im = ax.imshow(par.detach().numpy(), cmap="gray")
                ax.axis('off')
                #fig.colorbar(im)
        plt.show()
kuma = Kuma()
kuma.load_state_dict(torch.load("kuma_050_20200226.pytorch"))
<All keys matched successfully>

I generated 4 new images with 3 bears each and checked what they look like in the intermediate layers.

X_test = [] #Stores image data for testing
Y_test = [] #Stores correct answer data for testing
Z_test = [] #Store prediction results for testing

for i in range(4): #Generate 4 new images not used for training
    x_im, y_im = getdata_for_semantic_segmentation(draw_bear(3))
    X_test.append(x_im)
    Y_test.append(y_im)

#Format test image data for PyTorch
X_test_a = np.array([[np.asarray(x).transpose((2, 0, 1))[0]] for x in X_test])
X_test_t = torch.tensor(X_test_a, dtype = torch.float32)

#Calculate predictions using a trained model
Y_pred = kuma(X_test_t)

#Store predicted values as ndarray
for pred in Y_pred:
    Z_test.append(pred.detach().numpy())
forward input: torch.Size([4, 1, 400, 400])

(Figures: output_12_1.png through output_12_17.png — feature maps at the input, after each encoder stage, and after each decoder stage)

The first encoder layer picks up a rough outline, encoder layers 2 to 3 remove noise, and encoder layers 3 to 4 capture the "area enclosed by the contour"; decoder layers 4 down to 1 then restore this so it can be mapped back onto the original image.
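Finally, to turn the sigmoid output into an actual silhouette, we can threshold it at 0.5. This is a simple post-processing sketch of my own, not part of the original post:

for x_im, pred in zip(X_test, Z_test):
    mask = (pred[0] > 0.5).astype(np.uint8) * 255 #binarize the (400, 400) output
    fig = plt.figure(figsize=(8, 4))
    fig.add_subplot(1, 2, 1).imshow(x_im) #input image
    fig.add_subplot(1, 2, 2).imshow(mask, cmap = "gray") #predicted silhouette
    plt.show()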
