[Deep Learning from Scratch: The theory and implementation of deep learning learned with Python](https://www.amazon.co.jp/%E3%82%BC%E3%83%AD%E3%81%8B%E3%82%89%E4%BD%9C%E3%82%8BDeep-Learning-Python%E3%81%A7%E5%AD%A6%E3%81%B6%E3%83%87%E3%82%A3%E3%83%BC%E3%83%97%E3%83%A9%E3%83%BC%E3%83%8B%E3%83%B3%E3%82%B0%E3%81%AE%E7%90%86%E8%AB%96%E3%81%A8%E5%AE%9F%E8%A3%85-%E6%96%8E%E8%97%A4-%E5%BA%B7%E6%AF%85/dp/4873117585/ref=sr_1_1?s=digital-text&ie=UTF8&qid=1483316946&sr=8-1&keywords=deep+learning+%E3%82%BC%E3%83%AD%E3%81%8B%E3%82%89)
Sigmoid function
from sigmoid import *
import numpy as np

def sigmoid(x):
    # Squash any real-valued input into the range (0, 1)
    return 1 / (1 + np.exp(-x))
sigmoid(7)
0.9990889488055994
x = np.array([4,6,-2,-1, 2])
sigmoid(x)
array([ 0.98201379, 0.99752738, 0.11920292, 0.26894142, 0.88079708])
Step function
from step_function import *
import numpy as np

def step_function(x):
    # 1 where x > 0, otherwise 0 (np.int is deprecated in recent NumPy, so plain int is used)
    return np.array(x > 0, dtype=int)
step_function(5)
array(1)
step_function(-5)
array(0)
x = np.array([3,-6,4,-1])
step_function(x)
array([1, 0, 1, 0])
x = np.random.randn(2,3)
print(x)
print(step_function(x))
[[ 0.21780529 -0.05316613 1.28802155]
[-0.55119659 -1.23515555 0.6576237 ]]
[[1 0 1]
[0 0 1]]
ReLU (Rectified Linear Unit) function
A function that outputs the input unchanged when it is greater than 0, and outputs 0 when it is 0 or less.
from relu import *
import numpy as np

def relu(x):
    # Element-wise maximum of 0 and x
    return np.maximum(0, x)
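As a quick check (not in the original notebook), relu can be applied element-wise to an array just like sigmoid and step_function:

```python
relu(np.array([-3.0, 0.0, 4.5]))  # -> array([ 0. ,  0. ,  4.5])
```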
B = np.array([[1,2], [3,4], [5,6]]); B
array([[1, 2],
[3, 4],
[5, 6]])
np.ndim(B)
2
B.shape
(3, 2)
A = np.array([[3,2,1], [6,5,4]])
A.dot(B)
array([[14, 20],
[41, 56]])
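As an added shape check (not in the original): a (2, 3) matrix times a (3, 2) matrix yields a (2, 2) matrix, because the inner dimensions (3 and 3) match.

```python
print(A.shape, B.shape, np.dot(A, B).shape)  # (2, 3) (3, 2) (2, 2)
```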
x = np.array([1., .5])
W1 = np.array([[.1, .3, .5], [.2, .4, .6]])
B1 = np.array([.1, .2, .3])
print('Input signal x=', x)
print('Weight W1=', W1)
print('Bias B1=', B1)
Input signal x= [ 1.   0.5]
Weight W1= [[ 0.1  0.3  0.5]
 [ 0.2  0.4  0.6]]
Bias B1= [ 0.1  0.2  0.3]
x.shape, W1.shape
((2,), (2, 3))
The last dimension of x (2 elements) must match the first dimension of W1 (2 rows)!
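A minimal sketch (not in the original) of what happens when the dimensions do not match; W_bad is a hypothetical weight matrix with the wrong shape:

```python
W_bad = np.array([[.1, .3], [.2, .4], [.5, .6]])  # shape (3, 2)
try:
    np.dot(x, W_bad)  # (2,) . (3, 2): inner dimensions 2 and 3 do not align
except ValueError as e:
    print(e)
```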
A1 = np.dot(x, W1) + B1; A1
array([ 0.3, 0.7, 1.1])
The weighted sum (weighted input signals plus the bias) in the hidden layer is represented by $a$.
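Written out with the book's layer-superscript notation (the superscript is added here for clarity), the first-layer weighted sum computed above is:

$$A^{(1)} = XW^{(1)} + B^{(1)}$$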
Z1 = sigmoid(A1); Z1
array([ 0.57444252, 0.66818777, 0.75026011])
The signal converted by the activation function is represented by $z$.
The sigmoid function corresponds to $h()$ in the figure.
W2 = np.array([[.1, .4], [.2, .5], [.3, .6]])
B2 = np.array([.1, .2])
print('W2=', W2)
print('B2=', B2)
W2= [[ 0.1 0.4]
[ 0.2 0.5]
[ 0.3 0.6]]
B2= [ 0.1 0.2]
A2 = np.dot(Z1, W2) + B2; A2
array([ 0.51615984, 1.21402696])
Z2 = sigmoid(A2); Z2
array([ 0.62624937, 0.7710107 ])
def identity_function(x):
    """Identity function: outputs the input as it is, doing nothing else"""
    return x
W3 = np.array([[.1, .3], [.2, .4]])
B3 = np.array([.1, .2])
print('W3=', W3)
print('B3=', B3)
W3= [[ 0.1 0.3]
[ 0.2 0.4]]
B3= [ 0.1 0.2]
A3 = np.dot(Z2, W3) + B3
Y= identity_function(A3); Y
array([ 0.31682708, 0.69627909])
A function called identity_function() is used as the activation function of the output layer.
The activation function of the output layer is written $\sigma()$ to distinguish it from the hidden-layer activation function $h()$.
def init_network():
    """Initialize the weights and biases"""
    network = {}
    network['W1'] = np.array([[.1, .3, .5], [.2, .4, .6]])
    network['W2'] = np.array([[.1, .4], [.2, .5], [.3, .6]])
    network['W3'] = np.array([[.1, .3], [.2, .4]])
    network['b1'] = np.array([.1, .2, .3])
    network['b2'] = np.array([.1, .2])
    network['b3'] = np.array([.1, .2])
    return network
def forward(network, x):
    """Propagate the input signal x forward through the network to the output"""
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    # Input layer -> first hidden layer
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    # First hidden layer -> second hidden layer
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    # Second hidden layer -> output layer
    a3 = np.dot(z2, W3) + b3
    return identity_function(a3)
x
array([ 1. , 0.5])
network = init_network()
x = np.array([1., .5])
forward(network, x)
array([ 0.31682708, 0.69627909])
def softmax(a):
    """Convert the input vector a into probabilities that sum to 1"""
    exp_a = np.exp(a - np.max(a))  # Subtract the maximum as an overflow countermeasure
    return exp_a / np.sum(exp_a)
If the exponent is too large, $e^a$ overflows to `inf` ("infinity"), so the result hits the ceiling and an accurate calculation becomes impossible (this is called **overflow**). Therefore, shift the inputs to smaller values before exponentiating and then divide by the sum; the result is mathematically equivalent, because adding the same constant to every $a_k$ inside the exponentials of both the numerator and the denominator leaves $y_k$ unchanged. In practice the constant $-\max(a)$ is used.
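A minimal demonstration (not in the original notebook) of why the shift matters: exponentiating large values directly overflows, while subtracting the maximum first gives the same probabilities without overflow.

```python
import numpy as np

a = np.array([1010, 1000, 990])
print(np.exp(a) / np.sum(np.exp(a)))          # naive version: [nan nan nan] (inf / inf)
c = np.max(a)
print(np.exp(a - c) / np.sum(np.exp(a - c)))  # stable version: approx. [1.0e+00 4.5e-05 2.1e-09]
```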
softmax(np.array([.3, 2.9, 4.]))
array([ 0.01821127, 0.24519181, 0.73659691])
import os, sys
sys.path.append(os.pardir)  # Add the parent directory to the module search path
from dataset.mnist import load_mnist
Running the following downloads the MNIST dataset as .gz files, unpacks it, and caches it in a pickle (.pkl) file.
The first run takes a few minutes to finish.
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)
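As an added sanity check of what load_mnist returns: with flatten=True each image becomes a 784-element vector, and MNIST contains 60,000 training images and 10,000 test images.

```python
print(x_train.shape)  # (60000, 784)
print(t_train.shape)  # (60000,)
print(x_test.shape)   # (10000, 784)
print(t_test.shape)   # (10000,)
```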
The source is ch03/mnist_show.py.
The image is displayed in .BMP format.
# %load mnist_show.py
import sys, os
sys.path.append(os.pardir) #Settings for importing files in the parent directory
import numpy as np
from dataset.mnist import load_mnist
from PIL import Image
def img_show(img):
    pil_img = Image.fromarray(np.uint8(img))  # Convert the NumPy array to a PIL image
    pil_img.show()
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)
img = x_train[0]
label = t_train[0]
print(label) # 5
print(img.shape) # (784,)
img = img.reshape(28, 28) #Transform the shape to the original image size
print(img.shape) # (28, 28)
img_show(img)
5
(784,)
(28, 28)
# %load neuralnet_mnist.py
import sys, os
sys.path.append(os.pardir) #Settings for importing files in the parent directory
import numpy as np
import pickle
from dataset.mnist import load_mnist
from common.functions import sigmoid, softmax
def get_data():
    """Load MNIST and return the test set used for evaluation"""
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=False)
    return x_test, t_test
def init_network():
    """Load the trained weight parameters stored in sample_weight.pkl"""
    with open("sample_weight.pkl", 'rb') as f:
        network = pickle.load(f)
    return network
def predict(network, x):
    """Forward pass of the neural network.
    Unlike the earlier example, the output layer uses
    the softmax function instead of the identity function."""
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = softmax(a3)
    return y
%%timeit
x, t = get_data()
network = init_network()
accuracy_cnt = 0
for i in range(len(x)):
    y = predict(network, x[i])
    p = np.argmax(y)  # Index of the element with the highest probability
    if p == t[i]:
        accuracy_cnt += 1
print("Accuracy:" + str(float(accuracy_cnt) / len(x)))
Accuracy:0.9352
Accuracy:0.9352
Accuracy:0.9352
Accuracy:0.9352
1 loop, best of 3: 1.25 s per loop
Transition of array shapes in batch processing
Batch processing raises the ratio of computation to data loading, which reduces the load on the bus bandwidth and shortens the processing time per image.
Reading one large array and computing on it all at once finishes faster than computing on many small arrays piece by piece. A sketch of the resulting shape transition is shown below.
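A minimal sketch (not in the original notebook), assuming the trained network in sample_weight.pkl with layer sizes 784 → 50 → 100 → 10; for a batch of 100 images, the shapes change as (100, 784) → (100, 50) → (100, 100) → (100, 10):

```python
x, t = get_data()
network = init_network()
x_batch = x[0:100]                   # shape (100, 784): 100 flattened images
y_batch = predict(network, x_batch)  # shape (100, 10): one score vector per image
print(x_batch.shape, y_batch.shape)  # (100, 784) (100, 10)
```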
%%timeit
# %load neuralnet_mnist_batch.py
x, t = get_data()
network = init_network()
batch_size = 100  # Number of images processed per batch
accuracy_cnt = 0
for i in range(0, len(x), batch_size):
    x_batch = x[i:i+batch_size]
    y_batch = predict(network, x_batch)
    p = np.argmax(y_batch, axis=1)  # Most probable class for each image in the batch
    accuracy_cnt += np.sum(p == t[i:i+batch_size])
print("Accuracy:" + str(float(accuracy_cnt) / len(x)))
Accuracy:0.9352
Accuracy:0.9352
Accuracy:0.9352
Accuracy:0.9352
1 loop, best of 3: 269 ms per loop
Batch processing made the evaluation run roughly 4–5 times faster (1.25 s → 269 ms per loop).