While reading "Deep Learning from scratch" (written by Yasuki Saito, published by O'Reilly Japan), I will make a note of the sites I referred to. Part 13 ←
I used Google Colaboratory to run the program in the book.
I am working with the following folder structure:
content/
└─ drive/
    └─ My Drive/
        └─ Colab Notebooks/
            └─ deep_learning/
                ├─ common/
                │   ├─ functions.py
                │   └─ two_layer_net.py
                ├─ dataset/
                │   ├─ mnist.py
                │   ├─ mnist.pkl
                │   └─ lena.png
                └─ ch04/
                    ├─ train_neuralnet.ipynb
                    └─ test_neuralnet.ipynb
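Once Google Drive is mounted (the mount cell appears later in this post), you can check that the files are in place from a notebook cell, for example:

!ls "/content/drive/My Drive/Colab Notebooks/deep_learning"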
Modify the import section of two_layer_net.py from Chapter 4 and place the file in the common folder.
two_layer_net.py, placed in the common folder:
import sys, os
sys.path.append(os.pardir)
from functions import *
from gradient import numerical_gradient
import numpy as np


class TwoLayerNet:

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Weight initialization
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        return y

    # x: input data, t: teacher (label) data
    def loss(self, x, t):
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: teacher (label) data
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}
        batch_num = x.shape[0]

        # forward
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        # backward
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)
        dz1 = np.dot(dy, W2.T)
        da1 = sigmoid_grad(a1) * dz1
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis=0)
        return grads
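For reference, the ch04 version of two_layer_net.py imports from the common package; since the file now lives inside common itself and common is added to sys.path, those become direct imports. A before/after sketch, assuming the book's repository layout:

# Original (book repository):
#   from common.functions import *
#   from common.gradient import numerical_gradient
# Modified (the file is in common, which is on sys.path):
from functions import *
from gradient import numerical_gradient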
I split the code of train_neuralnet.py into several parts and ran them in interactive mode.
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
import sys, os
sys.path
['', '/env/python', '/usr/lib/python36.zip', '/usr/lib/python3.6', '/usr/lib/python3.6/lib-dynload', '/usr/local/lib/python3.6/dist-packages', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.6/dist-packages/IPython/extensions', '/root/.ipython']
sys.path.append(os.pardir) #Settings for importing files in the parent directory
sys.path.append('/content/drive/My Drive/Colab Notebooks/deep_learning/common')
sys.path.append('/content/drive/My Drive/Colab Notebooks/deep_learning/dataset')
sys.path
['', '/env/python', '/usr/lib/python36.zip', '/usr/lib/python3.6', '/usr/lib/python3.6/lib-dynload', '/usr/local/lib/python3.6/dist-packages', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.6/dist-packages/IPython/extensions', '/root/.ipython', '..', '/content/drive/My Drive/Colab Notebooks/deep_learning/common', '/content/drive/My Drive/Colab Notebooks/deep_learning/dataset']
import numpy as np
import matplotlib.pyplot as plt
from mnist import load_mnist
from two_layer_net import TwoLayerNet
If you run another script before this one and the contents of the files or folders have changed, you will get a ModuleNotFoundError here, and you cannot proceed even though the script itself is fine. Going to Runtime > Manage sessions, ending the running session, and re-executing after a while seems to work.
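A workaround sketch of my own (not from the book, and it may not cover every case): if the Drive files appeared after Python cached its import lookups, clearing the import caches and retrying sometimes avoids restarting the session.

# Clear cached import lookups, then retry the failing import
import importlib
importlib.invalidate_caches()
from two_layer_net import TwoLayerNet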
# Data loading
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num = 10000  # Set the number of iterations as appropriate
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Gradient calculation
    #grad = network.numerical_gradient(x_batch, t_batch)
    grad = network.gradient(x_batch, t_batch)

    # Parameter update
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))
#Drawing a graph
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
Furthermore, save the training result to a pkl file.
# Save the network object with pickle
import pickle
save_file = '/content/drive/My Drive/Colab Notebooks/deep_learning/dataset/TwoLayerNet_weight.pkl'
with open(save_file, 'wb') as f:
    pickle.dump(network, f, -1)
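As an aside, pickling the whole network object ties the file to the current class definition. A minimal alternative sketch (my own suggestion, not from the book) saves only the parameter dictionary; TwoLayerNet_params.pkl is a hypothetical file name:

# Save only the weight/bias dict instead of the whole object (hypothetical file name)
import pickle
params_file = '/content/drive/My Drive/Colab Notebooks/deep_learning/dataset/TwoLayerNet_params.pkl'
with open(params_file, 'wb') as f:
    pickle.dump(network.params, f, -1)
# To restore: build a fresh TwoLayerNet and assign the loaded dict to network.params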
Let's classify the test data using the saved result.
import sys, os
sys.path.append(os.pardir)
sys.path.append('/content/drive/My Drive/Colab Notebooks/deep_learning/common')
sys.path.append('/content/drive/My Drive/Colab Notebooks/deep_learning/dataset')
import numpy as np
import matplotlib.pyplot as plt
from mnist import load_mnist
from two_layer_net import TwoLayerNet
#Data reading
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=False)
#network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
import pickle
weight_file = '/content/drive/My Drive/Colab Notebooks/deep_learning/dataset/TwoLayerNet_weight.pkl'
with open(weight_file, 'rb') as f:
    network = pickle.load(f)
# Check the contents of the classification results
import matplotlib.pyplot as plt

def showImg(x):
    example = x.reshape((28, 28))
    plt.figure()
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(example)
    plt.show()
    return

test_size = 10
test_mask = np.random.choice(10000, test_size)
x = x_test[test_mask]
t = t_test[test_mask]

for i in range(test_size):
    y = network.predict(x[i])
    p = np.argmax(y)
    print("Correct answer: " + str(t[i]))
    print("Judgment: [ " + str(p) + " ]")
    count = 0
    for v in y:
        print("[" + str(count) + "] {:.2%}".format(v))
        count += 1
    showImg(x[i])
Occasionally I get a ModuleNotFoundError, but I've confirmed that the book's programs work.
Running train_convnet.py in Chapter 7 in interactive mode required the following tweaks:
Modify the import statements of simple_convnet.py in the ch07 folder and place the file in the common folder.
simple_convnet.py

# Original:
#   from common.layers import *
#   from common.gradient import numerical_gradient
# Modify the import statements as follows and place the file in common:
from layers import *
from gradient import numerical_gradient
Modify the import statements of the following modules in common in the same way: layers.py and trainer.py.
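For example, the top of layers.py changes like this (a sketch assuming the imports match the book's repository; check your own copy):

# layers.py, original:
#   from common.functions import *
#   from common.util import im2col, col2im
# After the change (the file already lives in common, which is on sys.path):
from functions import *
from util import im2col, col2im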
Create a new Notebook, mount the drive and set the path.
from google.colab import drive
drive.mount('/content/drive')
import sys, os
sys.path.append(os.pardir) #Settings for importing files in the parent directory
sys.path.append('/content/drive/My Drive/Colab Notebooks/deep_learning/common')
sys.path.append('/content/drive/My Drive/Colab Notebooks/deep_learning/dataset')
Execute the train_convnet.py script section by section. However, modify the import statements before executing:
from mnist import load_mnist
from simple_convnet import SimpleConvNet
from trainer import Trainer
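After fixing the imports, the rest of train_convnet.py runs as-is, section by section. A condensed sketch of those steps (hyperparameters copied from the book's script as I remember them; verify against your copy):

# Load MNIST without flattening, since the CNN expects (1, 28, 28) images
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

network = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)

trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=20, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()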
I was able to execute it using the same procedure as the Chapter 4 program.