While reading "Deep Learning from Scratch" (written by Yasuki Saito, published by O'Reilly Japan), I am making notes of the sites I referred to. Part 7 ← → Part 9
This method makes the calculation of the gradient very fast, and I understand the advantage of modularizing the implementation as "layers".
From P162, a training program that uses the error backpropagation method is listed, but in order to run it you also need the programs with the various definitions given on P142 and after.
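To recall what "implementing as a layer" means, here is the kind of class Chapter 5 builds up. This sketch is written from memory along the lines of the book's MulLayer, so the details may differ from the printed code.
class MulLayer:
    def __init__(self):
        self.x = None
        self.y = None
    def forward(self, x, y):
        # Remember the inputs so backward() can use them
        self.x = x
        self.y = y
        return x * y
    def backward(self, dout):
        # For multiplication, swap x and y when passing the upstream gradient back
        dx = dout * self.y
        dy = dout * self.x
        return dx, dy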
Just because something is explained does not mean you can understand it.
In such cases, all you can do is pin down the parts you do understand, move on, and try things out for yourself with whatever the book gives you.
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
def function_2(x, y):
    return x**2/20 + y**2
# Coordinate range of x and y
x = np.arange(-10.0, 10.0, 0.1)
y = np.arange(-10.0, 10.0, 0.1)
# Grid data for x and y
X, Y = np.meshgrid(x, y)
# Evaluate the defined function on the grid
Z = function_2(X, Y)
# Add a figure
fig = plt.figure(figsize=(10.0, 8.0))
# Create a 3D axis
ax = fig.add_subplot(111, projection='3d')
# Set the title and axis labels
ax.set_title("Figure 6-1 f(x,y)=x**2/20+y**2", size = 14)
ax.set_xlabel("x", size = 14)
ax.set_ylabel("y", size = 14)
ax.set_zlabel("f(x, y)", size = 14)
# Set the axis ticks
ax.set_xticks([-10.0, -5.0, 0.0, 5.0, 10.0])
ax.set_yticks([-10.0, -5.0, 0.0, 5.0, 10.0])
ax.set_zticks([0.0, 20.0, 40.0, 60.0, 80.0, 100.0])
# Draw
ax.plot_wireframe(X, Y, Z)
ax.plot_wireframe(X, Y, Z)
#ax.plot_surface(X, Y, Z, rstride=1, cstride=1)
#ax.contour3D(X,Y,Z)
#ax.contourf3D(X,Y,Z)
#ax.scatter3D(np.ravel(X),np.ravel(Y),np.ravel(Z))
plt.show()
If you replace plot_wireframe with one of the commented-out calls, the plot is drawn in a different style.
While researching various things, I found the following. You can rotate the drawn graph and view it from any direction.
import numpy as np
import matplotlib
# This seems to set the matplotlib backend, though I don't really know what that means.
# After adding this line, however, the graph opens in a new window.
matplotlib.use('TkAgg')
#for plotting
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def function_2(x, y):
    return x**2/20 + y**2
x = np.arange(-10.0, 10.0, 0.1)
y = np.arange(-10.0, 10.0, 0.1)
X, Y = np.meshgrid(x, y)
Z = function_2(X, Y)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(X, Y, Z, cmap='bwr', linewidth=0)
fig.colorbar(surf)
ax.set_title("Surface Plot")
fig.show()
The color of the graph seems to be specified by the cmap parameter. (Reference: matplotlib color example code)
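As a quick check of this (my own sketch, not code from the book), swapping the cmap argument changes the color scheme; 'viridis', 'jet', and 'coolwarm' are other built-in colormap names that work the same way.
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
x = np.arange(-10.0, 10.0, 0.1)
y = np.arange(-10.0, 10.0, 0.1)
X, Y = np.meshgrid(x, y)
Z = X**2/20 + Y**2
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Change cmap to 'viridis', 'jet', 'coolwarm', ... to change the colors
surf = ax.plot_surface(X, Y, Z, cmap='viridis', linewidth=0)
fig.colorbar(surf)
plt.show()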
import matplotlib.pyplot as plt
import numpy as np
def function_2(x, y):
    return x**2/20 + y**2
x = np.arange(-10.0, 10.0, 0.1)
y = np.arange(-10.0, 10.0, 0.1)
h = np.arange(0., 100.0, 1.0)
X, Y = np.meshgrid(x, y)
Z = function_2(X, Y)
plt.figure()
plt.contour(X, Y, Z, levels=h)
plt.xlim([-10, 10])
plt.ylim([-10, 10])
plt.show()
contour(positions on the x-axis, positions on the y-axis, heights at those coordinates, levels=[heights at which to draw lines]). Since the increment of the arrays x and y is set to 0.1, the contour lines are smooth, but the plot takes time to display. If you set the increment to 1.0, it is displayed immediately, but the lines are jagged. h specifies the heights at which to draw lines; in the example, a line is drawn at every height from 0 to 100 in steps of 1.
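As an example of that trade-off (my own variation on the code above, not from the book), coarsening the grid step to 1.0 and drawing a level only every 10 makes the plot appear almost instantly, at the cost of jagged lines and fewer contours.
import matplotlib.pyplot as plt
import numpy as np
def function_2(x, y):
    return x**2/20 + y**2
x = np.arange(-10.0, 10.0, 1.0)   # coarser step: fast but jagged
y = np.arange(-10.0, 10.0, 1.0)
h = np.arange(0., 100.0, 10.0)    # draw a line only every 10 in height
X, Y = np.meshgrid(x, y)
Z = function_2(X, Y)
plt.contour(X, Y, Z, levels=h)
plt.xlim([-10, 10])
plt.ylim([-10, 10])
plt.show()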
import matplotlib.pyplot as plt
import numpy as np
def _numerical_gradient_no_batch(f, x):
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)
    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x)  # f(x+h)
        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val  # Restore the value
    return grad

def numerical_gradient(f, X):
    if X.ndim == 1:
        return _numerical_gradient_no_batch(f, X)
    else:
        grad = np.zeros_like(X)
        for idx, x in enumerate(X):
            grad[idx] = _numerical_gradient_no_batch(f, x)
        return grad

def function_2(x):
    return (x[0]**2/20 + x[1]**2)
x = np.arange(-10.0, 10.0, 1.)
y = np.arange(-10.0, 10.0, 1.)
h = np.arange(0., 100.0, 10.0)
X, Y = np.meshgrid(x, y)
X = X.flatten()
Y = Y.flatten()
grad = numerical_gradient(function_2, np.array([X, Y]).T).T
plt.figure()
plt.quiver(X, Y, -grad[0], -grad[1], angles="xy",color="#666666")
plt.xlim([-10, 10])
plt.ylim([-5, 5])
plt.xlabel('x')
plt.ylabel('y')
plt.grid()
plt.draw()
plt.show()
I just changed function_2 in gradient_2d.py in the ch04 folder. quiver(positions on the x-axis, positions on the y-axis, x components of the gradient, y components of the gradient)
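As a minimal illustration of those four arguments (my own toy example, not the book's code), here is a small grid of arrows that all point back toward the origin.
import matplotlib.pyplot as plt
import numpy as np
# Arrow positions on a 5x5 grid
X, Y = np.meshgrid(np.arange(-2, 3), np.arange(-2, 3))
# Arrow components: each arrow points from (x, y) toward the origin
U = -X
V = -Y
plt.quiver(X, Y, U, V, angles="xy", color="#666666")
plt.xlim([-3, 3])
plt.ylim([-3, 3])
plt.grid()
plt.show()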
import matplotlib.pyplot as plt
import numpy as np
def numerical_gradient(f, x):
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)  # f(x+h)
        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val  # Restore the value
        it.iternext()
    return grad

def adagrad(x, lr, grad, v, moment):
    v += grad * grad
    x -= lr * grad / (np.sqrt(v) + 1e-7)
    return x, v

def momentum(x, lr, grad, v, moment):
    v = moment*v - lr*grad
    x += v
    return x, v

def sgd(x, lr, grad, v = None, moment = None):
    x -= lr * grad
    return x, v

def gradient_descent(opt, f, init_x, lr=0.01, step_num=100, moment=0.9):
    x = init_x
    x_history = []
    v = 0
    for i in range(step_num):
        x_history.append( x.copy() )
        grad = numerical_gradient(f, x)
        x, v = opt(x, lr, grad, v, moment)
    return np.array(x_history)

def function_1(x, y):
    return x**2/20 + y**2

def function_2(x):
    return (x[0]**2/20 + x[1]**2)
x = np.arange(-10.0, 10.0, 0.1)
y = np.arange(-10.0, 10.0, 0.1)
h = np.arange(0., 10.0, 1.0)
X, Y = np.meshgrid(x, y)
Z = function_1(X, Y)
plt.figure()
plt.contour(X, Y, Z, levels=h)
init_x = np.array([-7.0, 2.0])
x_history = gradient_descent(sgd, function_2, init_x, lr=0.9, step_num=100)
#x_history = gradient_descent(momentum, function_2, init_x, lr=0.2, step_num=20, moment=0.9)
#x_history = gradient_descent(adagrad, function_2, init_x, lr=0.9, step_num=100)
plt.plot(x_history[:,0], x_history[:,1],'-ro')
plt.xlim([-10, 10])
plt.ylim([-10, 10])
plt.xlabel('x')
plt.ylabel('y')
plt.grid()
plt.show()
In the case of SGD, if the learning rate lr is not set properly, the path does not zigzag as in the book's example. At 1.0 it zigzags but does not converge to 0; at 0.7 or less it converges to 0 before the zigzag becomes noticeable. 0.9 gives the graph closest to the book's.
In the case of momentum, unless you adjust the value of moment as well as the learning rate lr, the result does not look like the example in the book.
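One way to see this for yourself (my own sketch, reusing function_2, sgd, gradient_descent, X, Y, Z, and h from the script above; the lr values are just ones I tried) is to run gradient_descent with several learning rates and overlay the paths on the same contour plot.
plt.figure()
plt.contour(X, Y, Z, levels=h)
for lr, style in [(1.0, '-bo'), (0.9, '-ro'), (0.5, '-go')]:
    hist = gradient_descent(sgd, function_2, np.array([-7.0, 2.0]), lr=lr, step_num=100)
    plt.plot(hist[:, 0], hist[:, 1], style, label='lr=' + str(lr))
plt.legend()
plt.xlim([-10, 10])
plt.ylim([-10, 10])
plt.xlabel('x')
plt.ylabel('y')
plt.grid()
plt.show()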
import numpy as np
import matplotlib
matplotlib.use('TkAgg')
def numerical_gradient(f, x):
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)  # f(x+h)
        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val  # Restore the value
        it.iternext()
    return grad

def adagrad(x, lr, grad, v, moment):
    v += grad * grad
    x -= lr * grad / (np.sqrt(v) + 1e-7)
    return x, v

def momentum(x, lr, grad, v, moment):
    v = moment*v - lr*grad
    x += v
    return x, v

def sgd(x, lr, grad, v = None, moment = None):
    x -= lr * grad
    return x, v

def gradient_descent(opt, f, init_x, lr=0.01, step_num=100, moment=0.9):
    x = init_x
    x_history = []
    v = 0
    for i in range(step_num):
        w = x.tolist()
        z = f(x)
        w.append(z)
        x_history.append( w )
        grad = numerical_gradient(f, x)
        x, v = opt(x, lr, grad, v, moment)
    return np.array(x_history)

def function_1(x, y):
    return x**2/20 + y**2

def function_2(x):
    return (x[0]**2/20 + x[1]**2)
#for plotting
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
x = np.arange(-8.0, 8.0, .1)
y = np.arange(-4.0, 4.0, .1)
X, Y = np.meshgrid(x, y)
Z = function_1(X, Y)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(X, Y, Z, cmap='bwr', linewidth=0)
init_x = np.array([-7.0, 2.0])
x_history = gradient_descent(sgd, function_2, init_x, lr=0.9, step_num=100)
#x_history = gradient_descent(momentum, function_2, init_x, lr=0.2, step_num=20, moment=0.9)
#x_history = gradient_descent(adagrad, function_2, init_x, lr=0.9, step_num=100)
ax.plot(x_history[:,0], x_history[:,1], x_history[:,2],'-ro')
fig.colorbar(surf)
ax.set_title("Surface Plot")
fig.show()
In the definition of gradient_descent it says x_history.append( x.copy() ). This means "make a duplicate with the same contents as x and add it to x_history". If it were written as x_history.append( x ), it would mean "add the object referenced by the name x to x_history", so when the contents of x are later rewritten, what is stored in x_history changes too. The same thing happens with the assignment a = x. This is how Python handles references to arrays (and other mutable objects), and it is explained in various places.
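A minimal check of the difference (my own snippet; the variable names are just for illustration):
import numpy as np
x = np.array([-7.0, 2.0])
history_ref = []
history_copy = []
history_ref.append(x)          # stores a reference to the same array
history_copy.append(x.copy())  # stores an independent duplicate
x -= 1.0                       # rewrite the contents of x in place
print(history_ref[0])   # [-8.  1.] -- changed together with x
print(history_copy[0])  # [-7.  2.] -- unchanged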
The definition of numerical_gradient uses a function called np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) to control the loop. If you don't understand it, print out the contents inside the loop and check.
x = np.array([[-7.0, 2.0],[-6., 1.],[-5., 0.]])
it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
while not it.finished:
    idx = it.multi_index
    print("x[" + str(idx) + "] : " + str(x[idx]))
    it.iternext()
x[(0, 0)] : -7.0
x[(0, 1)] : 2.0
x[(1, 0)] : -6.0
x[(1, 1)] : 1.0
x[(2, 0)] : -5.0
x[(2, 1)] : 0.0
I see. Now let's change the input a little.
x = np.array([[-7.0, 2.0,-6.],[1., -5., 0.]])
it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
while not it.finished:
    idx = it.multi_index
    print("x[" + str(idx) + "] : " + str(x[idx]))
    it.iternext()
x[(0, 0)] : -7.0
x[(0, 1)] : 2.0
x[(0, 2)] : -6.0
x[(1, 0)] : 1.0
x[(1, 1)] : -5.0
x[(1, 2)] : 0.0
Even if the number of elements or the number of dimensions of x changes, the loop can process it without changing the program code.
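For example (a quick check of my own, not from the book), the same loop handles a 1D array and a 3D array without any changes.
import numpy as np
for x in (np.array([1.0, 2.0, 3.0]), np.zeros((2, 2, 2))):
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        print("x[" + str(idx) + "] : " + str(x[idx]))
        it.iternext()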
This is the end of Chapter 6, Section 1. I was mostly just playing around with drawing graphs, but I learned about arrays and Python grammar, and I was able to understand what the gradient looks like and how it is drawn on a graph.
References: A very summary of matplotlib / Python 3: How to write a 3D graph / mplot3d tutorial / matplotlib color example code / matplotlib axes.plot