import numpy as np

# Toy data: 4 samples, 3 timesteps, 2 features
X = np.arange(24).reshape(4,3,2)
y = np.array([[0,1],[0,1],[0,1],[1,0]])
print(X.shape)
#=> (4, 3, 2)
print(y.shape)
#=> (4, 2)
print(X[0])
#=> [[0 1]
# [2 3]
# [4 5]]
from keras.layers import Input, Dense
from keras.models import Model
from keras.layers.recurrent import LSTM
import tensorflow as tf
from keras import backend
tf.reset_default_graph()
backend.clear_session()
inputs = Input(shape=[3,2])
x = LSTM(8, activation='tanh', recurrent_activation='sigmoid')(inputs)
outputs = Dense(2, activation='softmax')(x)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam',loss='categorical_crossentropy')
model.summary()
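As a cross-check on the summary, the parameter counts follow directly from the layer sizes: the LSTM has $4 \times (2 \times 8 + 8 \times 8 + 8) = 352$ parameters and the Dense layer has $8 \times 2 + 2 = 18$, so model.summary() should report 370 trainable parameters.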
history = model.fit(X, y, epochs=2, verbose=1)
for weight in model.get_weights():
    print(weight.shape)
#=> (2, 32)
# (8, 32)
# (32,)
# (8, 2)
# (2,)
model.get_weights() returns a list of all of the model's weights as NumPy arrays. The shapes alone are suggestive, and after some investigation I confirmed that the first three arrays are the LSTM parameters and the remaining two are the Dense layer parameters.
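One way to double-check which arrays belong to which layer is to ask each layer directly (a minimal sketch; the indices assume the Input/LSTM/Dense model built above, where layers[0] is the InputLayer):

print([w.shape for w in model.layers[1].get_weights()])  # LSTM: kernel, recurrent kernel, bias
#=> [(2, 32), (8, 32), (32,)]
print([w.shape for w in model.layers[2].get_weights()])  # Dense: kernel, bias
#=> [(8, 2), (2,)]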
Before discussing these parameters, let's double-check how an LSTM cell computes. In the code below I will stick to the notation shown above as much as possible. The actual calculation per timestep is as follows, where $\circ$ denotes the element-wise product:
$\quad i_t = \sigma(x_t W^i + h_{t-1} U^i + b^i)$
$\quad f_t = \sigma(x_t W^f + h_{t-1} U^f + b^f)$
$\quad \tilde{C}_t = \tanh(x_t W^g + h_{t-1} U^g + b^g)$
$\quad o_t = \sigma(x_t W^o + h_{t-1} U^o + b^o)$
$\quad C_t = f_t \circ C_{t-1} + i_t \circ \tilde{C}_t$
$\quad h_t = \tanh(C_t) \circ o_t$
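Given units = 8 and an input dimension of 2, these equations fix the shape of every block, consistent with the concatenated arrays printed earlier. A minimal sanity check (a sketch; it assumes the trained model above is still in scope):

units, input_dim = 8, 2
W, U, b = model.get_weights()[:3]
assert W.shape == (input_dim, 4 * units)  # four (2, 8) input kernels side by side
assert U.shape == (units, 4 * units)      # four (8, 8) recurrent kernels side by side
assert b.shape == (4 * units,)            # four (8,) bias vectors concatenated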
The parameters returned by get_weights() map onto this notation as follows.
W = model.get_weights()[0]
U = model.get_weights()[1]
b = model.get_weights()[2]
dense_W = model.get_weights()[3]
dense_b = model.get_weights()[4]
W_i, W_f, W_g, W_o = W[:,:8], W[:,8:16], W[:,16:24], W[:,24:]
U_i, U_f, U_g, U_o = U[:,:8], U[:,8:16], U[:,16:24], U[:,24:]
b_i, b_f, b_g, b_o = b[:8], b[8:16], b[16:24], b[24:]
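Keras stores the four gate kernels concatenated along the last axis in the order i, f, g, o, which is exactly what the slices above unpack. As a quick consistency check, the slices should tile the original arrays (sketch):

assert np.allclose(np.concatenate([W_i, W_f, W_g, W_o], axis=1), W)
assert np.allclose(np.concatenate([U_i, U_f, U_g, U_o], axis=1), U)
assert np.allclose(np.concatenate([b_i, b_f, b_g, b_o]), b)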
# We will calculate using the first sample.
_X = X[0]

# Define the activation functions.
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def relu(x):
    # np.maximum returns a new array instead of clipping x in place
    return np.maximum(x, 0.0)

# The initial values of C and h are all zeros.
C = np.zeros((1,8))
h = np.zeros((1,8))
# LSTM part
for i in range(len(_X)):
    x_t = _X[i]
    i_t = sigmoid(np.dot(x_t,W_i) + np.dot(h,U_i) + b_i)
    f_t = sigmoid(np.dot(x_t,W_f) + np.dot(h,U_f) + b_f)
    tC = np.tanh(np.dot(x_t,W_g) + np.dot(h,U_g) + b_g)
    o_t = sigmoid(np.dot(x_t,W_o) + np.dot(h,U_o) + b_o)
    C = f_t*C + i_t*tC
    h = np.tanh(C) * o_t
# Dense part
output = np.dot(h,dense_W) + dense_b
# softmax calculation
E = []
Esum = 0
for i in range(2):
    E.append(np.exp(output[0,i]))
    Esum += np.exp(output[0,i])
result = []
for i in range(2):
    result.append(E[i]/Esum)
print(result)
#=> [0.5211381547054326, 0.4788618452945675]
print(model.predict(_X.reshape(1,3,2)))
#=> [[0.5211382 0.47886187]]
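The manual calculation matches model.predict() to float32 precision. The loop above is just softmax written longhand; here is a vectorized equivalent with an explicit comparison (a sketch; the tolerance of 1e-6 is an arbitrary choice):

def softmax(v):
    # subtracting the max keeps np.exp numerically stable
    e = np.exp(v - v.max())
    return e / e.sum()

print(np.allclose(softmax(output[0]), model.predict(_X.reshape(1,3,2))[0], atol=1e-6))
#=> True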
#Create a model with activation changed to relu.
tf.reset_default_graph()
backend.clear_session()
inputs = Input(shape=[3,2])
x = LSTM(8, activation='relu', recurrent_activation='sigmoid')(inputs) # relu!
outputs = Dense(2, activation='softmax')(x)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam',loss='categorical_crossentropy')
history = model.fit(X, y, epochs=2, verbose=1)
# Reproduce the prediction with NumPy.
C = np.zeros((1,8))
h = np.zeros((1,8))
_X = X[0]
W = model.get_weights()[0]
U = model.get_weights()[1]
b = model.get_weights()[2]
dense_W = model.get_weights()[3]
dense_b = model.get_weights()[4]
W_i, W_f, W_g, W_o = W[:,:8], W[:,8:16], W[:,16:24], W[:,24:]
U_i, U_f, U_g, U_o = U[:,:8], U[:,8:16], U[:,16:24], U[:,24:]
b_i, b_f, b_g, b_o = b[:8], b[8:16], b[16:24], b[24:]
for i in range(len(_X)):
    x_t = _X[i]
    i_t = sigmoid(np.dot(x_t,W_i) + np.dot(h,U_i) + b_i)
    f_t = sigmoid(np.dot(x_t,W_f) + np.dot(h,U_f) + b_f)
    tC = relu(np.dot(x_t,W_g) + np.dot(h,U_g) + b_g) # relu!
    o_t = sigmoid(np.dot(x_t,W_o) + np.dot(h,U_o) + b_o)
    C = f_t*C + i_t*tC
    h = relu(C) * o_t # relu!
output = np.dot(h,dense_W) + dense_b
E = []
Esum = 0
for i in range(2):
    E.append(np.exp(output[0,i]))
    Esum += np.exp(output[0,i])
result = []
for i in range(2):
    result.append(E[i]/Esum)
#The output looks like this:
print(result)
#=> [0.5606417941538421, 0.4393582058461578]
# Check against the output of model.predict().
print(model.predict(_X.reshape(1,3,2)))
#=> [[0.5606418 0.4393582]]
# Create a model with recurrent_activation changed to relu.
tf.reset_default_graph()
backend.clear_session()
inputs = Input(shape=[3,2])
x = LSTM(8, activation='tanh', recurrent_activation='relu')(inputs) # relu!
outputs = Dense(2, activation='softmax')(x)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam',loss='categorical_crossentropy')
history = model.fit(X, y, epochs=2, verbose=1)
# Reproduce the prediction with NumPy.
C = np.zeros((1,8))
h = np.zeros((1,8))
_X = X[0]
W = model.get_weights()[0]
U = model.get_weights()[1]
b = model.get_weights()[2]
dense_W = model.get_weights()[3]
dense_b = model.get_weights()[4]
W_i, W_f, W_g, W_o = W[:,:8], W[:,8:16], W[:,16:24], W[:,24:]
U_i, U_f, U_g, U_o = U[:,:8], U[:,8:16], U[:,16:24], U[:,24:]
b_i, b_f, b_g, b_o = b[:8], b[8:16], b[16:24], b[24:]
for i in range(len(_X)):
    x_t = _X[i]
    i_t = relu(np.dot(x_t,W_i) + np.dot(h,U_i) + b_i) # relu!
    f_t = relu(np.dot(x_t,W_f) + np.dot(h,U_f) + b_f) # relu!
    tC = np.tanh(np.dot(x_t,W_g) + np.dot(h,U_g) + b_g)
    o_t = relu(np.dot(x_t,W_o) + np.dot(h,U_o) + b_o) # relu!
    C = f_t*C + i_t*tC
    h = np.tanh(C) * o_t
output = np.dot(h,dense_W) + dense_b
E = []
Esum = 0
for i in range(2):
    E.append(np.exp(output[0,i]))
    Esum += np.exp(output[0,i])
result = []
for i in range(2):
    result.append(E[i]/Esum)
#The output looks like this:
print(result)
#=> [0.5115599582737976, 0.4884400417262024]
# Check against the output of model.predict().
print(model.predict(_X.reshape(1,3,2)))
#=> [[0.51155996 0.48844004]]