import numpy as np

X = np.arange(24).reshape(4,3,2)
y = np.array([[0,1],[0,1],[0,1],[1,0]])
print(X.shape)
#=> (4, 3, 2)
print(y.shape)
#=> (4, 2)
print(X[0])
#=> [[0 1]
#    [2 3]
#    [4 5]]
from keras.layers import Input, Dense
from keras.models import Model
from keras.layers.recurrent import LSTM
import tensorflow as tf
from keras import backend
tf.reset_default_graph()
backend.clear_session()
inputs = Input(shape=[3,2])
x = LSTM(8, activation='tanh', recurrent_activation='sigmoid')(inputs)
outputs = Dense(2, activation='softmax')(x)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam',loss='categorical_crossentropy')
model.summary()
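As a quick sanity check on the summary, the parameter counts can be reproduced by hand (a small sketch; the LSTM formula 4 * units * (input_dim + units + 1) follows from the four gates each having a kernel slice, a recurrent-kernel slice, and a bias slice):
units, input_dim, n_classes = 8, 2, 2
lstm_params = 4 * units * (input_dim + units + 1) #four gates: W, U, b
dense_params = units * n_classes + n_classes      #kernel + bias
print(lstm_params, dense_params, lstm_params + dense_params)
#=> 352 18 370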
 
history = model.fit(X, y, epochs=2, verbose=1)
for weight in model.get_weights():
    print(weight.shape)
#=> (2, 32)
#   (8, 32)
#   (32,)
#   (8, 2)
#   (2,)
model.get_weights() returns a list of all the model's weights as NumPy arrays.
The shapes alone suggest the layout, and after some investigation I confirmed that the first three arrays are the LSTM parameters and the remaining two belong to the Dense layer.
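One way to confirm this mapping (a quick check; the exact variable names can differ between Keras versions) is to print each weight's name next to its shape:
for w in model.weights:
    print(w.name, w.shape)
#Expect names like lstm.../kernel, lstm.../recurrent_kernel, lstm.../bias,
#followed by dense.../kernel and dense.../bias.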
Before discussing these parameters, let's double-check the LSTM cell itself.

Below, I will keep the code as close to this notation as possible. The actual per-step computation is as follows (the products in the last two equations are elementwise):
$\quad i_t = \sigma(x_t W^i + h_{t-1} U^i + b^i)$
$\quad f_t = \sigma(x_t W^f + h_{t-1} U^f + b^f)$
$\quad \tilde{C}_t = \tanh(x_t W^g + h_{t-1} U^g + b^g)$
$\quad o_t = \sigma(x_t W^o + h_{t-1} U^o + b^o)$
$\quad C_t = f_t C_{t-1} + i_t \tilde{C}_t$
$\quad h_t = \tanh(C_t) o_t$
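To make the mapping from these equations to code explicit, one time step can be written as a single function (a minimal sketch assuming NumPy row vectors, the gate slices defined just below, and the sigmoid helper defined further down):
def lstm_step(x_t, h, C, Ws, Us, bs):
    #Ws, Us, bs are the (i, f, g, o) slices of the kernel, recurrent kernel, and bias.
    W_i, W_f, W_g, W_o = Ws
    U_i, U_f, U_g, U_o = Us
    b_i, b_f, b_g, b_o = bs
    i_t = sigmoid(np.dot(x_t, W_i) + np.dot(h, U_i) + b_i) #input gate
    f_t = sigmoid(np.dot(x_t, W_f) + np.dot(h, U_f) + b_f) #forget gate
    g_t = np.tanh(np.dot(x_t, W_g) + np.dot(h, U_g) + b_g) #candidate cell state
    o_t = sigmoid(np.dot(x_t, W_o) + np.dot(h, U_o) + b_o) #output gate
    C = f_t * C + i_t * g_t
    h = np.tanh(C) * o_t
    return h, C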
The conclusions about the parameters returned by get_weights() are as follows.
W = model.get_weights()[0]
U = model.get_weights()[1]
b = model.get_weights()[2]
dense_W = model.get_weights()[3]
dense_b = model.get_weights()[4]
W_i, W_f, W_g, W_o = W[:,:8], W[:,8:16], W[:,16:24], W[:,24:]
U_i, U_f, U_g, U_o = U[:,:8], U[:,8:16], U[:,16:24], U[:,24:]
b_i, b_f, b_g, b_o = b[:8], b[8:16], b[16:24], b[24:]
#We will calculate using the first sample.
_X = X[0]
#Define the activation functions.
def sigmoid(x):
    return 1.0/(1.0+np.exp(-x))
def relu(x):
    #np.maximum avoids mutating the input array in place
    return np.maximum(x, 0)
#The initial values of C and h are all zero.
C = np.zeros((1,8))
h = np.zeros((1,8))
#LSTM part
for i in range(len(_X)):
    x_t = _X[i]
    i_t = sigmoid(np.dot(x_t,W_i) + np.dot(h,U_i) + b_i)
    f_t = sigmoid(np.dot(x_t,W_f) + np.dot(h,U_f) + b_f)
    tC = np.tanh(np.dot(x_t,W_g) + np.dot(h,U_g) + b_g)
    o_t = sigmoid(np.dot(x_t,W_o) + np.dot(h,U_o) + b_o)
    C = f_t*C + i_t*tC
    h = np.tanh(C) * o_t
#Dense part
output = np.dot(h,dense_W) + dense_b
#softmax calculation
E = []
Esum = 0
for i in range(2):
    E.append(np.exp(output[0,i]))
    Esum += np.exp(output[0,i])
result = []
for i in range(2):
    result.append(E[i]/Esum)
    
print(result)
#=> [0.5211381547054326, 0.4788618452945675]
print(model.predict(_X.reshape(1,3,2)))
#=> [[0.5211382  0.47886187]]
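The manual softmax loop above can also be vectorized, and the agreement with Keras checked programmatically (a small sketch; the tolerance is generous given the matching digits above):
probs = np.exp(output) / np.exp(output).sum(axis=1, keepdims=True)
print(np.allclose(probs, model.predict(_X.reshape(1,3,2)), atol=1e-6))
#Should print True, since the two outputs above agree to about seven digits.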
#Create a model with activation changed to relu.
tf.reset_default_graph()
backend.clear_session()
inputs = Input(shape=[3,2])
x = LSTM(8, activation='relu', recurrent_activation='sigmoid')(inputs) # relu!
outputs = Dense(2, activation='softmax')(x)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam',loss='categorical_crossentropy')
history = model.fit(X, y, epochs=2, verbose=1)
#Compute the prediction output with numpy.
C = np.zeros((1,8))
h = np.zeros((1,8))
_X = X[0]
W = model.get_weights()[0]
U = model.get_weights()[1]
b = model.get_weights()[2]
dense_W = model.get_weights()[3]
dense_b = model.get_weights()[4]
W_i, W_f, W_g, W_o = W[:,:8], W[:,8:16], W[:,16:24], W[:,24:]
U_i, U_f, U_g, U_o = U[:,:8], U[:,8:16], U[:,16:24], U[:,24:]
b_i, b_f, b_g, b_o = b[:8], b[8:16], b[16:24], b[24:]
for i in range(len(_X)):
    x_t = _X[i]
    i_t = sigmoid(np.dot(x_t,W_i) + np.dot(h,U_i) + b_i)
    f_t = sigmoid(np.dot(x_t,W_f) + np.dot(h,U_f) + b_f)
    tC = relu(np.dot(x_t,W_g) + np.dot(h,U_g) + b_g) # relu!
    o_t = sigmoid(np.dot(x_t,W_o) + np.dot(h,U_o) + b_o)
    C = f_t*C + i_t*tC
    h = relu(C) * o_t # relu!
output = np.dot(h,dense_W) + dense_b
E = []
Esum = 0
for i in range(2):
    E.append(np.exp(output[0,i]))
    Esum += np.exp(output[0,i])
result = []
for i in range(2):
    result.append(E[i]/Esum)
#The output looks like this:
print(result)
#=> [0.5606417941538421, 0.4393582058461578]
#Check the output of model.predict().
print(model.predict(_X.reshape(1,3,2)))
#=> [[0.5606418 0.4393582]]
#Create a model with recurrent_activation changed to relu.
tf.reset_default_graph()
backend.clear_session()
inputs = Input(shape=[3,2])
x = LSTM(8, activation='tanh', recurrent_activation='relu')(inputs) # relu!
outputs = Dense(2, activation='softmax')(x)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam',loss='categorical_crossentropy')
history = model.fit(X, y, epochs=2, verbose=1)
#Compute the prediction output with numpy.
C = np.zeros((1,8))
h = np.zeros((1,8))
_X = X[0]
W = model.get_weights()[0]
U = model.get_weights()[1]
b = model.get_weights()[2]
dense_W = model.get_weights()[3]
dense_b = model.get_weights()[4]
W_i, W_f, W_g, W_o = W[:,:8], W[:,8:16], W[:,16:24], W[:,24:]
U_i, U_f, U_g, U_o = U[:,:8], U[:,8:16], U[:,16:24], U[:,24:]
b_i, b_f, b_g, b_o = b[:8], b[8:16], b[16:24], b[24:]
for i in range(len(_X)):
    x_t = _X[i]
    i_t = relu(np.dot(x_t,W_i) + np.dot(h,U_i) + b_i) # relu!
    f_t = relu(np.dot(x_t,W_f) + np.dot(h,U_f) + b_f) # relu!
    tC = np.tanh(np.dot(x_t,W_g) + np.dot(h,U_g) + b_g)
    o_t = relu(np.dot(x_t,W_o) + np.dot(h,U_o) + b_o) # relu!
    C = f_t*C + i_t*tC
    h = np.tanh(C) * o_t
output = np.dot(h,dense_W) + dense_b
E = []
Esum = 0
for i in range(2):
    E.append(np.exp(output[0,i]))
    Esum += np.exp(output[0,i])
result = []
for i in range(2):
    result.append(E[i]/Esum)
#The output looks like this:
print(result)
#=> [0.5115599582737976, 0.4884400417262024]
#Check the output of model.predict().
print(model.predict(_X.reshape(1,3,2)))
#=> [[0.51155996 0.48844004]]
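Putting the three experiments together, the whole NumPy forward pass can be wrapped in one helper (a sketch; numpy_lstm_predict is a hypothetical name, and the activations are passed in as functions so the same code covers all three models):
def numpy_lstm_predict(model, x, act=np.tanh, rec_act=sigmoid, units=8):
    W, U, b, dense_W, dense_b = model.get_weights()
    C = np.zeros((1, units))
    h = np.zeros((1, units))
    for t in range(len(x)):
        z = np.dot(x[t], W) + np.dot(h, U) + b #all four gates in one product
        i_t = rec_act(z[:, :units])            #input gate
        f_t = rec_act(z[:, units:2*units])     #forget gate
        g_t = act(z[:, 2*units:3*units])       #candidate cell state
        o_t = rec_act(z[:, 3*units:])          #output gate
        C = f_t*C + i_t*g_t
        h = act(C) * o_t
    logits = np.dot(h, dense_W) + dense_b
    return np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)

#For the last model (tanh activation, relu recurrent_activation):
print(numpy_lstm_predict(model, X[0], act=np.tanh, rec_act=relu))
#Should match the model.predict() output above.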
