import numpy as np

X = np.arange(24).reshape(4,3,2)
y = np.array([[0,1],[0,1],[0,1],[1,0]])
print(X.shape)
#=> (4, 3, 2)
print(y.shape)
#=> (4, 2)
print(X[0])
#=> [[0 1]
# [2 3]
# [4 5]]
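X has the shape (samples, timesteps, features) that Keras's LSTM layer expects: 4 samples, each a sequence of 3 timesteps with 2 features. y holds one one-hot label per sample.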
from keras.layers import Input, Dense, LSTM
from keras.models import Model
import tensorflow as tf
from keras import backend

tf.reset_default_graph()
backend.clear_session()
inputs = Input(shape=[3,2])
x = LSTM(8, activation='tanh', recurrent_activation='sigmoid')(inputs)
outputs = Dense(2, activation='softmax')(x)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam',loss='categorical_crossentropy')
model.summary()
history = model.fit(X, y, epochs=2, verbose=1)
for weight in model.get_weights():
    print(weight.shape)
#=> (2, 32)
# (8, 32)
# (32,)
# (8, 2)
# (2,)
model.get_weights() returns a list of the model's weight tensors as NumPy arrays.
The shapes alone are suggestive, but after some digging I found that the first three arrays are the LSTM parameters and the remaining two are the Dense layer parameters. The trailing dimension of 32 is 4 × 8: the four gate weight matrices (input, forget, candidate, output) are concatenated along the last axis, 8 units each.
Before discussing these LSTM parameters, let's review the LSTM cell itself.
The actual computation inside the cell is the following; the code later sticks to this notation as closely as possible.
$\quad i_t = \sigma(x_t W^i + h_{t-1} U^i + b^i)$
$\quad f_t = \sigma(x_t W^f + h_{t-1} U^f + b^f)$
$\quad \tilde{C}_t = \tanh(x_t W^g + h_{t-1} U^g + b^g)$
$\quad o_t = \sigma(x_t W^o + h_{t-1} U^o + b^o)$
$\quad C_t = f_t C_{t-1} + i_t \tilde{C}_t$
$\quad h_t = \tanh(C_t)\, o_t$
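In Keras terms, the σ applied to the gates $i_t$, $f_t$, and $o_t$ is recurrent_activation (default: sigmoid), while the tanh applied to the candidate $\tilde{C}_t$ and to $C_t$ in the last line is activation (default: tanh). The experiments below swap each of the two for relu in turn.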
The conclusions about the parameters returned by get_weights() are as follows.
W = model.get_weights()[0]
U = model.get_weights()[1]
b = model.get_weights()[2]
dense_W = model.get_weights()[3]
dense_b = model.get_weights()[4]
W_i, W_f, W_g, W_o = W[:,:8], W[:,8:16], W[:,16:24], W[:,24:]
U_i, U_f, U_g, U_o = U[:,:8], U[:,8:16], U[:,16:24], U[:,24:]
b_i, b_f, b_g, b_o = b[:8], b[8:16], b[16:24], b[24:]
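As a quick sanity check (my own addition, not in the original code), each slice should carve one gate's parameters out of the concatenated arrays:

for m in (W_i, W_f, W_g, W_o):
    assert m.shape == (2, 8)   # (input_dim, units) per gate
for m in (U_i, U_f, U_g, U_o):
    assert m.shape == (8, 8)   # (units, units) per gate
for v in (b_i, b_f, b_g, b_o):
    assert v.shape == (8,)     # (units,) per gate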
# Compute using the first sample.
_X = X[0]
# Define the activation functions.
def sigmoid(x):
    return 1.0/(1.0+np.exp(-x))

def relu(x):
    ret_x = x.copy()  # copy so the input array is not modified in place
    ret_x[ret_x<0] = 0
    return ret_x

# Initially, C and h are all zeros.
C = np.zeros((1,8))
h = np.zeros((1,8))
# LSTM part
for i in range(len(_X)):
    x_t = _X[i]
    i_t = sigmoid(np.dot(x_t,W_i) + np.dot(h,U_i) + b_i)
    f_t = sigmoid(np.dot(x_t,W_f) + np.dot(h,U_f) + b_f)
    tC = np.tanh(np.dot(x_t,W_g) + np.dot(h,U_g) + b_g)
    o_t = sigmoid(np.dot(x_t,W_o) + np.dot(h,U_o) + b_o)
    C = f_t*C + i_t*tC
    h = np.tanh(C) * o_t
# Dense part
output = np.dot(h,dense_W) + dense_b
# softmax calculation
E = []
Esum = 0
for i in range(2):
    E.append(np.exp(output[0,i]))
    Esum += np.exp(output[0,i])
result = []
for i in range(2):
    result.append(E[i]/Esum)
print(result)
#=> [0.5211381547054326, 0.4788618452945675]
print(model.predict(_X.reshape(1,3,2)))
#=> [[0.5211382 0.47886187]]
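As an aside, the element-wise softmax loop above can be written in one vectorized line (my own sketch, not part of the original walkthrough):

probs = np.exp(output) / np.exp(output).sum()
print(np.allclose(probs, model.predict(_X.reshape(1,3,2)), atol=1e-5))
# should print True, since the two outputs above agree to ~7 digits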
# Build a model with activation changed to relu.
tf.reset_default_graph()
backend.clear_session()
inputs = Input(shape=[3,2])
x = LSTM(8, activation='relu', recurrent_activation='sigmoid')(inputs) # relu!
outputs = Dense(2, activation='softmax')(x)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam',loss='categorical_crossentropy')
history = model.fit(X, y, epochs=2, verbose=1)
# Compute the prediction output using NumPy.
C = np.zeros((1,8))
h = np.zeros((1,8))
_X = X[0]
W = model.get_weights()[0]
U = model.get_weights()[1]
b = model.get_weights()[2]
dense_W = model.get_weights()[3]
dense_b = model.get_weights()[4]
W_i, W_f, W_g, W_o = W[:,:8], W[:,8:16], W[:,16:24], W[:,24:]
U_i, U_f, U_g, U_o = U[:,:8], U[:,8:16], U[:,16:24], U[:,24:]
b_i, b_f, b_g, b_o = b[:8], b[8:16], b[16:24], b[24:]
for i in range(len(_X)):
    x_t = _X[i]
    i_t = sigmoid(np.dot(x_t,W_i) + np.dot(h,U_i) + b_i)
    f_t = sigmoid(np.dot(x_t,W_f) + np.dot(h,U_f) + b_f)
    tC = relu(np.dot(x_t,W_g) + np.dot(h,U_g) + b_g) # relu!
    o_t = sigmoid(np.dot(x_t,W_o) + np.dot(h,U_o) + b_o)
    C = f_t*C + i_t*tC
    h = relu(C) * o_t # relu!
output = np.dot(h,dense_W) + dense_b
E = []
Esum = 0
for i in range(2):
    E.append(np.exp(output[0,i]))
    Esum += np.exp(output[0,i])
result = []
for i in range(2):
    result.append(E[i]/Esum)
# The output looks like this:
print(result)
#=> [0.5606417941538421, 0.4393582058461578]
# Check the output of model.predict().
print(model.predict(_X.reshape(1,3,2)))
#=> [[0.5606418 0.4393582]]
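Again the NumPy computation matches model.predict(), which confirms that activation (here relu) replaces the tanh applied to the candidate cell state and to the hidden-state output, while the gates stay sigmoid.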
# Build a model with recurrent_activation changed to relu.
tf.reset_default_graph()
backend.clear_session()
inputs = Input(shape=[3,2])
x = LSTM(8, activation='tanh', recurrent_activation='relu')(inputs) # relu!
outputs = Dense(2, activation='softmax')(x)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam',loss='categorical_crossentropy')
history = model.fit(X, y, epochs=2, verbose=1)
# Compute the prediction output using NumPy.
C = np.zeros((1,8))
h = np.zeros((1,8))
_X = X[0]
W = model.get_weights()[0]
U = model.get_weights()[1]
b = model.get_weights()[2]
dense_W = model.get_weights()[3]
dense_b = model.get_weights()[4]
W_i, W_f, W_g, W_o = W[:,:8], W[:,8:16], W[:,16:24], W[:,24:]
U_i, U_f, U_g, U_o = U[:,:8], U[:,8:16], U[:,16:24], U[:,24:]
b_i, b_f, b_g, b_o = b[:8], b[8:16], b[16:24], b[24:]
for i in range(len(_X)):
    x_t = _X[i]
    i_t = relu(np.dot(x_t,W_i) + np.dot(h,U_i) + b_i) # relu!
    f_t = relu(np.dot(x_t,W_f) + np.dot(h,U_f) + b_f) # relu!
    tC = np.tanh(np.dot(x_t,W_g) + np.dot(h,U_g) + b_g)
    o_t = relu(np.dot(x_t,W_o) + np.dot(h,U_o) + b_o) # relu!
    C = f_t*C + i_t*tC
    h = np.tanh(C) * o_t
output = np.dot(h,dense_W) + dense_b
E = []
Esum = 0
for i in range(2):
    E.append(np.exp(output[0,i]))
    Esum += np.exp(output[0,i])
result = []
for i in range(2):
    result.append(E[i]/Esum)
# The output looks like this:
print(result)
#=> [0.5115599582737976, 0.4884400417262024]
# Check the output of model.predict().
print(model.predict(_X.reshape(1,3,2)))
#=> [[0.51155996 0.48844004]]
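Since the three experiments repeat the same loop with different activations, they can be folded into one helper. This is a refactoring sketch of my own (the name manual_lstm_predict is made up), assuming the sigmoid and relu functions defined earlier and the Keras gate order i, f, g, o:

def manual_lstm_predict(model, x_seq, act=np.tanh, rec_act=sigmoid, units=8):
    # Unpack the five weight arrays: LSTM kernel, recurrent kernel, bias,
    # then the Dense kernel and bias.
    W, U, b, dense_W, dense_b = model.get_weights()
    # Keras concatenates the four gates along the last axis in i, f, g, o order.
    def gates(M):
        return (M[..., :units], M[..., units:2*units],
                M[..., 2*units:3*units], M[..., 3*units:])
    W_i, W_f, W_g, W_o = gates(W)
    U_i, U_f, U_g, U_o = gates(U)
    b_i, b_f, b_g, b_o = gates(b)
    C = np.zeros((1, units))
    h = np.zeros((1, units))
    for x_t in x_seq:
        i_t = rec_act(np.dot(x_t, W_i) + np.dot(h, U_i) + b_i)  # input gate
        f_t = rec_act(np.dot(x_t, W_f) + np.dot(h, U_f) + b_f)  # forget gate
        g_t = act(np.dot(x_t, W_g) + np.dot(h, U_g) + b_g)      # candidate
        o_t = rec_act(np.dot(x_t, W_o) + np.dot(h, U_o) + b_o)  # output gate
        C = f_t * C + i_t * g_t
        h = act(C) * o_t
    logits = np.dot(h, dense_W) + dense_b
    return np.exp(logits) / np.exp(logits).sum()  # softmax

# For example, the last experiment becomes:
# print(manual_lstm_predict(model, X[0], act=np.tanh, rec_act=relu))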