Last time, I built a handwritten-digit recognition model with a CNN and tuned its hyperparameters using grid search. That showed how to reach a highly accurate model by choosing good values for the activation, optimizer, epochs, and batch_size. This time, I would like to examine the effect of the number of filters (the filters argument of the Conv2D layers).
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Activation, Conv2D, Dense, Flatten, MaxPooling2D, Reshape, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import accuracy_score
from collections import OrderedDict
import pandas as pd
import os
from IPython.display import display  # used below to render the result tables in a notebook
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Normalize pixel values from the 0-255 range to 0-1
X_train = X_train / 255.0
X_test = X_test / 255.0
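As a quick aside (this check is mine, not essential to the experiment), you can confirm the shapes and value range before training:

# Optional sanity check: MNIST gives 60,000/10,000 grayscale 28x28 images
print(X_train.shape, X_test.shape)   # (60000, 28, 28) (10000, 28, 28)
print(X_train.min(), X_train.max())  # 0.0 1.0 after normalization
print(y_train[:10])                  # integer class labels 0-9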
class CNNModel:
    def __init__(self, hid_dim_0=32, hid_dim_1=64):
        # Input is a 28x28 grayscale image; Reshape adds the channel dimension
        self.input = Input(shape=(28, 28), name='Input')
        self.reshape = Reshape(target_shape=(28, 28, 1), name='Reshape')
        self.layers = OrderedDict()
        # Two conv/pool blocks; hid_dim_0 and hid_dim_1 are the filter counts to compare
        self.layers['conv_0'] = Conv2D(hid_dim_0, (3, 3), strides=(1, 1), name='Conv_0')
        self.layers['pool_0'] = MaxPooling2D((2, 2), strides=(1, 1), name='Pool_0')
        self.layers['conv_1'] = Conv2D(hid_dim_1, (3, 3), strides=(1, 1), name='Conv_1')
        self.layers['pool_1'] = MaxPooling2D((2, 2), strides=(1, 1), name='Pool_1')
        # Fully connected head
        self.layers['flatten'] = Flatten()
        self.layers['dense_0'] = Dense(256, activation='relu')
        self.layers['dense_1'] = Dense(128, activation='relu')
        self.layers['dense_2'] = Dense(64, activation='relu')
        # 10-way softmax output, one class per digit
        self.last = Dense(10, activation='softmax', name='last')

    def build(self):
        # Wire the layers together with the functional API and return a Model
        x = self.input
        z = self.reshape(x)
        for layer in self.layers.values():
            z = layer(z)
        p = self.last(z)
        model = Model(inputs=x, outputs=p)
        return model
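Before running the grid, it helps to build one instance and print its summary; this shows where the parameter counts we compare later come from (an illustrative check using the default filter counts):

# Build a model with the default filter counts and inspect it
model = CNNModel().build()
model.summary()  # per-layer output shapes and parameter counts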
dim_hidden_layers = [2**i for i in range(4, 8)]
For every combination of filter counts, the number of parameters and the test accuracy are collected in a DataFrame called df_accuracy.
df_accuracy = pd.DataFrame()
# Make sure the checkpoint directory exists before training
os.makedirs(os.path.join('models', 'CNN'), exist_ok=True)
for hid_dim_0 in dim_hidden_layers:
    for hid_dim_1 in dim_hidden_layers:
        print('========', 'hid_dim_0:', hid_dim_0, '; hid_dim_1:', hid_dim_1, '========')
        model = CNNModel(hid_dim_0=hid_dim_0, hid_dim_1=hid_dim_1)
        model = model.build()
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])
        callbacks = [
            EarlyStopping(patience=3),
            ModelCheckpoint(filepath=os.path.join('models', 'CNN', 'model_{}_{}.h5'.format(hid_dim_0, hid_dim_1)), save_best_only=True),
        ]
        n_param = model.count_params()
        model.fit(x=X_train, y=y_train, batch_size=64, epochs=20, callbacks=callbacks, validation_split=0.1)
        # Take the argmax of the softmax outputs as the predicted class
        acc = accuracy_score(y_test, model.predict(X_test).argmax(axis=-1))
        df_accuracy = pd.concat([df_accuracy, pd.DataFrame([[hid_dim_0, hid_dim_1, n_param, acc]], columns=['hid_dim_0', 'hid_dim_1', 'n_param', 'accuracy'])])
display(df_accuracy.set_index(['hid_dim_0', 'hid_dim_1'])[['n_param']].unstack())
display(df_accuracy.set_index(['hid_dim_0', 'hid_dim_1'])[['accuracy']].unstack())
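As a sanity check on the first table, the parameter count of a Conv2D layer can be computed by hand: a layer with k×k kernels, c_in input channels, and f filters has (k*k*c_in + 1)*f weights, the +1 being the bias. A small verification snippet (my own arithmetic, not taken from the measurements above):

# Parameters of a Conv2D layer: (kernel_h * kernel_w * in_channels + 1) * filters
def conv2d_params(k, c_in, filters):
    return (k * k * c_in + 1) * filters

# For hid_dim_0=128, hid_dim_1=64:
print(conv2d_params(3, 1, 128))   # Conv_0: (3*3*1 + 1) * 128 = 1280
print(conv2d_params(3, 128, 64))  # Conv_1: (3*3*128 + 1) * 64 = 73792

Most of the total, though, comes from the first Dense layer: because the stride-1 pooling barely shrinks the 28x28 maps, the flattened vector has 22*22*hid_dim_1 entries, so the filter counts influence the total parameter count mainly through that layer.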
The differences are small across the board, but the combination hid_dim_0=128 and hid_dim_1=64 turned out to be the most accurate model.
model = CNNModel(hid_dim_0=128, hid_dim_1=64)
model = model.build()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])
# Define fresh callbacks so we don't reuse the checkpoint path from the last loop iteration
callbacks = [
    EarlyStopping(patience=3),
    ModelCheckpoint(filepath=os.path.join('models', 'CNN', 'model_{}_{}.h5'.format(128, 64)), save_best_only=True),
]
history = model.fit(x=X_train, y=y_train, batch_size=64, epochs=20, callbacks=callbacks, validation_split=0.1)
print(model.evaluate(X_test, y_test)) # [0.09829007089138031, 0.9854999780654907]
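One caveat: model.evaluate above scores the weights from the final epoch. Since ModelCheckpoint kept the checkpoint with the best validation loss, you can reload that file instead (a minimal sketch, assuming the naming scheme used above):

from tensorflow.keras.models import load_model

# Load the best checkpoint written by ModelCheckpoint during training
best_model = load_model(os.path.join('models', 'CNN', 'model_128_64.h5'))
print(best_model.evaluate(X_test, y_test))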
This gives us a model with a test accuracy of about 98.5%, which is quite good.
In this experiment the number of filters made little difference, but I wonder whether that holds in other settings. Next time, I would like to investigate the depth of the network. Thank you for reading to the end.