Here are some useful tips for using Google Colab.
Google Colab Pro limits a session to roughly 24 hours of continuous execution. If training a model takes longer than that, the runtime is reset partway through and the results computed so far are lost.
For example, suppose you estimate that 200 epochs will take about 24 hours and start the run; the actual calculation often takes slightly longer, and Colab may disconnect around epoch 190.
To work around this, we adopt the following approach: save a checkpoint of the model to Google Drive during training and resume from the latest saved file after a disconnection.
python
from tensorflow.keras.callbacks import ModelCheckpoint
checkpoint = ModelCheckpoint(filepath='XXX.h5',
                             monitor='loss',
                             save_best_only=True,
                             save_weights_only=False,
                             mode='min',
                             period=1)
1. filepath: the path (string) where the model file is saved
2. monitor: the quantity to monitor
3. save_best_only: if True, the latest best model according to the monitored quantity is kept and is not overwritten by worse ones
4. mode: one of {auto, min, max}
5. save_weights_only: if True, only the model's weights are saved; otherwise the entire model is saved
6. period: interval between checkpoints, in epochs
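The filepath can also contain named formatting options that Keras fills in from the epoch number and the logged metrics, so each file name records when it was written and how good the model was. A minimal sketch, assuming a validation split is used so that val_loss is available (the file name pattern here is just an example):
python
from tensorflow.keras.callbacks import ModelCheckpoint

# {epoch:02d} and {val_loss:.4f} are filled in by Keras when the checkpoint is written.
checkpoint = ModelCheckpoint(filepath='model-{epoch:02d}-{val_loss:.4f}.h5',
                             monitor='val_loss',
                             save_best_only=True,
                             save_weights_only=False,
                             mode='min',
                             period=1)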
Let's write the code using the Keras MNIST example.
python
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.datasets import mnist
Mount Google Drive and set the folder where the models will be saved.
python
from google.colab import drive
import os

drive.mount('/content/drive')

MODEL_DIR = "/content/drive/My Drive/temp"
if not os.path.exists(MODEL_DIR):  # If the directory does not exist, create it.
    os.makedirs(MODEL_DIR)

checkpoint = ModelCheckpoint(
    filepath=os.path.join(MODEL_DIR, "model-{epoch:02d}.h5"), save_best_only=True)
python
history = model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, validation_split=0.1, callbacks=[checkpoint])
Running the above code saves a model file in the temp folder on Google Drive at the end of each epoch.
After a disconnection, training is resumed by loading one of the saved files, for example model-05.h5.
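If you do not want to type the file name by hand, you can look up the newest checkpoint in the folder first. A minimal sketch, assuming MODEL_DIR is the folder defined above and already contains saved checkpoints:
python
import os

# List the saved checkpoints and pick the newest one; the lexicographic sort works
# because the epoch number in "model-{epoch:02d}.h5" is zero-padded.
checkpoints = sorted(f for f in os.listdir(MODEL_DIR)
                     if f.startswith("model-") and f.endswith(".h5"))
latest = checkpoints[-1]  # e.g. "model-05.h5"
print("Resuming from", latest)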
python
# Load the saved model
model.load_weights(os.path.join(MODEL_DIR, "model-05.h5"))  # Specify the checkpoint file to resume from
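Because save_weights_only is left at its default of False, the .h5 files written by ModelCheckpoint contain the full model, not just the weights. In a fresh Colab session where the model object no longer exists, you can therefore rebuild it directly from the checkpoint with load_model instead of load_weights; this also restores the optimizer state, so the compile step does not need to be repeated. A minimal sketch:
python
from tensorflow.keras.models import load_model
import os

# Rebuild architecture, weights, and optimizer state from the full-model checkpoint.
model = load_model(os.path.join(MODEL_DIR, "model-05.h5"))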
Next, change the checkpoint file name from model-XX.h5 to model_new-XX.h5 so that the resumed run does not overwrite the files saved by the first run.
python
if not os.path.exists(MODEL_DIR):  # If the directory does not exist, create it.
    os.makedirs(MODEL_DIR)

checkpoint = ModelCheckpoint(
    filepath=os.path.join(MODEL_DIR, "model_new-{epoch:02d}.h5"),
    monitor='loss',
    save_best_only=True,
    mode='min',
    period=1)
python
history = model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, validation_split=0.1, callbacks=[checkpoint])
Looking at the training accuracy, you can see that training resumes from the level reached at the end of the previous run.
The newly trained model is saved as well.
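Note that the resumed run starts counting epochs from 1 again and trains for the full NUM_EPOCHS. If you want the epoch numbering (and the checkpoint file names) to continue from where the first run stopped, model.fit accepts an initial_epoch argument. A minimal sketch, assuming the first run stopped after epoch 5:
python
# Continue counting from epoch 6; training stops once NUM_EPOCHS epochs have been reached in total.
history = model.fit(Xtrain, Ytrain,
                    batch_size=BATCH_SIZE,
                    epochs=NUM_EPOCHS,
                    initial_epoch=5,
                    validation_split=0.1,
                    callbacks=[checkpoint])
For reference, the complete script for the first training run is shown below.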
python
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
import os
import matplotlib.pyplot as plt
from google.colab import drive
drive.mount('/content/drive')
MODEL_DIR = "/content/drive/My Drive/temp"
if not os.path.exists(MODEL_DIR):  # If the directory does not exist, create it.
    os.makedirs(MODEL_DIR)

checkpoint = ModelCheckpoint(
    filepath=os.path.join(MODEL_DIR, "model-{epoch:02d}.h5"), save_best_only=True)
BATCH_SIZE = 128
NUM_EPOCHS = 20
(Xtrain, ytrain), (Xtest, ytest) = mnist.load_data()
Xtrain = Xtrain.reshape(60000, 784).astype("float32") / 255
Xtest = Xtest.reshape(10000, 784).astype("float32") / 255
Ytrain = to_categorical(ytrain, 10)
Ytest = to_categorical(ytest, 10)
print(Xtrain.shape, Xtest.shape, Ytrain.shape, Ytest.shape)
#Model definition
model = Sequential()
model.add(Dense(512, input_shape=(784,), activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(512, activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(10, activation="softmax"))
model.summary()
model.compile(optimizer="rmsprop", loss="categorical_crossentropy",
              metrics=["accuracy"])
#Learning execution
history = model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, validation_split=0.1, callbacks=[checkpoint])
#Graph drawing
plt.clf()
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
plot_epochs = range(1, len(acc)+1)
# Accuracy
plt.plot(plot_epochs, acc, 'bo-', label='Training acc')
plt.plot(plot_epochs, val_acc, 'b', label='Validation acc')
plt.title('model accuracy')
plt.ylabel('accuracy') #Y-axis label
plt.xlabel('epoch') #X-axis label
plt.legend()
plt.show()
loss = history.history['loss']
val_loss = history.history['val_loss']
plot_epochs = range(1, len(loss)+1)
# Loss
plt.plot(plot_epochs, loss, 'ro-', label='Training loss')
plt.plot(plot_epochs, val_loss, 'r', label='Validation loss')
plt.title('model loss')
plt.ylabel('loss') #Y-axis label
plt.xlabel('epoch') #X-axis label
plt.legend()
plt.show()
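The plots above are drawn from the History object, which lives only in memory and disappears together with the runtime. If you also want the learning curves to survive a disconnection, you can log the per-epoch metrics to a CSV file on Google Drive with the CSVLogger callback and redraw the graphs from that file later. A minimal sketch (the log file name is just an example):
python
from tensorflow.keras.callbacks import CSVLogger

# Append the per-epoch metrics (loss, accuracy, val_loss, val_accuracy) to a CSV file on Drive.
csv_logger = CSVLogger(os.path.join(MODEL_DIR, "training_log.csv"), append=True)

history = model.fit(Xtrain, Ytrain,
                    batch_size=BATCH_SIZE,
                    epochs=NUM_EPOCHS,
                    validation_split=0.1,
                    callbacks=[checkpoint, csv_logger])
The second script below puts the resume steps together: it mounts Drive again, loads the saved checkpoint, and continues training with the new checkpoint file prefix.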
python
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
import os
import matplotlib.pyplot as plt
from google.colab import drive
drive.mount('/content/drive')
MODEL_DIR = "/content/drive/My Drive/temp"
if not os.path.exists(MODEL_DIR):  # If the directory does not exist, create it.
    os.makedirs(MODEL_DIR)

checkpoint = ModelCheckpoint(
    filepath=os.path.join(MODEL_DIR, "model-{epoch:02d}.h5"), save_best_only=True)

# Load the saved model.
# NOTE: the data preparation, model definition, and compile steps from the first script
# must be run again in the new session before the weights can be loaded.
model.load_weights(os.path.join(MODEL_DIR, "model-05.h5"))  # Specify the checkpoint file to resume from
if not os.path.exists(MODEL_DIR):  # If the directory does not exist, create it.
    os.makedirs(MODEL_DIR)

checkpoint = ModelCheckpoint(
    filepath=os.path.join(MODEL_DIR, "model_new-{epoch:02d}.h5"),
    monitor='loss',
    save_best_only=True,
    mode='min',
    period=1)
#Resume learning
history = model.fit(Xtrain, Ytrain, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, validation_split=0.1, callbacks=[checkpoint])
#Graph drawing
plt.clf()
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
plot_epochs = range(1, len(acc)+1)
# Accuracy
plt.plot(plot_epochs, acc, 'bo-', label='Training acc')
plt.plot(plot_epochs, val_acc, 'b', label='Validation acc')
plt.title('model accuracy')
plt.ylabel('accuracy') #Y-axis label
plt.xlabel('epoch') #X-axis label
plt.legend()
plt.show()
loss = history.history['loss']
val_loss = history.history['val_loss']
plot_epochs = range(1, len(loss)+1)
# Loss
plt.plot(plot_epochs, loss, 'ro-', label='Training loss')
plt.plot(plot_epochs, val_loss, 'r', label='Validation loss')
plt.title('model loss')
plt.ylabel('loss') #Y-axis label
plt.xlabel('epoch') #X-axis label
plt.legend()
plt.show()