Most of the above answers covered important points. If you are using a recent TensorFlow (TF 2.1 or above), then the following example will help you. The model part of the code is from the TensorFlow website.
import tensorflow as tf
from tensorflow import keras

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

def create_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(512, activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation=tf.nn.softmax)
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Create a basic model instance
model = create_model()
model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), verbose=1)
Please save the model in *.tf format. From my experience, if you have any custom_loss defined, the *.h5 format will not save the optimizer state and hence will not serve your purpose if you want to retrain the model from where you left off.
# saving the model in tensorflow format
model.save('./MyModel_tf', save_format='tf')

# loading the saved model
loaded_model = tf.keras.models.load_model('./MyModel_tf')

# retraining the model
loaded_model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), verbose=1)

This approach will restart the training where we left off before saving the model. As mentioned by others, if you want to save the weights of the best model, or to save the weights of the model every epoch, you need to use the Keras callbacks function (ModelCheckpoint) with options such as save_weights_only=True, save_freq='epoch', and save_best_only.
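For instance, a minimal checkpoint-callback sketch; the file path here is a placeholder, and the monitored metric is assumed to be validation loss:

# save only the best weights at the end of every epoch
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath='./checkpoints/best_weights',  # placeholder path
    save_weights_only=True,
    save_freq='epoch',
    save_best_only=True,
    monitor='val_loss',
    verbose=1)

model.fit(x_train, y_train, epochs=10,
          validation_data=(x_test, y_test),
          callbacks=[checkpoint_cb])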
The problem might be that you use a different optimizer, or different arguments to your optimizer. I just had the same issue with a custom pretrained model, using

from tensorflow.keras.callbacks import ReduceLROnPlateau

reduce_lr = ReduceLROnPlateau(monitor='loss', factor=lr_reduction_factor,
                              patience=patience, min_lr=min_lr, verbose=1)

for the pretrained model, whereby the original learning rate starts at 0.0003 and during pre-training it is reduced to the min_learning rate, which is 0.000003.
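If you suspect a mismatch like this, one way to check is to compare the loaded model's optimizer configuration against the arguments you trained with. A minimal sketch; the checkpoint filename is a placeholder:

import tensorflow as tf

# hypothetical checkpoint path
model = tf.keras.models.load_model('pretrained_model.h5')

# inspect the restored optimizer arguments (learning rate, decay, etc.)
print(model.optimizer.get_config())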
If you are using TF2, use the new saved_model method (pb format). More information is available in the TensorFlow SavedModel documentation.
model.fit(x=X_train, y=y_train, epochs=10, callbacks=[model_callback])  # your first training

tf.saved_model.save(model, save_to_dir_path)  # save the model

del model  # delete the in-memory model

model = tf.keras.models.load_model(save_to_dir_path)  # reload it

model.fit(x=X_train, y=y_train, epochs=10, callbacks=[model_callback])  # your second training
Here is the code; have a look:
from keras.models import load_model
model = load_model('/content/drive/MyDrive/CustomResNet/saved_models/model_1.h5')
history = model.fit(train_gen, validation_data = valid_gen, epochs = 5)
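One caveat worth noting, beyond the snippet above: if the saved model was compiled with a custom loss or metric, load_model needs to be told about it via custom_objects. A sketch, where my_custom_loss is a hypothetical name:

from keras.models import load_model

# hypothetical: the model was compiled with a custom loss named my_custom_loss
model = load_model('/content/drive/MyDrive/CustomResNet/saved_models/model_1.h5',
                   custom_objects={'my_custom_loss': my_custom_loss})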
Install and import TensorFlow and dependencies:
pip install pyyaml h5py  # Required to save models in HDF5 format
import os
import tensorflow as tf
from tensorflow import keras
print(tf.version.VERSION)
2.9.1
Start by building a simple sequential model:
# Define a simple sequential model
def create_model():
    model = tf.keras.Sequential([
        keras.layers.Dense(512, activation='relu', input_shape=(784,)),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(10)
    ])

    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

    return model

# Create a basic model instance
model = create_model()

# Display the model's architecture
model.summary()
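The tutorial this excerpt comes from goes on to save checkpoints during training. A minimal sketch of that step, assuming the MNIST data is loaded and flattened to 784-element vectors as the input_shape above implies; the checkpoint path is a placeholder:

# load MNIST and flatten each 28x28 image into a 784-element vector
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()
train_images = train_images.reshape(-1, 784) / 255.0
test_images = test_images.reshape(-1, 784) / 255.0

# save the model's weights at the end of every epoch
checkpoint_path = "training_1/cp.ckpt"  # placeholder path
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

model.fit(train_images, train_labels, epochs=10,
          validation_data=(test_images, test_labels),
          callbacks=[cp_callback])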
Let’s review our project structure:
$ tree --dirsfirst
.
├── output
│   ├── checkpoints
│   └── resnet_fashion_mnist.png
├── pyimagesearch
│   ├── callbacks
│   │   ├── __init__.py
│   │   ├── epochcheckpoint.py
│   │   └── trainingmonitor.py
│   ├── nn
│   │   ├── __init__.py
│   │   └── resnet.py
│   └── __init__.py
└── train.py

5 directories, 8 files
Open up a new file, name it train.py, and insert the following code:
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")

# import the necessary packages
from pyimagesearch.callbacks.epochcheckpoint import EpochCheckpoint
from pyimagesearch.callbacks.trainingmonitor import TrainingMonitor
from pyimagesearch.nn.resnet import ResNet
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import load_model
import tensorflow.keras.backend as K
import numpy as np
import argparse
import cv2
import sys
import os
Now let’s go ahead and parse command line arguments:
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--checkpoints", required=True,
    help="path to output checkpoint directory")
ap.add_argument("-m", "--model", type=str,
    help="path to *specific* model checkpoint to load")
ap.add_argument("-s", "--start-epoch", type=int, default=0,
    help="epoch to restart training at")
args = vars(ap.parse_args())
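The excerpt skips the step that loads Fashion MNIST. A sketch of it, assuming the images are resized to 32x32 single-channel inputs as the ResNet.build(32, 32, 1, ...) call below implies:

# load Fashion MNIST and resize each image to 32x32 to match the network input
((trainX, trainY), (testX, testY)) = fashion_mnist.load_data()
trainX = np.array([cv2.resize(x, (32, 32)) for x in trainX])
testX = np.array([cv2.resize(x, (32, 32)) for x in testX])

# scale pixels to [0, 1] and add the single channel dimension
trainX = trainX.astype("float32") / 255.0
testX = testX.astype("float32") / 255.0
trainX = trainX.reshape((trainX.shape[0], 32, 32, 1))
testX = testX.reshape((testX.shape[0], 32, 32, 1))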
From here we’ll (1) binarize our labels, and (2) initialize our data augmentation object:
# convert the labels from integers to vectors
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
testY = lb.transform(testY)

# construct the image generator for data augmentation
aug = ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1,
    horizontal_flip=True, fill_mode="nearest")
And now to the code for loading model checkpoints:
# if there is no specific model checkpoint supplied, then initialize
# the network (ResNet-56) and compile the model
if args["model"] is None:
    print("[INFO] compiling model...")
    opt = SGD(lr=1e-1)
    model = ResNet.build(32, 32, 1, 10, (9, 9, 9),
        (64, 64, 128, 256), reg=0.0001)
    model.compile(loss="categorical_crossentropy", optimizer=opt,
        metrics=["accuracy"])

# otherwise, we're using a checkpoint model
else:
    # load the checkpoint from disk
    print("[INFO] loading {}...".format(args["model"]))
    model = load_model(args["model"])

    # update the learning rate
    print("[INFO] old learning rate: {}".format(
        K.get_value(model.optimizer.lr)))
    K.set_value(model.optimizer.lr, 1e-2)
    print("[INFO] new learning rate: {}".format(
        K.get_value(model.optimizer.lr)))
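The excerpt stops before the training call. A sketch of how these pieces would plug together, assuming EpochCheckpoint takes the checkpoint directory, a save interval, and the epoch to resume numbering from (its actual signature lives in epochcheckpoint.py, and the epoch count here is a placeholder):

# train the network, checkpointing every few epochs so training can be
# resumed later with --model and --start-epoch
callbacks = [
    EpochCheckpoint(args["checkpoints"], every=5,
        startAt=args["start_epoch"]),  # assumed signature
]
model.fit(
    aug.flow(trainX, trainY, batch_size=128),
    validation_data=(testX, testY),
    steps_per_epoch=len(trainX) // 128,
    epochs=80,  # placeholder epoch count
    callbacks=callbacks,
    verbose=1)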
I was wondering if it was possible to save a partly trained Keras model and continue the training after loading the model again. The reason for this is that I will have more training data in the future and I do not want to retrain the whole model again. After saving, deleting and reloading the model, the loss and accuracy of the model trained on the second dataset were 0.1711 and 0.9504 respectively.
The functions which I am using are:
# partly train model
model.fit(first_training, first_classes, batch_size=32, nb_epoch=20)

# save partly trained model
model.save('partly_trained.h5')

# load partly trained model
from keras.models import load_model
model = load_model('partly_trained.h5')

# continue training
model.fit(second_training, second_classes, batch_size=32, nb_epoch=20)
Is this caused by the new training data or by a completely re-trained model?
"" " Model by: http: //machinelearningmastery.com/ "" " # load(downloaded if needed) the MNIST dataset import numpy from keras.datasets import mnist from keras.models import Sequential from keras.layers import Dense from keras.utils import np_utils from keras.models import load_model numpy.random.seed(7) def baseline_model(): model = Sequential() model.add(Dense(num_pixels, input_dim = num_pixels, init = 'normal', activation = 'relu')) model.add(Dense(num_classes, init = 'normal', activation = 'softmax')) model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy']) return model if __name__ == '__main__': # load data(X_train, y_train), (X_test, y_test) = mnist.load_data() # flatten 28 * 28 images to a 784 vector for each image num_pixels = X_train.shape[1] * X_train.shape[2] X_train = X_train.reshape(X_train.shape[0], num_pixels).astype('float32') X_test = X_test.reshape(X_test.shape[0], num_pixels).astype('float32') # normalize inputs from 0 - 255 to 0 - 1 X_train = X_train / 255 X_test = X_test / 255 # one hot encode outputs y_train = np_utils.to_categorical(y_train) y_test = np_utils.to_categorical(y_test) num_classes = y_test.shape[1] # build the model model = baseline_model() #Partly train model dataset1_x = X_train[: 3000] dataset1_y = y_train[: 3000] model.fit(dataset1_x, dataset1_y, nb_epoch = 10, batch_size = 200, verbose = 2) # Final evaluation of the model scores = model.evaluate(X_test, y_test, verbose = 0) print("Baseline Error: %.2f%%" % (100 - scores[1] * 100)) #Save partly trained model model.save('partly_trained.h5') del model #Reload model model = load_model('partly_trained.h5') #Continue training dataset2_x = X_train[3000: ] dataset2_y = y_train[3000: ] model.fit(dataset2_x, dataset2_y, nb_epoch = 10, batch_size = 200, verbose = 2) scores = model.evaluate(X_test, y_test, verbose = 0) print("Baseline Error: %.2f%%" % (100 - scores[1] * 100))
For tensorflow.keras, change the parameter nb_epoch to epochs in the model fit. The imports and baseline_model function are:
import numpy
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model

numpy.random.seed(7)

def baseline_model():
    model = Sequential()
    model.add(Dense(num_pixels, input_dim=num_pixels, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
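With those imports in place, the fit and one-hot-encoding calls from the script above change accordingly; a sketch of the renamed pieces:

# nb_epoch was renamed to epochs in tensorflow.keras
model.fit(dataset1_x, dataset1_y, epochs=10, batch_size=200, verbose=2)

# np_utils.to_categorical becomes to_categorical
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)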
Actually, model.save saves all the information needed for restarting training in your case. The only thing which could be spoiled by reloading the model is your optimizer state. To check that, try to save and reload the model and train it on the training data.
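A quick way to run that check; a minimal sketch that reuses the 'partly_trained.h5' filename and variables from the script above:

# evaluate, save, reload, and evaluate again: if the weights survived
# the round trip, the two scores should match
before = model.evaluate(X_test, y_test, verbose=0)
model.save('partly_trained.h5')
model = load_model('partly_trained.h5')
after = model.evaluate(X_test, y_test, verbose=0)
print(before, after)

# training should pick up where it left off rather than restarting,
# which is the sign that the optimizer state was restored too
model.fit(X_train, y_train, epochs=2, batch_size=200, verbose=2)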