How to load a TensorFlow model and continue training


Most of the other answers cover the important points. If you are using a recent TensorFlow (TF2.1 or above), then the following example will help you. The model part of the code is from the TensorFlow website.

import tensorflow as tf
from tensorflow import keras

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

def create_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(512, activation=tf.nn.relu),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation=tf.nn.softmax)
    ])
    model.compile(optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])
    return model

# Create a basic model instance
model = create_model()
model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), verbose=1)

Please save the model in *.tf format. In my experience, if you have a custom_loss defined, the *.h5 format will not save the optimizer state and hence will not serve your purpose if you want to retrain the model from where you left off.

# saving the model in tensorflow format
model.save('./MyModel_tf', save_format='tf')

# loading the saved model
loaded_model = tf.keras.models.load_model('./MyModel_tf')

# retraining the model
loaded_model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), verbose=1)

This approach restarts the training where we left off before saving the model. As mentioned by others, if you want to save the weights of the best model, or to save the weights every epoch, you need to use the Keras callback ModelCheckpoint with options such as save_weights_only=True, save_freq='epoch', and save_best_only.
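As a minimal sketch of that callback setup (the filepath and monitored metric here are illustrative placeholders, not from the original answer):

# hypothetical checkpoint setup -- filepath and monitor are illustrative
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath='best_weights.ckpt',
    save_weights_only=True,   # save only the weights, not the full model
    save_freq='epoch',        # write a checkpoint at the end of every epoch
    save_best_only=True,      # keep only the best checkpoint so far
    monitor='val_loss')

model.fit(x_train, y_train, epochs=10,
    validation_data=(x_test, y_test),
    callbacks=[checkpoint_cb])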

The problem might be that you use a different optimizer, or different arguments to your optimizer. I just had the same issue with a custom pretrained model, using

from tensorflow.keras.callbacks import ReduceLROnPlateau

reduce_lr = ReduceLROnPlateau(monitor='loss', factor=lr_reduction_factor,
    patience=patience, min_lr=min_lr, verbose=1)

for the pretrained model, whereby the original learning rate starts at 0.0003 and during pre-training it is reduced to the min_lr, which is 0.000003.
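In that situation, resuming with a freshly constructed optimizer silently restarts training at the original learning rate. A minimal sketch of one way to guard against this, assuming the scenario above (the model path, factor, patience, and data variable names are illustrative placeholders, not from the original answer):

import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau

# reload the pretrained model (path is illustrative)
model = tf.keras.models.load_model('pretrained_model.h5')

# push the learning rate back down to where pre-training left off,
# instead of letting it restart at the original 0.0003
tf.keras.backend.set_value(model.optimizer.learning_rate, 3e-6)

# re-attach the same schedule for the new run; factor/patience are placeholders
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5,
    patience=3, min_lr=3e-6, verbose=1)
model.fit(x_train, y_train, epochs=10, callbacks=[reduce_lr])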

If you are using TF2, use the new SavedModel method (pb format):

model.fit(x=X_train, y=y_train, epochs=10, callbacks=[model_callback])  # your first training
tf.saved_model.save(model, save_to_dir_path)  # save the model
del model  # delete the in-memory model
model = tf.keras.models.load_model(save_to_dir_path)  # load it back
model.fit(x=X_train, y=y_train, epochs=10, callbacks=[model_callback])  # your second training
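One quick sanity check (my own addition, not from the answer): a compiled Keras optimizer tracks how many update steps it has taken, so if the optimizer state survived the save/load round trip this counter continues from the first run instead of resetting to zero.

# the Keras-native model.save preserves the optimizer state; whether the
# low-level tf.saved_model.save does can depend on the TF version
if model.optimizer is not None:
    print("optimizer steps so far:", int(model.optimizer.iterations.numpy()))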

Here is the code; have a look:

from keras.models import load_model

model = load_model('/content/drive/MyDrive/CustomResNet/saved_models/model_1.h5')
history = model.fit(train_gen, validation_data=valid_gen, epochs=5)

Suggestion : 2


Install and import TensorFlow and dependencies:

pip install pyyaml h5py  # Required to save models in HDF5 format
import os

import tensorflow as tf
from tensorflow import keras

print(tf.version.VERSION)
2.9.1

Start by building a simple sequential model:

# Define a simple sequential model
def create_model():
    model = tf.keras.Sequential([
        keras.layers.Dense(512, activation='relu', input_shape=(784,)),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(10)
    ])

    model.compile(optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

    return model

# Create a basic model instance
model = create_model()

# Display the model's architecture
model.summary()
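The tutorial continues with checkpointing APIs from here. As a minimal sketch of what the h5py dependency installed above enables (the data variable names and file name are illustrative):

# train briefly, then write the weights to an HDF5 file
model.fit(train_images, train_labels, epochs=1)

# the .h5 extension selects the HDF5 format that h5py provides
model.save_weights('my_weights.h5')

# later: rebuild an identical architecture and restore the weights
model = create_model()
model.load_weights('my_weights.h5')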

Suggestion : 3


Let’s review our project structure:

$ tree --dirsfirst
.
├── output
│   ├── checkpoints
│   └── resnet_fashion_mnist.png
├── pyimagesearch
│   ├── callbacks
│   │   ├── __init__.py
│   │   ├── epochcheckpoint.py
│   │   └── trainingmonitor.py
│   ├── nn
│   │   ├── __init__.py
│   │   └── resnet.py
│   └── __init__.py
└── train.py

5 directories, 8 files

Open up a new file, name it train.py, and insert the following code:

# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")

# import the necessary packages
from pyimagesearch.callbacks.epochcheckpoint import EpochCheckpoint
from pyimagesearch.callbacks.trainingmonitor import TrainingMonitor
from pyimagesearch.nn.resnet import ResNet
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import load_model
import tensorflow.keras.backend as K
import numpy as np
import argparse
import cv2
import sys
import os

Now let’s go ahead and parse command line arguments:

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--checkpoints", required=True,
    help="path to output checkpoint directory")
ap.add_argument("-m", "--model", type=str,
    help="path to *specific* model checkpoint to load")
ap.add_argument("-s", "--start-epoch", type=int, default=0,
    help="epoch to restart training at")
args = vars(ap.parse_args())
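The excerpt skips the dataset-loading step that sits between the argument parser and the label binarization below. A minimal sketch of that gap, consistent with the imports above and the 32x32x1 input that ResNet.build receives later (the cv2 resize is an assumption based on those shapes):

# load Fashion MNIST (downloaded automatically on first run)
print("[INFO] loading Fashion MNIST...")
((trainX, trainY), (testX, testY)) = fashion_mnist.load_data()

# the images are 28x28 but the network expects 32x32, so resize them
trainX = np.array([cv2.resize(x, (32, 32)) for x in trainX])
testX = np.array([cv2.resize(x, (32, 32)) for x in testX])

# scale to [0, 1] and add the single channel dimension
trainX = trainX.astype("float32") / 255.0
testX = testX.astype("float32") / 255.0
trainX = trainX.reshape((trainX.shape[0], 32, 32, 1))
testX = testX.reshape((testX.shape[0], 32, 32, 1))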

From here we’ll (1) binarize our labels, and (2) initialize our data augmentation object:

# convert the labels from integers to vectors
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
testY = lb.transform(testY)

# construct the image generator for data augmentation
aug = ImageDataGenerator(width_shift_range=0.1,
    height_shift_range=0.1, horizontal_flip=True,
    fill_mode="nearest")

And now to the code for loading model checkpoints:

# if there is no specific model checkpoint supplied, then initialize
# the network (ResNet-56) and compile the model
if args["model"] is None:
    print("[INFO] compiling model...")
    opt = SGD(lr=1e-1)
    model = ResNet.build(32, 32, 1, 10, (9, 9, 9),
        (64, 64, 128, 256), reg=0.0001)
    model.compile(loss="categorical_crossentropy", optimizer=opt,
        metrics=["accuracy"])

# otherwise, we're using a checkpoint model
else:
    # load the checkpoint from disk
    print("[INFO] loading {}...".format(args["model"]))
    model = load_model(args["model"])

    # update the learning rate
    print("[INFO] old learning rate: {}".format(
        K.get_value(model.optimizer.lr)))
    K.set_value(model.optimizer.lr, 1e-2)
    print("[INFO] new learning rate: {}".format(
        K.get_value(model.optimizer.lr)))
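The excerpt ends before the training call itself. A minimal sketch of how these pieces fit together when resuming (the EpochCheckpoint arguments, batch size, and epoch count are assumptions; the real signatures live in the pyimagesearch package). The key detail is Keras's initial_epoch argument, which makes the fit loop resume counting at the restart epoch:

# construct the callbacks (constructor arguments are assumptions)
callbacks = [EpochCheckpoint(args["checkpoints"], every=5,
    startAt=args["start_epoch"])]

# train (or resume) the network; initial_epoch tells Keras which epoch
# number to resume counting from when a checkpoint was loaded
model.fit(aug.flow(trainX, trainY, batch_size=128),
    validation_data=(testX, testY),
    steps_per_epoch=len(trainX) // 128,
    epochs=80,
    initial_epoch=args["start_epoch"],
    callbacks=callbacks,
    verbose=1)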

Suggestion : 4

I was wondering if it was possible to save a partly trained Keras model and continue the training after loading the model again. The reason for this is that I will have more training data in the future and I do not want to retrain the whole model again. After saving, deleting and reloading the model, the loss and accuracy of the model trained on the second dataset will be 0.1711 and 0.9504 respectively.

Actually, model.save saves all the information needed for restarting training in your case. The only thing that could be spoiled by reloading the model is the optimizer state. To check that, try to save and reload the model and train it on the training data.

The functions which I am using are:

#Partly train model
model.fit(first_training, first_classes, batch_size = 32, nb_epoch = 20)

#Save partly trained model
model.save('partly_trained.h5')

#Load partly trained model
from keras.models
import load_model
model = load_model('partly_trained.h5')

#Continue training
model.fit(second_training, second_classes, batch_size = 32, nb_epoch = 20)

Is this caused by the new training data or by a completely re-trained model?

""
"
Model by: http: //machinelearningmastery.com/
   ""
"
# load(downloaded
   if needed) the MNIST dataset
import numpy
from keras.datasets
import mnist
from keras.models
import Sequential
from keras.layers
import Dense
from keras.utils
import np_utils
from keras.models
import load_model
numpy.random.seed(7)

def baseline_model():
   model = Sequential()
model.add(Dense(num_pixels, input_dim = num_pixels, init = 'normal', activation = 'relu'))
model.add(Dense(num_classes, init = 'normal', activation = 'softmax'))
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
return model

if __name__ == '__main__':
    # load data
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    # flatten 28*28 images to a 784 vector for each image
    num_pixels = X_train.shape[1] * X_train.shape[2]
    X_train = X_train.reshape(X_train.shape[0], num_pixels).astype('float32')
    X_test = X_test.reshape(X_test.shape[0], num_pixels).astype('float32')
    # normalize inputs from 0-255 to 0-1
    X_train = X_train / 255
    X_test = X_test / 255
    # one hot encode outputs
    y_train = np_utils.to_categorical(y_train)
    y_test = np_utils.to_categorical(y_test)
    num_classes = y_test.shape[1]

    # build the model
    model = baseline_model()

    # Partly train model
    dataset1_x = X_train[:3000]
    dataset1_y = y_train[:3000]
    model.fit(dataset1_x, dataset1_y, nb_epoch=10, batch_size=200, verbose=2)

    # Final evaluation of the model
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("Baseline Error: %.2f%%" % (100 - scores[1] * 100))

    # Save partly trained model
    model.save('partly_trained.h5')
    del model

    # Reload model
    model = load_model('partly_trained.h5')

    # Continue training
    dataset2_x = X_train[3000:]
    dataset2_y = y_train[3000:]
    model.fit(dataset2_x, dataset2_y, nb_epoch=10, batch_size=200, verbose=2)
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("Baseline Error: %.2f%%" % (100 - scores[1] * 100))

For tensorflow.keras, change the parameter nb_epoch to epochs in the model.fit calls. The imports and the baseline_model function become:

import numpy
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model

numpy.random.seed(7)

def baseline_model():
    model = Sequential()
    model.add(Dense(num_pixels, input_dim=num_pixels, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
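With those imports, the rest of the script is unchanged apart from two renames: to_categorical is now imported directly instead of through np_utils, and the fit calls use epochs:

# one-hot encode outputs with the directly imported helper
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# tf.keras uses `epochs` where standalone Keras 1.x used `nb_epoch`
model.fit(dataset1_x, dataset1_y, epochs=10, batch_size=200, verbose=2)
model.fit(dataset2_x, dataset2_y, epochs=10, batch_size=200, verbose=2)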
