Data augmentation code:
print('Using real-time data augmentation.')

# this will do preprocessing and realtime data augmentation
datagen = ImageDataGenerator(
    featurewise_center = False,  # set input mean to 0 over the dataset
    samplewise_center = False,  # set each sample mean to 0
    featurewise_std_normalization = False,  # divide inputs by std of the dataset
    samplewise_std_normalization = False,  # divide each input by its std
    zca_whitening = True,  # apply ZCA whitening
    rotation_range = 90,  # randomly rotate images in the range (degrees, 0 to 180)
    width_shift_range = 0.1,  # randomly shift images horizontally (fraction of total width)
    height_shift_range = 0.1,  # randomly shift images vertically (fraction of total height)
    horizontal_flip = True,  # randomly flip images horizontally
    vertical_flip = False)  # randomly flip images vertically

# compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
datagen.fit(X_train)

# fit the model on the batches generated by datagen.flow()
model.fit_generator(datagen.flow(X_train, Y_train, batch_size = batch_size),
    samples_per_epoch = X_train.shape[0],
    nb_epoch = nb_epoch)
The training loss that Keras displays is the average of the losses over each batch of training data, taken over the current epoch. Because your model is changing over time, the loss over the first batches of an epoch is generally higher than over the last batches, which pulls the epoch-wise average up. The testing loss for an epoch, on the other hand, is computed using the model as it is at the end of the epoch, resulting in a lower loss.

Naturally, this is not possible with models that are subclasses of Model that override call().

The image_data_format setting specifies the image data format used by default by image-processing layers and utilities (either channels_last or channels_first).

To ensure the ability to recover from an interrupted training run at any time (fault tolerance), you should use a tf.keras.callbacks.experimental.BackupAndRestore callback that regularly saves your training progress, including the epoch number and weights, to disk, and loads it the next time you call Model.fit().
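As a minimal sketch (the backup directory path is just an example), such a callback can be created and then passed to Model.fit(), which is what the callbacks variable in the next listing stands for:

import tensorflow as tf

# regularly back up training state (epoch number and weights) so an
# interrupted run can resume from the last saved state
callbacks = [
    tf.keras.callbacks.experimental.BackupAndRestore(backup_dir = "/tmp/training_backup")
]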
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    # This could be any kind of model -- Functional, subclass...
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, activation = 'relu', input_shape = (28, 28, 1)),
        tf.keras.layers.GlobalMaxPooling2D(),
        tf.keras.layers.Dense(10)
    ])
    model.compile(loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
                  optimizer = tf.keras.optimizers.Adam(),
                  metrics = [tf.keras.metrics.SparseCategoricalAccuracy()])

model.fit(train_dataset, epochs = 12, callbacks = callbacks)
# Model where a shared LSTM is used to encode two different sequences in parallel
input_a = keras.Input(shape = (140, 256))
input_b = keras.Input(shape = (140, 256))

shared_lstm = keras.layers.LSTM(64)

# Process the first sequence on one GPU
with tf.device('/gpu:0'):
    encoded_a = shared_lstm(input_a)

# Process the next sequence on another GPU
with tf.device('/gpu:1'):
    encoded_b = shared_lstm(input_b)

# Concatenate results on CPU
with tf.device('/cpu:0'):
    merged_vector = keras.layers.concatenate(
        [encoded_a, encoded_b], axis = -1)
cluster_resolver = ...
if cluster_resolver.task_type in ("worker", "ps"):
    # Start a `tf.distribute.Server` and wait.
    # (https://www.tensorflow.org/api_docs/python/tf/distribute/Server)
    ...
elif cluster_resolver.task_type == "evaluator":
    # Run an (optional) side-car evaluation
    ...

# Otherwise, this is the coordinator that controls the training w/ the strategy.
strategy = tf.distribute.experimental.ParameterServerStrategy(
    cluster_resolver = ...)
train_dataset = ...

with strategy.scope():
    model = tf.keras.Sequential([
        layers.Conv2D(32, 3, activation = 'relu', input_shape = (28, 28, 1)),
        layers.MaxPooling2D(),
        layers.Flatten(),
        layers.Dense(64, activation = 'relu'),
        layers.Dense(10, activation = 'softmax')
    ])
    model.compile(
        loss = 'sparse_categorical_crossentropy',
        optimizer = tf.keras.optimizers.SGD(learning_rate = 0.001),
        metrics = ['accuracy'],
        steps_per_execution = 10)

model.fit(x = train_dataset, epochs = 3, steps_per_epoch = 100)
# By default `MultiWorkerMirroredStrategy` uses cluster information
# from `TF_CONFIG`, and "AUTO" collective op communication.
strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
train_dataset = get_training_dataset()

with strategy.scope():
    # Define and compile the model in the scope of the strategy. Doing so
    # ensures the variables created are distributed and initialized properly
    # according to the strategy.
    model = tf.keras.Sequential([
        layers.Conv2D(32, 3, activation = 'relu', input_shape = (28, 28, 1)),
        layers.MaxPooling2D(),
        layers.Flatten(),
        layers.Dense(64, activation = 'relu'),
        layers.Dense(10, activation = 'softmax')
    ])
    model.compile(
        loss = 'sparse_categorical_crossentropy',
        optimizer = tf.keras.optimizers.SGD(learning_rate = 0.001),
        metrics = ['accuracy'])

model.fit(x = train_dataset, epochs = 3, steps_per_epoch = 100)
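For reference, TF_CONFIG is a JSON-encoded environment variable describing the cluster and the current task. A hypothetical two-worker configuration (host names and ports are placeholders) could be set before creating the strategy like this:

import json
import os

# hypothetical cluster spec: two workers, with this process acting as worker 0
os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {
        "worker": ["worker0.example.com:12345", "worker1.example.com:12345"]
    },
    "task": {"type": "worker", "index": 0}
})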
In this example, we define the loss function by creating an instance of the loss class. Using the class is advantageous because you can pass additional configuration parameters, such as from_logits, to its constructor.
from tensorflow import keras
from tensorflow.keras import layers
model = keras.Sequential()
model.add(layers.Dense(64, kernel_initializer = 'uniform', input_shape = (10, )))
model.add(layers.Activation('softmax'))
loss_function = keras.losses.SparseCategoricalCrossentropy(from_logits = True)
model.compile(loss = loss_function, optimizer = 'adam')
If you want to use a loss function that is built into Keras without specifying any parameters, you can just use the string alias, as shown below:
model.compile(loss = 'sparse_categorical_crossentropy', optimizer = 'adam')
Binary cross-entropy calculates the cross-entropy loss between the predicted probabilities and the true labels. By default, the sum_over_batch_size reduction is used, which means that the loss returns the average of the per-sample losses in the batch.
y_true = [
[0., 1.],
[0.2, 0.8],
[0.3, 0.7],
[0.4, 0.6]
]
y_pred = [
[0.6, 0.4],
[0.4, 0.6],
[0.6, 0.4],
[0.8, 0.2]
]
bce = tf.keras.losses.BinaryCrossentropy(reduction = 'sum_over_batch_size')
bce(y_true, y_pred).numpy()
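With the y_true and y_pred values above, this call returns a single scalar of roughly 0.8394, which is simply the mean of the four per-sample losses shown in the next snippet.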
Using reduction = 'none' returns the full array of per-sample losses.
bce = tf.keras.losses.BinaryCrossentropy(reduction = 'none')
bce(y_true, y_pred).numpy()
array([0.9162905, 0.5919184, 0.79465103, 1.0549198], dtype=float32)
CategoricalCrossentropy also computes the cross-entropy loss between the true classes and predicted classes. The labels are expected in a one-hot format.
cce = tf.keras.losses.CategoricalCrossentropy()
cce(y_true, y_pred).numpy()
If you are using stochastic gradient descent, it is quite likely that you will run into the exploding gradients problem. One way to tackle it is to schedule the learning rate after some epochs, but per-parameter adaptive learning-rate algorithms such as the Adam optimizer usually remove the need for explicit learning-rate scheduling.

Hence it is advisable to use a lower learning rate; results can also be improved with hyperparameter tuning.

The mean absolute error is a scale-dependent accuracy measure: it is expressed in the same units as the data being measured, so it cannot be used to compare series that use different scales.

There are two commonly used scaling methods, and both of them are easy to implement with scikit-learn (sklearn), a popular machine learning library in Python; a sketch follows below.
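Assuming the two methods referred to are standardization and min-max scaling, a minimal scikit-learn sketch (with a made-up feature array) might look like this:

import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

X = np.array([[10.0], [20.0], [30.0], [40.0]])  # made-up feature column

# standardization: rescale to zero mean and unit variance
X_standardized = StandardScaler().fit_transform(X)

# min-max scaling: rescale features to the [0, 1] range
X_minmax = MinMaxScaler().fit_transform(X)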
import keras
import numpy as np
y_true = np.array([
[10.0, 7.0]
]) #sample data
y_pred = np.array([
[8.0, 6.0]
])
a = keras.losses.MSE(y_true, y_pred)
print(f'Value of Mean Squared Error is {a.numpy()}')
model = keras.Sequential([
keras.layers.Dense(10, input_shape = (1, ), activation = 'relu'),
keras.layers.Dense(1)
])
model.compile(loss = 'mse', optimizer = 'adam')
model.fit(np.array([[10.0], [20.0], [30.0], [40.0], [50.0], [60.0], [10.0], [20.0]]),
    np.array([6, 12, 18, 24, 30, 36, 6, 12]),
    epochs = 10)
import keras
import numpy as np
y_true = np.array([
[10.0, 7.0]
]) #dummy data
y_pred = np.array([
[8.0, 6.0]
])
c = keras.losses.MAE(y_true, y_pred) #calculating loss
print(f'Value of Mean Absolute Error is {c.numpy()}')
model = keras.models.Sequential([
keras.layers.Dense(10, input_shape = (1, ), activation = 'relu'),
keras.layers.Dense(1)
])
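The snippet above only defines the architecture. To actually train with mean absolute error, you would compile the model with the 'mae' string alias, mirroring the MSE example above (the training data here is made up for illustration):

model.compile(loss = 'mae', optimizer = 'adam')
model.fit(np.array([[10.0], [20.0], [30.0]]), np.array([6, 12, 18]), epochs = 10)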
Let’s review our project structure:
$ tree --dirsfirst .
.
├── output
│   ├── checkpoints
│   └── resnet_fashion_mnist.png
├── pyimagesearch
│   ├── callbacks
│   │   ├── __init__.py
│   │   ├── epochcheckpoint.py
│   │   └── trainingmonitor.py
│   ├── nn
│   │   ├── __init__.py
│   │   └── resnet.py
│   └── __init__.py
└── train.py
5 directories, 8 files
Open up a new file, name it train.py, and insert the following code:
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")
# import the necessary packages
from pyimagesearch.callbacks.epochcheckpoint import EpochCheckpoint
from pyimagesearch.callbacks.trainingmonitor import TrainingMonitor
from pyimagesearch.nn.resnet import ResNet
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import load_model
import tensorflow.keras.backend as K
import numpy as np
import argparse
import cv2
import sys
import os
Now let’s go ahead and parse command line arguments:
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-c", "--checkpoints", required = True,
    help = "path to output checkpoint directory")
ap.add_argument("-m", "--model", type = str,
    help = "path to *specific* model checkpoint to load")
ap.add_argument("-s", "--start-epoch", type = int, default = 0,
    help = "epoch to restart training at")
args = vars(ap.parse_args())
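The original training script loads and preprocesses the Fashion MNIST data before the next step; that listing is not reproduced in this excerpt, but a minimal sketch (assuming the 28x28 images are resized to the 32x32 grayscale inputs expected by ResNet.build further below) would look roughly like this:

# sketch only: load Fashion MNIST and resize each 28x28 image to 32x32 so it
# matches the input dimensions ResNet.build expects
((trainX, trainY), (testX, testY)) = fashion_mnist.load_data()
trainX = np.array([cv2.resize(x, (32, 32)) for x in trainX])
testX = np.array([cv2.resize(x, (32, 32)) for x in testX])
trainX = np.expand_dims(trainX, axis = -1).astype("float32") / 255.0
testX = np.expand_dims(testX, axis = -1).astype("float32") / 255.0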
From here we’ll (1) binarize our labels, and (2) initialize our data augmentation object:
# convert the labels from integers to vectors
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
testY = lb.transform(testY)

# construct the image generator for data augmentation
aug = ImageDataGenerator(width_shift_range = 0.1, height_shift_range = 0.1,
    horizontal_flip = True, fill_mode = "nearest")
And now for the code that either initializes the model or loads it from a checkpoint:
# if there is no specific model checkpoint supplied, then initialize
# the network (ResNet-56) and compile the model
if args["model"] is None:
    print("[INFO] compiling model...")
    opt = SGD(lr = 1e-1)
    model = ResNet.build(32, 32, 1, 10, (9, 9, 9),
        (64, 64, 128, 256), reg = 0.0001)
    model.compile(loss = "categorical_crossentropy", optimizer = opt,
        metrics = ["accuracy"])

# otherwise, we're using a checkpoint model
else:
    # load the checkpoint from disk
    print("[INFO] loading {}...".format(args["model"]))
    model = load_model(args["model"])

    # update the learning rate
    print("[INFO] old learning rate: {}".format(
        K.get_value(model.optimizer.lr)))
    K.set_value(model.optimizer.lr, 1e-2)
    print("[INFO] new learning rate: {}".format(
        K.get_value(model.optimizer.lr)))
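In the full script, the imported EpochCheckpoint and TrainingMonitor callbacks are then constructed and passed to model.fit(). Their exact constructor signatures live in the pyimagesearch package and are not shown in this excerpt, so the following is only a rough sketch with assumed argument names and hyperparameters:

# rough sketch only: the callback arguments, batch size, and epoch count
# below are assumptions, not the actual listing
callbacks = [
    EpochCheckpoint(args["checkpoints"], every = 5, startAt = args["start_epoch"]),
    TrainingMonitor("output/resnet_fashion_mnist.png", startAt = args["start_epoch"])
]
model.fit(x = aug.flow(trainX, trainY, batch_size = 128),
    validation_data = (testX, testY), epochs = 40,
    callbacks = callbacks, verbose = 1)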