Computing gradients w.r.t. model inputs in TensorFlow eager mode

Suggestion : 1

Hope this helps!

model = tf.keras.Sequential([
   tf.keras.layers.Dense(10, activation = tf.nn.relu, input_shape = (len(numeric_headers), )), # input shape required
   tf.keras.layers.Dense(10, activation = tf.nn.relu),
   tf.keras.layers.Dense(1, activation = tf.nn.sigmoid)
])

# model = MyModel()
loss_object = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name = 'train_loss')
# Binary metrics match the single-unit sigmoid output and BinaryCrossentropy loss.
train_accuracy = tf.keras.metrics.BinaryAccuracy(name = 'train_accuracy')

test_loss = tf.keras.metrics.Mean(name = 'test_loss')
test_accuracy = tf.keras.metrics.BinaryAccuracy(name = 'test_accuracy')

def get_gradients(model, features):
   with tf.GradientTape() as tape:
      # Inputs are plain tensors, not variables, so the tape must be told to watch them.
      tape.watch(features)
      predictions = model(features)
   gradients = tape.gradient(predictions, features)
   return gradients

def train_step(features, label):
   with tf.GradientTape() as tape:
      predictions = model(features)
      loss = loss_object(label, predictions)

   gradients = tape.gradient(loss, model.trainable_variables)
   optimizer.apply_gradients(zip(gradients, model.trainable_variables))

   train_loss(loss)
   train_accuracy(label, predictions)

def test_step(features, label):
   predictions = model(features)
   t_loss = loss_object(label, predictions)

   test_loss(t_loss)
   test_accuracy(label, predictions)

EPOCHS = 5
for epoch in range(EPOCHS):
   for features, labels in train_ds:
      train_step(features, labels)

   for features, labels in train_ds:  # substitute a held-out test_ds here if you have one
      test_step(features, labels)

   template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
   print(template.format(epoch + 1,
                         train_loss.result(),
                         train_accuracy.result() * 100,
                         test_loss.result(),
                         test_accuracy.result() * 100))

   if epoch == EPOCHS - 1:
      for features, labels in train_ds:
         print('-')
         print(get_gradients(model, features))
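
To inspect input gradients outside the training loop, get_gradients can also be called on any float tensor shaped like a batch of model inputs. A minimal sketch, assuming len(numeric_headers) is 10 (the random batch is purely illustrative):

features = tf.random.uniform((4, 10), dtype = tf.float32)  # hypothetical batch of 4 samples

# Gradient of the sigmoid output w.r.t. each input feature, per sample.
input_grads = get_gradients(model, features)
print(input_grads.shape)  # (4, 10)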

Suggestion : 2

TensorFlow provides the tf.GradientTape API for automatic differentiation, that is, computing the gradient of a computation with respect to some inputs, usually tf.Variables. TensorFlow "records" relevant operations executed inside the context of a tf.GradientTape onto a "tape", then uses that tape to compute the gradients of the "recorded" computation using reverse-mode differentiation. You can also request gradients of the output with respect to intermediate values computed inside the tf.GradientTape context. To differentiate automatically, TensorFlow needs to remember what operations happen in what order during the forward pass; during the backward pass, it traverses this list of operations in reverse order to compute gradients. Note that TensorFlow doesn't automatically cast between types, so in practice you'll often get a type error instead of a missing gradient.

Setup

import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf

Here is a simple example:

x = tf.Variable(3.0)

with tf.GradientTape() as tape:
   y = x ** 2

Once you've recorded some operations, use GradientTape.gradient(target, sources) to calculate the gradient of some target (often a loss) relative to some source (often the model's variables):

# dy = 2 x * dx
dy_dx = tape.gradient(y, x)
dy_dx.numpy()
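
Since y = x ** 2, dy/dx = 2x, so dy_dx.numpy() evaluates to 6.0 at x = 3.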

The above example uses scalars, but tf.GradientTape works as easily on any tensor:

w = tf.Variable(tf.random.normal((3, 2)), name = 'w')
b = tf.Variable(tf.zeros(2, dtype = tf.float32), name = 'b')
x = [
   [1., 2., 3.]
]

with tf.GradientTape(persistent = True) as tape:
   y = x @ w + b
   loss = tf.reduce_mean(y ** 2)

To get the gradient of loss with respect to both variables, you can pass both as sources to the gradient method. The tape is flexible about how sources are passed and will accept any nested combination of lists or dictionaries and return the gradient structured the same way (see tf.nest).

[dl_dw, dl_db] = tape.gradient(loss, [w, b])
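
Because the tape above was created with persistent = True, gradient() can be called more than once. As a small illustration of the nested-structure support just mentioned, passing the sources as a dict returns the gradients keyed the same way:

my_vars = {'w': w, 'b': b}
grad = tape.gradient(loss, my_vars)
print(grad['b'])  # same shape as b: (2,)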

Suggestion : 3

I have a network made with InceptionV3, and for an input sample bx I want to compute the gradients of the model output w.r.t. a hidden layer. I have the following code. Any solution is appreciated; it doesn't have to be GradientTape if there is any other way to compute these gradients.


bx = tf.reshape(x_batch[0, :, :, :], (1, 299, 299, 3))
with tf.GradientTape() as gtape:
   # gtape.watch(x)
   preds = model(bx)
   print(preds.shape, end = '  ')
   class_idx = np.argmax(preds[0])
   print(class_idx, end = '   ')
   class_output = model.output[:, class_idx]
   print(class_output, end = '   ')
   last_conv_layer = model.get_layer('inception_v3').get_layer('mixed10')
   # gtape.watch(last_conv_layer)
   print(last_conv_layer)
   grads = gtape.gradient(class_output, last_conv_layer.output)  # [0]
   print(grads)
Running this code raises:

RuntimeError: tf.gradients is not supported when eager execution is enabled. Use tf.GradientTape instead.
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
inception_v3 (Model)         (None, 1000)              23851784
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 2002
=================================================================
Total params: 23,853,786
Trainable params: 23,819,354
Non-trainable params: 34,432
_________________________________________________________________
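
The approach below avoids the error by building a second tf.keras.Model that returns both the chosen intermediate layer's output and the final predictions, and running it inside the GradientTape so the intermediate activations are recorded; here it is wrapped in a Grad-CAM helper:
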
def example():
   def grad_cam(input_model, image, category_index, layer_name):
      gradModel = Model(inputs = [model.inputs],
                        outputs = [model.get_layer(layer_name).output, model.output])
      with tf.GradientTape() as tape:
         inputs = tf.cast(image, tf.float32)
         (convOutputs, predictions) = gradModel(inputs)
         loss = predictions[:, category_index]
      grads = tape.gradient(loss, convOutputs)

      castConvOutputs = tf.cast(convOutputs > 0, "float32")
      castGrads = tf.cast(grads > 0, "float32")
      guidedGrads = castConvOutputs * castGrads * grads
      convOutputs = convOutputs[0]
      guidedGrads = guidedGrads[0]
      weights = tf.reduce_mean(guidedGrads, axis = (0, 1))
      cam = tf.reduce_sum(tf.multiply(weights, convOutputs), axis = -1)

      H, W = image.shape[1], image.shape[2]
      cam = np.maximum(cam, 0)  # ReLU so we only get positive importance
      cam = cv2.resize(cam, (W, H), cv2.INTER_NEAREST)
      cam = cam / cam.max()
      return cam

   im = load_image_normalize(im_path, mean, std)
   print(im.shape)
   cam = grad_cam(model, im, 5, 'conv5_block16_concat')  # Mass is class 5

   # Loads reference CAM to compare our implementation with.
   reference = np.load("reference_cam.npy")
   error = np.mean((cam - reference) ** 2)
   print(f"Error from reference: {error:.4f}, should be less than 0.05")

   plt.imshow(load_image(im_path, df, preprocess = False), cmap = 'gray')
   plt.title("Original")
   plt.axis('off')
   plt.show()

   plt.imshow(load_image(im_path, df, preprocess = False), cmap = 'gray')
   plt.imshow(cam, cmap = 'magma', alpha = 0.5)
   plt.title("GradCAM")
   plt.axis('off')
   plt.show()
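
The same pattern, stripped down to a self-contained sketch on a tiny functional model (all layer names and shapes below are invented for illustration, not taken from the question):

import tensorflow as tf

# Tiny stand-in model with a named hidden layer.
inputs = tf.keras.Input(shape = (8, ))
hidden = tf.keras.layers.Dense(16, activation = 'relu', name = 'hidden')(inputs)
outputs = tf.keras.layers.Dense(3, activation = 'softmax', name = 'probs')(hidden)
small_model = tf.keras.Model(inputs, outputs)

# Second model exposing both the hidden activations and the predictions.
grad_model = tf.keras.Model(inputs = small_model.inputs,
                            outputs = [small_model.get_layer('hidden').output, small_model.output])

x = tf.random.normal((1, 8))
with tf.GradientTape() as tape:
   hidden_out, preds = grad_model(x)
   class_idx = int(tf.argmax(preds[0]))
   class_score = preds[:, class_idx]

# Gradient of the winning class score w.r.t. the hidden layer activations.
grads = tape.gradient(class_score, hidden_out)
print(grads.shape)  # (1, 16)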

Suggestion : 4

This pattern also exists in PyTorch's autograd. torch.autograd.backward computes the sum of gradients of given tensors with respect to graph leaves, while torch.autograd.grad computes and returns the sum of gradients of outputs with respect to the inputs. Autograd also includes a profiler that lets you inspect the cost of different operators inside your model, both on the CPU and GPU; two modes are implemented at the moment: CPU-only using profile, and nvprof based (registering both CPU and GPU activity) using emit_nvtx. The profiler's with_modules (bool) option records the module hierarchy (including function names) corresponding to the call stack of the op; e.g., if module A's forward calls module B's forward, which contains an aten::add op, then aten::add's module hierarchy is A.B. Note that, at the moment, this support exists only for TorchScript models and not eager-mode models.
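
Relating this back to the question: in PyTorch the counterpart of watching the input is marking it with requires_grad=True and calling torch.autograd.grad. A minimal sketch (the tiny network below is invented for illustration):

import torch
from torch import autograd, nn

# Small stand-in network with a sigmoid output, analogous to the Keras model above.
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1), nn.Sigmoid())

x = torch.rand(2, 4, requires_grad=True)  # mark the input so autograd tracks it
y = net(x)

# Gradients of the summed outputs w.r.t. the inputs.
(input_grads,) = autograd.grad(outputs=y.sum(), inputs=x)
print(input_grads.shape)  # torch.Size([2, 4])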

for iterations...
    ...
    for param in model.parameters():
        param.grad = None
    loss.backward()
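
(In recent PyTorch versions, optimizer.zero_grad(set_to_none=True) clears the gradients to None in the same way.)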
>>> class Exp(Function):
...     @staticmethod
...     def forward(ctx, i):
...         result = i.exp()
...         ctx.save_for_backward(result)
...         return result
...
...     @staticmethod
...     def backward(ctx, grad_output):
...         result, = ctx.saved_tensors
...         return grad_output * result
...
>>> # Use it by calling the apply method:
>>> output = Exp.apply(input)
>>> x = torch.randn((1, 1), requires_grad=True)
>>> with torch.autograd.profiler.profile() as prof:
...     for _ in range(100):  # any normal python code, really!
...         y = x ** 2
...         y.backward()
>>> # NOTE: some columns were removed for brevity
>>> print(prof.key_averages().table(sort_by="self_cpu_time_total"))
-----------------------------------  ---------------  ---------------  ---------------
Name                                 Self CPU total   CPU time avg     Number of Calls
-----------------------------------  ---------------  ---------------  ---------------
mul                                  32.048 ms        32.048 ms        200
pow                                  27.041 ms        27.041 ms        200
PowBackward0                         9.727 ms         55.483 ms        100
torch::autograd::AccumulateGrad      9.148 ms         9.148 ms         100
torch::autograd::GraphRoot           691.816 us       691.816 us       100
-----------------------------------  ---------------  ---------------  ---------------
nvprof --profile-from-start off -o trace_name.prof -- <regular command here>
>>> with torch.cuda.profiler.profile():
...     model(x)  # Warmup CUDA memory allocator and profiler
...     with torch.autograd.profiler.emit_nvtx():
...         model(x)
>>> import torch
>>> from torch import autograd
>>> class MyFunc(autograd.Function):
...     @staticmethod
...     def forward(ctx, inp):
...         return inp.clone()
...     @staticmethod
...     def backward(ctx, gO):
...         # Error during the backward pass
...         raise RuntimeError("Some error in backward")
...         return gO.clone()
>>> def run_fn(a):
...     out = MyFunc.apply(a)
...     return out.sum()
>>> inp = torch.rand(10, 10, requires_grad=True)
>>> out = run_fn(inp)
>>> out.backward()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/your/pytorch/install/torch/_tensor.py", line 93, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/your/pytorch/install/torch/autograd/__init__.py", line 90, in backward
    allow_unreachable=True)  # allow_unreachable flag
  File "/your/pytorch/install/torch/autograd/function.py", line 76, in apply
    return self._forward_cls.backward(self, *args)
  File "<stdin>", line 8, in backward
RuntimeError: Some error in backward
>>> with autograd.detect_anomaly():
...     inp = torch.rand(10, 10, requires_grad=True)
...     out = run_fn(inp)
...     out.backward()
Traceback of forward call that caused the error:
  File "tmp.py", line 53, in <module>
    out = run_fn(inp)
  File "tmp.py", line 44, in run_fn
    out = MyFunc.apply(a)
Traceback (most recent call last):
  File "<stdin>", line 4, in <module>
  File "/your/pytorch/install/torch/_tensor.py", line 93, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/your/pytorch/install/torch/autograd/__init__.py", line 90, in backward
    allow_unreachable=True)  # allow_unreachable flag
  File "/your/pytorch/install/torch/autograd/function.py", line 76, in apply
    return self._forward_cls.backward(self, *args)
  File "<stdin>", line 8, in backward
RuntimeError: Some error in backward