**[Update]** Here's how I would do it:

# Demonstrates snapshotting trainable variables into a dict, overwriting
# them, and then restoring the snapshot, all inside one TF1 session.
import numpy as np
import tensorflow as tf

x = tf.placeholder(shape=[None, 5], dtype=tf.float32, name='x')
W = tf.Variable(np.zeros([5, 5]), dtype=tf.float32, name='W')
b = tf.Variable(np.zeros([5]), dtype=tf.float32, name='b')
y = tf.add(tf.matmul(x, W), b)

with tf.Session() as session:
    batch = np.ones([2, 5])
    session.run(tf.global_variables_initializer())
    print(session.run(y, feed_dict={x: batch}))  # prints [2, 5] zeros

    # store the current value, keyed by variable name (e.g. 'W:0')
    store = {v.name: v.eval(session) for v in tf.trainable_variables()}
    print(store)  # prints [5, 5] and [5] zeros

    # update
    new = {'W:0': np.ones([5, 5]), 'b:0': np.ones([5])}
    session.run(tf.tuple([tf.assign(var, new[var.name])
                          for var in tf.trainable_variables()]))
    print(session.run(y, feed_dict={x: batch}))  # prints [2, 5] sixes

    # restore
    session.run(tf.tuple([tf.assign(var, store[var.name])
                          for var in tf.trainable_variables()]))
    print(session.run(y, feed_dict={x: batch}))  # prints [2, 5] zeros again

It wasn't my original intent to answer this question myself, but I've come up with a method that works fairly well, so I thought I'd share it. The key insight came from this very clever answer. The approach is to reuse the assignment nodes created for initial variable assignment. A complete class implementing that approach is given below.

import tensorflow as tf


class TensorFlowState(object):
    """Store and restore the values of all global variables in a graph.

    The class reuses the ``<variable>/Assign`` operations that were created
    for the initial variable assignment: on restore, the stored values are
    fed in place of each Assign op's initial-value input.

    Usage: construct after the graph is built, call ``start(sess)``, then
    ``store()`` and later ``restore()``.
    """

    def __init__(self):
        # Get the graph.
        graph = tf.get_default_graph()

        # Extract the global variables from the graph.
        self.gvars = graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

        # Extract the Assign operations for later use.
        self.assign_ops = [graph.get_operation_by_name(v.op.name + "/Assign")
                           for v in self.gvars]

        # Extract the initial value ops from each Assign op for later use.
        self.init_values = [op.inputs[1] for op in self.assign_ops]

        # Populated by start() and store(); kept as None until then so that
        # misuse produces a clear error instead of an AttributeError.
        self.sess = None
        self.state = None

    def start(self, sess):
        """Attach the session used by ``store`` and ``restore``."""
        self.sess = sess

    def store(self):
        """Record the current state of the TF global variables.

        Raises:
            RuntimeError: if ``start`` has not been called yet.
        """
        if self.sess is None:
            raise RuntimeError("start(sess) must be called before store()")
        self.state = self.sess.run(self.gvars)

    def restore(self):
        """Load the previously stored values back into the global variables.

        Returns:
            The result of running the Assign operations in the session.

        Raises:
            RuntimeError: if ``start`` or ``store`` has not been called yet.
        """
        if self.sess is None:
            raise RuntimeError("start(sess) must be called before restore()")
        if self.state is None:
            raise RuntimeError("store() must be called before restore()")

        # Create a dictionary of the initializers and stored state of globals.
        feed_dict = dict(zip(self.init_values, self.state))

        # Use the initializer ops for each variable to load the stored values.
        return self.sess.run(self.assign_ops, feed_dict=feed_dict)

In this post we are going to talk about how to save the parameters to disk and restore the saved parameters from disk. The savable/restorable parameters of the network are Variables (i.e. weights and biases). Important note: in order to restore the parameters, the graph must be defined. Since we defined the graph above, we didn't have a problem restoring the parameters. But what happens if we have not loaded the graph? Notice that this time we did not initialize the variables in our session. Instead, we restored them from disk. Now that everything you need is saved to disk, you can load your saved variables in the session using saver.restore():

To save and restore your variables, all you need to do is to call the `tf.train.Saver()`

at the end of your graph.

# create the graph (arguments elided with `...`)
X = tf.placeholder(...)
Y = tf.placeholder(...)
w = tf.get_variable(...)
b = tf.get_variable(...)
...
loss = tf.losses.mean_squared_error(...)
optimizer = tf.train.AdamOptimizer(...).minimize(loss)
...
# Create the Saver last so it sees the variables defined above.
saver = tf.train.Saver()

```
# TRAIN
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # train our model
    for step in range(steps):
        sess.run(optimizer)
        ...
    # Save once training is done; `global_step` is passed through to the
    # saver, and the path it returns is kept in `saved_path`.
    saved_path = saver.save(sess, './my-model', global_step=step)
```

```
# TEST
with tf.Session() as sess:
    # The model was saved with `global_step`, so the checkpoint on disk is
    # named './my-model-<step>'. Resolve the most recent checkpoint in the
    # current directory instead of passing the bare './my-model' prefix.
    saver.restore(sess, tf.train.latest_checkpoint('.'))
    ...
```

```
# TRAIN
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # train our model
    for step in range(steps):
        sess.run(optimizer)
        ...
    # Save once training is done; `global_step` is passed through to the
    # saver, and the path it returns is kept in `saved_path`.
    saved_path = saver.save(sess, './my-model', global_step=step)
```

```
# TEST
with tf.Session() as sess:
    # The model was saved with `global_step`, so the checkpoint on disk is
    # named './my-model-<step>'. Resolve the most recent checkpoint in the
    # current directory instead of passing the bare './my-model' prefix.
    saver.restore(sess, tf.train.latest_checkpoint('.'))
    ...
```

import tensorflow as tf

# create variables a and b
a = tf.get_variable("A", initializer=tf.constant(3, shape=[2]))
b = tf.get_variable("B", initializer=tf.constant(5, shape=[3]))

# initialize all of the variables
init_op = tf.global_variables_initializer()

```
# run the session
with tf.Session() as sess:
    # initialize all of the variables in the session
    sess.run(init_op)
    # run the session to get the value of the variable
    a_out, b_out = sess.run([a, b])
    print('a = ', a_out)
    print('b = ', b_out)
```

TensorFlow saves variables in binary checkpoint files that map variable names to tensor values. Create a Saver with tf.train.Saver() to manage all variables in the model. For example, the following snippet demonstrates how to call the tf.train.Saver.save method to save variables to checkpoint files. You can create as many Saver objects as you want if you need to save and restore different subsets of the model variables. The same variable can be listed in multiple saver objects; its value is only changed when the Saver.restore() method is run. You can create as many Saver objects as you want if you need to save and restore different subsets of the model variables. The same variable can be listed in multiple saver objects; its value is only changed when the Saver.restore() method is run.

Create a `Saver`

with `tf.train.Saver()`

to manage all variables in the model. For example, the following snippet demonstrates how to call the `tf.train.Saver.save`

method to save variables to checkpoint files:

# Create some variables.
v1 = tf.get_variable("v1", shape=[3], initializer=tf.zeros_initializer)
v2 = tf.get_variable("v2", shape=[5], initializer=tf.zeros_initializer)

inc_v1 = v1.assign(v1 + 1)
dec_v2 = v2.assign(v2 - 1)

# Add an op to initialize the variables.
init_op = tf.global_variables_initializer()

# Add ops to save and restore all the variables.
saver = tf.train.Saver()

# Later, launch the model, initialize the variables, do some work, and save the
# variables to disk.
with tf.Session() as sess:
    sess.run(init_op)
    # Do some work with the model.
    inc_v1.op.run()
    dec_v2.op.run()
    # Save the variables to disk.
    save_path = saver.save(sess, "/tmp/model.ckpt")
    print("Model saved in path: %s" % save_path)

The `tf.train.Saver`

object not only saves variables to checkpoint files, it also restores variables. Note that when you restore variables you do not have to initialize them beforehand. For example, the following snippet demonstrates how to call the `tf.train.Saver.restore`

method to restore variables from the checkpoint files:

tf.reset_default_graph()

# Create some variables.
v1 = tf.get_variable("v1", shape=[3])
v2 = tf.get_variable("v2", shape=[5])

# Add ops to save and restore all the variables.
saver = tf.train.Saver()

# Later, launch the model, use the saver to restore variables from disk, and
# do some work with the model.
with tf.Session() as sess:
    # Restore variables from disk.
    saver.restore(sess, "/tmp/model.ckpt")
    print("Model restored.")
    # Check the values of the variables
    print("v1 : %s" % v1.eval())
    print("v2 : %s" % v2.eval())

Continuing from the save/restore examples shown earlier:

tf.reset_default_graph()

# Create some variables.
v1 = tf.get_variable("v1", [3], initializer=tf.zeros_initializer)
v2 = tf.get_variable("v2", [5], initializer=tf.zeros_initializer)

# Add ops to save and restore only `v2` using the name "v2"
saver = tf.train.Saver({"v2": v2})

# Use the saver object normally after that.
with tf.Session() as sess:
    # Initialize v1 since the saver will not.
    v1.initializer.run()
    saver.restore(sess, "/tmp/model.ckpt")

    print("v1 : %s" % v1.eval())
    print("v2 : %s" % v2.eval())

For example, the following code suggests a typical way to use `SavedModelBuilder`

to build a SavedModel:

export_dir = ...
...
builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
with tf.Session(graph=tf.Graph()) as sess:
    ...
    # The first MetaGraphDef is added together with the variables.
    builder.add_meta_graph_and_variables(sess,
                                         [tag_constants.TRAINING],
                                         signature_def_map=foo_signatures,
                                         assets_collection=foo_assets,
                                         strip_default_attrs=True)
...
# Add a second MetaGraphDef for inference.
with tf.Session(graph=tf.Graph()) as sess:
    ...
    builder.add_meta_graph([tag_constants.SERVING], strip_default_attrs=True)
...
builder.save()

When you train a model, you use variables to hold and update parameters. Variables are in-memory buffers containing tensors. They must be explicitly initialized and can be saved to disk during and after training. You can later restore saved values to exercise or analyse the model. Use tf.initialize_all_variables() to add an op to run variable initializers. Only run that op after you have fully constructed your model and launched it in a session. Variables are saved in binary files that, roughly, contain a map from variable names to tensor values. You can create as many saver objects as you want if you need to save and restore different subsets of the model variables. The same variable can be listed in multiple saver objects; its value is only changed when the saver restore() method is run.

Note that all these ops require you to specify the shape of the tensors. That shape automatically becomes the shape of the variable. Variables generally have a fixed shape, but TensorFlow provides advanced mechanisms to reshape variables.

# Create two variables.
weights = tf.Variable(tf.random_normal([784, 200], stddev=0.35),
                      name="weights")
biases = tf.Variable(tf.zeros([200]), name="biases")

Use `tf.initialize_all_variables()`

to add an op to run variable initializers. Only run that op after you have fully constructed your model and launched it in a session.

# Create two variables.
weights = tf.Variable(tf.random_normal([784, 200], stddev=0.35),
                      name="weights")
biases = tf.Variable(tf.zeros([200]), name="biases")
...
# Add an op to initialize the variables.
# NOTE(review): tf.initialize_all_variables() is deprecated in later TF 1.x
# releases in favor of tf.global_variables_initializer() - confirm against
# the TensorFlow version in use.
init_op = tf.initialize_all_variables()

# Later, when launching the model
with tf.Session() as sess:
    # Run the init operation.
    sess.run(init_op)
    ...
    # Use the model
    ...

To initialize a new variable from the value of another variable use the other variable's `initialized_value()`

property. You can use the initialized value directly as the initial value for the new variable, or you can use it as any other tensor to compute a value for the new variable.

# Create a variable with a random value.
weights = tf.Variable(tf.random_normal([784, 200], stddev=0.35),
                      name="weights")
# Create another variable with the same value as 'weights'.
w2 = tf.Variable(weights.initialized_value(), name="w2")
# Create another variable with twice the value of 'weights'
w_twice = tf.Variable(weights.initialized_value() * 2.0, name="w_twice")

The same `Saver`

object is used to restore variables. Note that when you restore variables from a file you do not have to initialize them beforehand.

# Create some variables.
v1 = tf.Variable(..., name="v1")
v2 = tf.Variable(..., name="v2")
...
# Add ops to save and restore all the variables.
saver = tf.train.Saver()

# Later, launch the model, use the saver to restore variables from disk, and
# do some work with the model.
with tf.Session() as sess:
    # Restore variables from disk.
    saver.restore(sess, "/tmp/model.ckpt")
    print("Model restored.")
    # Do some work with the model
    ...

You can create as many saver objects as you want if you need to save and restore different subsets of the model variables. The same variable can be listed in multiple saver objects, its value is only changed when the saver

`restore()`

method is run. If you only restore a subset of the model variables at the start of a session, you have to run an initialize op for the other variables. See

`tf.initialize_variables()`

for more information.

# Create some variables.
v1 = tf.Variable(..., name="v1")
v2 = tf.Variable(..., name="v2")
...
# Add ops to save and restore only 'v2' using the name "my_v2"
saver = tf.train.Saver({"my_v2": v2})
# Use the saver object normally after that.
...

Last updated 2022-03-10 UTC.

## Setup

`import tensorflow as tf`

```
import tensorflow as tf
```

```
class Net(tf.keras.Model):
    """A simple linear model."""

    def __init__(self):
        super(Net, self).__init__()
        # Single dense layer with 5 output units.
        self.l1 = tf.keras.layers.Dense(5)

    def call(self, x):
        """Apply the dense layer to `x` and return the result."""
        return self.l1(x)
```

```
class Net(tf.keras.Model):
    """A simple linear model."""

    def __init__(self):
        super(Net, self).__init__()
        # Single dense layer with 5 output units.
        self.l1 = tf.keras.layers.Dense(5)

    def call(self, x):
        """Apply the dense layer to `x` and return the result."""
        return self.l1(x)
```

# Create an instance of the Net model.
net = Net()

To help demonstrate all the features of `tf.train.Checkpoint`

, define a toy dataset and optimization step:

```
def toy_dataset():
    """Return a repeating dataset of `x`/`y` examples in batches of 2.

    `x` is `tf.range(10.)` as a column; `y` is `x * 5.` plus
    `tf.range(5.)` as a row (broadcast against each other).
    """
    inputs = tf.range(10.)[:, None]
    labels = inputs * 5. + tf.range(5.)[None, :]
    return tf.data.Dataset.from_tensor_slices(
        dict(x=inputs, y=labels)).repeat().batch(2)
```

```
def toy_dataset():
    """Return a repeating dataset of `x`/`y` examples in batches of 2.

    `x` is `tf.range(10.)` as a column; `y` is `x * 5.` plus
    `tf.range(5.)` as a row (broadcast against each other).
    """
    inputs = tf.range(10.)[:, None]
    labels = inputs * 5. + tf.range(5.)[None, :]
    return tf.data.Dataset.from_tensor_slices(
        dict(x=inputs, y=labels)).repeat().batch(2)
```

```
def train_step(net, example, optimizer):
    """Trains `net` on `example` using `optimizer`.

    Args:
        net: Callable model exposing `trainable_variables`.
        example: Mapping with an `'x'` input entry and a `'y'` target entry.
        optimizer: Object exposing `apply_gradients`.

    Returns:
        The loss: the mean absolute difference between the model output and
        `example['y']`, computed before the gradient update is applied.
    """
    # Only the forward pass and loss need to be recorded on the tape.
    with tf.GradientTape() as tape:
        output = net(example['x'])
        loss = tf.reduce_mean(tf.abs(output - example['y']))
    variables = net.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return loss
```

Every tensor keeps a version counter that is incremented every time it is marked dirty in any operation. When a Function saves any tensors for backward, the version counter of their containing Tensor is saved as well. Once you access self.saved_tensors, it is checked, and if it is greater than the saved value an error is raised. This ensures that if you’re using in-place functions and not seeing any errors, you can be sure that the computed gradients are correct. This last equation is the important one for writing your own gradients, as it decomposes our derivative formula into a simpler one that is easy to compute by hand. Whether a tensor will be packed into a different tensor object depends on whether it is an output of its own grad_fn, which is an implementation detail subject to change and that users should not rely on. It should be noted here that since $u$ and $v$ are real functions, and $L$ is real by our assumption that $f$ is a part of a real-valued function, we have:

# pow() saves its input for the backward pass; inspect it via grad_fn.
x = torch.randn(5, requires_grad=True)
y = x.pow(2)
print(x.equal(y.grad_fn._saved_self))  # True
print(x is y.grad_fn._saved_self)  # True

# exp() saves its result for the backward pass; inspect it via grad_fn.
x = torch.randn(5, requires_grad=True)
y = x.exp()
print(y.equal(y.grad_fn._saved_result))  # True
print(y is y.grad_fn._saved_result)  # False

```
# Define a train function to be used in different threads
def train_fn():
    """Run one forward and backward pass on a fresh 5x5 tensor of ones."""
    x = torch.ones(5, 5, requires_grad=True)
    # forward
    y = (x + 3) * (x + 4) * 0.5
    # backward
    y.sum().backward()
    # potential optimizer update
# Users write their own threading code to drive the train_fn
threads = []
for _ in range(10):
    p = threading.Thread(target=train_fn, args=())
    p.start()
    threads.append(p)

# Wait for every worker thread to finish.
for p in threads:
    p.join()
```

```
class SelfDeletingTempFile():
    """Holds a unique file path under `tmp_dir` and removes it on deletion.

    The constructor only chooses the path; it does not create the file.
    """

    def __init__(self):
        self.name = os.path.join(tmp_dir, str(uuid.uuid4()))

    def __del__(self):
        os.remove(self.name)
def pack_hook(tensor):
    """Save `tensor` to a new temporary file and return the file handle."""
    temp_file = SelfDeletingTempFile()
    torch.save(tensor, temp_file.name)
    return temp_file
def unpack_hook(temp_file):
    """Load and return the object stored at `temp_file.name`."""
    return torch.load(temp_file.name)
```

x = torch.randn(5, requires_grad=True)
y = x.pow(2)
# Attach the pack/unpack hooks to the tensor that pow() saved for backward.
y.grad_fn._raw_saved_self.register_hooks(pack_hook, unpack_hook)

# Only save on disk tensors that have size >= 1000
SAVE_ON_DISK_THRESHOLD = 1000


def pack_hook(x):
    """Return `x` itself if it is small, else save it to a temp file.

    Tensors with fewer than SAVE_ON_DISK_THRESHOLD elements are returned
    unchanged; larger ones are written to disk and the file handle is
    returned instead.
    """
    if x.numel() < SAVE_ON_DISK_THRESHOLD:
        return x
    temp_file = SelfDeletingTempFile()
    # Save the hook's own argument (the original referenced an undefined
    # name `tensor` here).
    torch.save(x, temp_file.name)
    return temp_file


def unpack_hook(tensor_or_sctf):
    """Return the tensor as-is, or load it back from its temp file."""
    if isinstance(tensor_or_sctf, torch.Tensor):
        return tensor_or_sctf
    return torch.load(tensor_or_sctf.name)


class Model(nn.Module):
    """Module whose forward pass runs under the pack/unpack hooks."""

    def forward(self, x):
        with torch.autograd.graph.saved_tensors_hooks(pack_hook, unpack_hook):
            # ... compute output
            output = x
        return output


model = Model()
net = nn.DataParallel(model)