python - load multiple pickle objects into a single dictionary

  • Last Update :
  • Techknowledgy :

First, you need not store the number of items you pickled separately if you stop loading when you hit the end of the file:

def loadall(filename):
    """Lazily yield every object pickled back-to-back in *filename*.

    The file is opened in binary mode and ``pickle.load`` is called
    repeatedly; each call consumes exactly one pickle from the stream.
    Iteration stops when ``pickle.load`` raises ``EOFError``, so the number
    of stored objects does not have to be recorded separately.

    Because this is a generator, the file stays open until the generator is
    exhausted or closed.

    SECURITY: unpickling can execute arbitrary code -- only use this on
    files that come from a trusted source.
    """
    with open(filename, "rb") as f:
        while True:
            try:
                yield pickle.load(f)
            except EOFError:
                # End of the pickle stream: nothing left to load.
                break

items = loadall(myfilename)

Using a list, tuple, or dict is by far the most common way to do this:

import pickle
PIK = "pickle.dat"

data = ["A", "b", "C", "d"]
# Store the whole list as ONE pickle, then read it back with ONE load.
with open(PIK, "wb") as f:
    pickle.dump(data, f)
with open(PIK, "rb") as f:
    # print() as a function works on both Python 2 and Python 3.
    print(pickle.load(f))

That prints:

['A', 'b', 'C', 'd']

However, a pickle file can contain any number of pickles. Here's code producing the same output. But note that it's harder to write and to understand:

with open(PIK, "wb") as f:
    # Write the element count first so the reader knows how many
    # individual pickles follow in the same file.
    pickle.dump(len(data), f)
    # The loop must stay inside the `with`: the file has to be open
    # while each value is dumped.
    for value in data:
        pickle.dump(value, f)

with open(PIK, "rb") as f:
    count = pickle.load(f)  # first pickle in the file is the count
    data2 = [pickle.load(f) for _ in range(count)]
print(data2)

Try this:

import pickle

obj_1 = ['test_1', {'ability', 'mobility'}]
obj_2 = ['test_2', {'ability', 'mobility'}]
obj_3 = ['test_3', {'ability', 'mobility'}]

# Dump the three objects one after another into the same file.
# `with` guarantees the file is closed even if a dump fails.
with open('test.pkl', 'wb') as f:
    pickle.dump(obj_1, f)
    pickle.dump(obj_2, f)
    pickle.dump(obj_3, f)

# Each pickle.load() call returns the next object, in the order written.
with open('test.pkl', 'rb') as f:
    obj_1 = pickle.load(f)
    obj_2 = pickle.load(f)
    obj_3 = pickle.load(f)

print(obj_1)
print(obj_2)
print(obj_3)

You can run a loop (as the accepted answer shows) to keep unpickling rows until you reach the end-of-file (at which point an EOFError is raised).

data = []
# SECURITY: only unpickle files from a trusted source.
with open("data.pickle", "rb") as f:
    while True:
        try:
            data.append(pickle.load(f))
        except EOFError:
            # Raised by pickle.load once every pickle has been consumed.
            break

Minimal Verifiable Example

import pickle

# Dumping step: every dict is written as its own pickle.
data = [{'a': 1}, {'b': 2}]
with open('test.pkl', 'wb') as f:
    for d in data:
        pickle.dump(d, f)

# Loading step: keep loading until the end of the file is reached.
data2 = []
with open('test.pkl', 'rb') as f:
    while True:
        try:
            data2.append(pickle.load(f))
        except EOFError:
            break

data2
# [{'a': 1}, {'b': 2}]

data == data2
# True

Of course, this is under the assumption that your objects have to be pickled individually. You can also store your data as a single list of objects, then use a single pickle/unpickle call (no need for loops).

data = [{'a': 1}, {'b': 2}]  # list of dicts as an example
# One dump call stores the whole list as a single pickle ...
with open('test.pkl', 'wb') as f:
    pickle.dump(data, f)

# ... and one load call restores it.
with open('test.pkl', 'rb') as f:
    data2 = pickle.load(f)

data2
# [{'a': 1}, {'b': 2}]

Here is an object-oriented demo that uses pickle to store and restore one object or multiple objects:

class Worker(object):
    """Small record with a name and an address, used to demo pickling."""

    def __init__(self, name, addr):
        self.name = name
        self.addr = addr

    def __str__(self):
        string = u'[<Worker> name:%s addr:%s]' % (self.name, self.addr)
        return string


# output one item
with open('testfile.bin', 'wb') as f:
    w1 = Worker('tom1', 'China')
    pickle.dump(w1, f)

# input one item
with open('testfile.bin', 'rb') as f:
    w1_restore = pickle.load(f)
print('item: %s' % w1_restore)

# output multi items: the workers are wrapped in a list so that a single
# dump/load pair handles all of them
with open('testfile.bin', 'wb') as f:
    w1 = Worker('tom2', 'China')
    w2 = Worker('tom3', 'China')
    pickle.dump([w1, w2], f)

# input multi items
with open('testfile.bin', 'rb') as f:
    w_list = pickle.load(f)

for w in w_list:
    print('item-list: %s' % w)

output:

item: [<Worker> name:tom1 addr:China]
item-list: [<Worker> name:tom2 addr:China]
item-list: [<Worker> name:tom3 addr:China]

It's easy if you use klepto, which gives you the ability to transparently store objects in files or databases. It uses a dict API, and allows you to dump and/or load specific entries from an archive (in the case below, serialized objects stored one entry per file in a directory called scores).

>>> import klepto
>>> scores = klepto.archives.dir_archive('scores', serialized=True)
>>> scores['Guido'] = 69
>>> scores['Fernando'] = 42
>>> scores['Polly'] = 101
>>> scores.dump()
>>> # access the archive, and load only one
>>> results = klepto.archives.dir_archive('scores', serialized=True)
>>> results.load('Polly')
>>> results
dir_archive('scores', {'Polly': 101}, cached=True)
>>> results['Polly']
101
>>> # load all the scores
>>> results.load()
>>> results['Guido']
69
>>>

Suggestion : 2

So my problem is this... I have multiple Pickle object files (which are pickled dictionaries) and I want to load them all, but essentially merge each dictionary into a single larger dictionary. In essence, it works, but just overwrites the contents of my_dict rather than appending each pickle object. I have pickle_file1 and pickle_file2, and both contain dictionaries. I would like the contents of pickle_file1 and pickle_file2 loaded into my_dict_final. @Nunchux, @Vikas Ojha If the dictionaries happen to have common keys, the update method will, unfortunately, overwrite the values for those common keys. Example:

EDIT: As per request, here is what I have so far:

for pkl_file in pkl_file_list:
    # `with` closes the file even if unpickling fails.
    with open(pkl_file, 'rb') as pickle_in:
        # NOTE: my_dict is rebound on every iteration, so after the loop it
        # only holds the dictionary from the LAST file -- nothing is merged.
        my_dict = pickle.load(pickle_in)
my_dict_final = {}  # Create an empty dictionary
with open('pickle_file1', 'rb') as f:
    my_dict_final.update(pickle.load(f))  # Update contents of file1 to the dictionary
with open('pickle_file2', 'rb') as f:
    my_dict_final.update(pickle.load(f))  # Update contents of file2 to the dictionary
print(my_dict_final)

You can use the dict.update function.

# pickle_file1 / pickle_file2 must be file objects opened for binary
# reading; pickle.load reads from an open file object.
pickle_dict1 = pickle.load(pickle_file1)
pickle_dict2 = pickle.load(pickle_file2)
# my_dict_final is the same object as pickle_dict1, so the update below
# also modifies pickle_dict1. Keys present in both dictionaries end up with
# the value from pickle_dict2.
my_dict_final = pickle_dict1
my_dict_final.update(pickle_dict2)

@Nunchux, @Vikas Ojha If the dictionaries happen to have common keys, the update method will, unfortunately, overwrite the values for those common keys. Example:

>>> dict1 = {'a': 4, 'b': 3, 'c': 0, 'd': 4}
>>> dict2 = {'a': 1, 'b': 8, 'c': 5}

>>> All_dict = {}
>>> All_dict.update(dict1)
>>> All_dict.update(dict2)

>>> All_dict
{'a': 1, 'b': 8, 'c': 5, 'd': 4}

If you'd like to avoid this and instead add up the counts of common keys, one option is to use the following strategy. Applied to your example, here is a minimal working example:

import os
import pickle
from collections import Counter

dict1 = {
    'a': 4,
    'b': 3,
    'c': 0,
    'd': 4,
}
dict2 = {
    'a': 1,
    'b': 8,
    'c': 5,
}

# just creating two pickle files:
with open("dict1.pickle", "wb") as pickle_out:
    pickle.dump(dict1, pickle_out)

with open("dict2.pickle", "wb") as pickle_out:
    pickle.dump(dict2, pickle_out)

# Here comes:
pkl_file_list = ["dict1.pickle", "dict2.pickle"]

All_dict = Counter({})
for pkl_file in pkl_file_list:
    # Missing files are skipped silently.
    if os.path.exists(pkl_file):
        with open(pkl_file, "rb") as pickle_in:
            dict_i = pickle.load(pickle_in)
        # Counter addition sums the values of common keys instead of
        # overwriting them. NOTE: `+` on Counters drops entries whose
        # resulting count is zero or negative.
        All_dict = All_dict + Counter(dict_i)

print(dict(All_dict))

This will happily give you:

{
   'a': 5,
   'b': 11,
   'd': 4,
   'c': 5
}

Suggestion : 3

The pickle module keeps track of the objects it has already serialized, so that later references to the same object won’t be serialized again. marshal doesn’t do this.,marshal cannot be used to serialize user-defined classes and their instances. pickle can save and restore class instances transparently, however the class definition must be importable and live in the same module as when the object was stored.,To unpickle external objects, the unpickler must have a custom persistent_load() method that takes a persistent ID object and returns the referenced object.,Read the pickled representation of an object from the open file object file and return the reconstituted object hierarchy specified therein. This is equivalent to Unpickler(file).load().

class Foo:
   """Minimal class with a single class-level attribute, used as a pickling example."""
   attr = 'A class attribute'

# Pickle the class object itself (not an instance of it) into a bytes string.
picklestring = pickle.dumps(Foo)
def save(obj):
    """Return the ``(class, attribute dict)`` pair describing *obj*.

    The second element is the instance's own ``__dict__`` (not a copy).
    """
    cls = obj.__class__
    attributes = obj.__dict__
    return cls, attributes

def restore(cls, attributes):
    """Rebuild an instance of *cls* from a saved attribute mapping.

    The instance is created with ``cls.__new__`` so ``__init__`` is NOT
    called; its state is then filled in by copying *attributes* into the
    new object's ``__dict__``.

    Returns the newly created instance.
    """
    obj = cls.__new__(cls)
    obj.__dict__.update(attributes)
    return obj
f = io.BytesIO()
p = pickle.Pickler(f)
# Give this one pickler a private dispatch table: a copy of the global
# copyreg table plus an entry mapping SomeClass to reduce_SomeClass.
p.dispatch_table = {**copyreg.dispatch_table, SomeClass: reduce_SomeClass}
class MyPickler(pickle.Pickler):
    """Pickler subclass whose instances share a class-level dispatch table.

    The table starts as a copy of ``copyreg.dispatch_table`` and adds an
    entry mapping ``SomeClass`` to ``reduce_SomeClass``.
    """

    dispatch_table = copyreg.dispatch_table.copy()
    # Must live in the class body: `dispatch_table` is a class attribute
    # and is not defined as a module-level name.
    dispatch_table[SomeClass] = reduce_SomeClass


f = io.BytesIO()
p = MyPickler(f)
# Register reduce_SomeClass for SomeClass through copyreg.pickle
# (SomeClass and reduce_SomeClass are defined outside this snippet).
copyreg.pickle(SomeClass, reduce_SomeClass)
f = io.BytesIO()  # in-memory binary buffer handed to the pickler
p = pickle.Pickler(f)