First, you need not store the number of items you pickled separately if you stop loading when you hit the end of the file:
def loadall(filename):
    """Lazily yield every pickled object stored in *filename*.

    A pickle file may hold any number of consecutive records; ``pickle.load``
    raises ``EOFError`` once no records remain, which is our stop signal —
    no need to store the item count separately.
    """
    with open(filename, "rb") as f:
        while True:
            try:
                yield pickle.load(f)
            except EOFError:
                break


# Example usage (note: the result is a generator — iterate it to read):
# items = loadall(myfilename)
Using a list, tuple, or dict is by far the most common way to do this:
import pickle

# File that holds the pickled data for this demo.
PIK = "pickle.dat"

data = ["A", "b", "C", "d"]

# Serialize the whole list as ONE pickle record.
with open(PIK, "wb") as f:
    pickle.dump(data, f)

# Read it back and show the round-tripped value.
# (Fixed: Python-2 `print` statement -> Python-3 print() call.)
with open(PIK, "rb") as f:
    print(pickle.load(f))
That prints:
['A', 'b', 'C', 'd']
However, a pickle file can contain any number of pickles. Here's code producing the same output. But note that it's harder to write and to understand:
# Alternative: store the item count first, then each item as its own record.
# (Harder to write and read than a single dump of the whole list.)
with open(PIK, "wb") as f:
    pickle.dump(len(data), f)
    for value in data:
        pickle.dump(value, f)

# Read the count back, then load exactly that many records.
data2 = []
with open(PIK, "rb") as f:
    for _ in range(pickle.load(f)):
        data2.append(pickle.load(f))
# Fixed: Python-2 `print` statement -> Python-3 print() call.
print(data2)
Try this:
import pickle

# Three sample records: [label, set_of_tags].
obj_1 = ['test_1', {'ability', 'mobility'}]
obj_2 = ['test_2', {'ability', 'mobility'}]
obj_3 = ['test_3', {'ability', 'mobility'}]

# Write each object as its own consecutive pickle record.
# (Fixed: manual open()/close() with a `file` variable that shadowed the
# builtin -> `with` context managers, which also close on error.)
with open('test.pkl', 'wb') as f:
    pickle.dump(obj_1, f)
    pickle.dump(obj_2, f)
    pickle.dump(obj_3, f)

# Read them back in the same order they were written.
with open('test.pkl', 'rb') as f:
    obj_1 = pickle.load(f)
    obj_2 = pickle.load(f)
    obj_3 = pickle.load(f)

print(obj_1)
print(obj_2)
print(obj_3)
You can run a loop (as the accepted answer shows) to keep unpickling records until you reach the end of the file, at which point an EOFError is raised.
# Keep unpickling records until the file is exhausted; pickle.load raises
# EOFError once no records remain, which ends the loop.
data = []
with open("data.pickle", "rb") as f:
    try:
        while True:
            data.append(pickle.load(f))
    except EOFError:
        pass
Minimal Verifiable Example
import pickle

# Dumping step: each dict becomes its own pickle record in the file.
data = [{'a': 1}, {'b': 2}]
with open('test.pkl', 'wb') as f:
    for d in data:
        pickle.dump(d, f)

# Loading step: read records until pickle.load raises EOFError.
data2 = []
with open('test.pkl', 'rb') as f:
    while True:
        try:
            data2.append(pickle.load(f))
        except EOFError:
            break

# data2          -> [{'a': 1}, {'b': 2}]
# data == data2  -> True
Of course, this is under the assumption that your objects have to be pickled individually. You can also store your data as a single list of object, then use a single pickle/unpickle call (no need for loops).
# Simpler alternative: store everything as ONE list in ONE pickle record,
# so a single dump/load round-trips the whole dataset (no loops needed).
data = [{'a': 1}, {'b': 2}]  # list of dicts as an example
with open('test.pkl', 'wb') as f:
    pickle.dump(data, f)
with open('test.pkl', 'rb') as f:
    data2 = pickle.load(f)

# data2  -> [{'a': 1}, {'b': 2}]
I will give an object-oriented demo that uses pickle to store and restore one object or multiple objects:
class Worker(object):
    """A minimal picklable object with a readable string form."""

    def __init__(self, name, addr):
        self.name = name
        self.addr = addr

    def __str__(self):
        string = u'[<Worker> name:%s addr:%s]' % (self.name, self.addr)
        return string


# output one item
with open('testfile.bin', 'wb') as f:
    w1 = Worker('tom1', 'China')
    pickle.dump(w1, f)

# input one item
# (Fixed throughout: Python-2 `print` statements -> Python-3 print() calls.)
with open('testfile.bin', 'rb') as f:
    w1_restore = pickle.load(f)
print('item: %s' % w1_restore)

# output multi items: a single record holding a list of objects
with open('testfile.bin', 'wb') as f:
    w1 = Worker('tom2', 'China')
    w2 = Worker('tom3', 'China')
    pickle.dump([w1, w2], f)

# input multi items
with open('testfile.bin', 'rb') as f:
    w_list = pickle.load(f)
for w in w_list:
    print('item-list: %s' % w)
output:
item: [<Worker> name:tom1 addr:China]
item-list: [<Worker> name:tom2 addr:China]
item-list: [<Worker> name:tom3 addr:China]
It's easy if you use klepto, which gives you the ability to transparently store objects in files or databases. It uses a dict API, and allows you to dump and/or load specific entries from an archive (in the case below, serialized objects are stored one entry per file in a directory called scores).
>>> import klepto
>>> scores = klepto.archives.dir_archive('scores', serialized=True)
>>> scores['Guido'] = 69
>>> scores['Fernando'] = 42
>>> scores['Polly'] = 101
>>> scores.dump()
>>> # access the archive, and load only one
>>> results = klepto.archives.dir_archive('scores', serialized=True)
>>> results.load('Polly')
>>> results
dir_archive('scores', {'Polly': 101}, cached=True)
>>> results['Polly']
101
>>> # load all the scores
>>> results.load()
>>> results['Guido']
69
So my problem is this... I have multiple pickle files (each one a pickled dictionary) and I want to load them all, but essentially merge each dictionary into a single larger dictionary. In essence, my code works, but it just overwrites the contents of my_dict rather than merging in each pickled object. I have pickle_file1 and pickle_file2, both containing dictionaries, and I would like the contents of both loaded into my_dict_final. Note: if the dictionaries happen to have common keys, the update method will, unfortunately, overwrite the values for those common keys. Example below.
EDIT: As per request, here is what I have so far:
# NOTE(review): the original loop rebound `my_dict` on every iteration, so
# only the LAST file's contents survived — the very bug described above.
# Merge each file's dict into one accumulator instead, and use `with` so the
# handle is closed even on error.
my_dict = {}
for pkl_file in pkl_file_list:
    with open(pkl_file, 'rb') as pickle_in:
        my_dict.update(pickle.load(pickle_in))
my_dict_final = {}  # Create an empty dictionary

# Merge both pickled dicts into it; on duplicate keys the later file wins.
with open('pickle_file1', 'rb') as f:
    my_dict_final.update(pickle.load(f))  # Update contents of file1 to the dictionary
with open('pickle_file2', 'rb') as f:
    my_dict_final.update(pickle.load(f))  # Update contents of file2 to the dictionary

# Fixed: Python-2 `print` statement -> Python-3 print() call.
print(my_dict_final)
You can use the dict.update function.
# NOTE(review): `picke_file1` / `picke_file2` (names as in the original) are
# presumably already-open binary file objects — confirm with the caller.
pickle_dict1 = pickle.load(picke_file1)
pickle_dict2 = pickle.load(picke_file2)
# dict.update merges in place; on duplicate keys pickle_dict2's values win.
my_dict_final = pickle_dict1
my_dict_final.update(pickle_dict2)
@Nunchux, @Vikas Ojha: If the dictionaries happen to have common keys, the update method will, unfortunately, overwrite the values for those common keys. Example:
>>> dict1 = {'a': 4, 'b': 3, 'c': 0, 'd': 4}
>>> dict2 = {'a': 1, 'b': 8, 'c': 5}
>>> All_dict = {}
>>> All_dict.update(dict1)
>>> All_dict.update(dict2)
>>> All_dict
{'a': 1, 'b': 8, 'c': 5, 'd': 4}
If you'd like to avoid this and instead keep adding the counts of common keys, one option is the following strategy. Applied to your example, here is a minimal working example:
import os
import pickle
from collections import Counter

dict1 = {'a': 4, 'b': 3, 'c': 0, 'd': 4}
dict2 = {'a': 1, 'b': 8, 'c': 5}

# just creating two pickle files:
# (Fixed: the original never closed its handles -> `with` blocks.)
with open("dict1.pickle", "wb") as pickle_out:
    pickle.dump(dict1, pickle_out)
with open("dict2.pickle", "wb") as pickle_out:
    pickle.dump(dict2, pickle_out)

# Here comes: merge the dicts, SUMMING the counts of common keys
# (Counter addition) instead of letting one overwrite the other.
pkl_file_list = ["dict1.pickle", "dict2.pickle"]
All_dict = Counter({})
for pkl_file in pkl_file_list:
    if os.path.exists(pkl_file):
        with open(pkl_file, "rb") as pickle_in:
            dict_i = pickle.load(pickle_in)
        All_dict = All_dict + Counter(dict_i)

print(dict(All_dict))
This will happily give you:
{
'a': 5,
'b': 11,
'd': 4,
'c': 5
}
The pickle module keeps track of the objects it has already serialized, so that later references to the same object won’t be serialized again. marshal doesn’t do this.,marshal cannot be used to serialize user-defined classes and their instances. pickle can save and restore class instances transparently, however the class definition must be importable and live in the same module as when the object was stored.,To unpickle external objects, the unpickler must have a custom persistent_load() method that takes a persistent ID object and returns the referenced object.,Read the pickled representation of an object from the open file object file and return the reconstituted object hierarchy specified therein. This is equivalent to Unpickler(file).load().
class Foo:
    # Class attribute: pickling the *class* serializes only a reference
    # (module + qualified name), not this value.
    attr = 'A class attribute'


# Pickle the class object itself; unpickling re-imports it by name.
picklestring = pickle.dumps(Foo)
def save(obj):
    """Reduce *obj* to a (class, state-dict) pair suitable for serialization."""
    return (obj.__class__, obj.__dict__)


def restore(cls, attributes):
    """Rebuild an instance of *cls* from *attributes*, bypassing __init__."""
    obj = cls.__new__(cls)
    obj.__dict__.update(attributes)
    return obj
# Per-pickler dispatch table: copy copyreg's global table, then register a
# custom reducer for SomeClass on THIS Pickler instance only.
f = io.BytesIO()
p = pickle.Pickler(f)
p.dispatch_table = copyreg.dispatch_table.copy()
p.dispatch_table[SomeClass] = reduce_SomeClass
class MyPickler(pickle.Pickler):
    """Pickler subclass with a class-level dispatch table.

    Every MyPickler instance shares this table: copyreg's global table is
    copied, then extended with a custom reducer for SomeClass.
    """

    dispatch_table = copyreg.dispatch_table.copy()
    dispatch_table[SomeClass] = reduce_SomeClass


f = io.BytesIO()
p = MyPickler(f)
# Global registration: copyreg.pickle installs reduce_SomeClass in the shared
# dispatch table, so EVERY Pickler (not just this one) uses it for SomeClass.
copyreg.pickle(SomeClass, reduce_SomeClass)
f = io.BytesIO()
p = pickle.Pickler(f)