##// END OF EJS Templates
add utils.pickleutil.use_dill...
add utils.pickleutil.use_dill enables dill extended serialization support

File last commit:

r13636:fca0ad0e
r13636:fca0ad0e
Show More
pickleutil.py
382 lines | 10.5 KiB | text/x-python | PythonLexer
MinRK
prep newparallel for rebase...
r3539 # encoding: utf-8
"""Pickle related utilities. Perhaps this should be called 'can'."""
__docformat__ = "restructuredtext en"
#-------------------------------------------------------------------------------
Matthias BUSSONNIER
update copyright to 2011/20xx-2011...
r5390 # Copyright (C) 2008-2011 The IPython Development Team
MinRK
prep newparallel for rebase...
r3539 #
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# Imports
#-------------------------------------------------------------------------------
MinRK
Improvements to dependency handling...
r3607 import copy
MinRK
use logger for canning import error
r8034 import logging
MinRK
update API after sagedays29...
r3664 import sys
MinRK
py3compat for class canning
r9002 from types import FunctionType
MinRK
Improvements to dependency handling...
r3607
MinRK
better serialization for parallel code...
r7967 try:
import cPickle as pickle
except ImportError:
import pickle
Thomas Kluyver
Use explicit relative imports...
r13347 from . import codeutil # This registers a hook when it's imported
from . import py3compat
from .importstring import import_item
Thomas Kluyver
Fix references to dict.iteritems and dict.itervalues
r13361 from .py3compat import string_types, iteritems
MinRK
better serialization for parallel code...
r7967
MinRK
use logger for canning import error
r8034 from IPython.config import Application
MinRK
better serialization for parallel code...
r7967 if py3compat.PY3:
buffer = memoryview
MinRK
py3compat for class canning
r9002 class_type = type
else:
from types import ClassType
class_type = (type, ClassType)

#-------------------------------------------------------------------------------
# Functions
#-------------------------------------------------------------------------------
def use_dill():
    """Switch serialization over to dill.

    Importing dill adds support for object methods and closures to
    serialization.  Because dill does not work with cPickle, this module's
    ``pickle`` name and (when it can be imported)
    ``IPython.kernel.zmq.serialize.pickle`` are both rebound to the plain
    ``pickle`` module.  The special-case canner registered for plain
    functions is removed from ``can_map`` so that dill takes care of them.
    """
    # the import itself causes most of the magic
    import dill

    # dill doesn't work with cPickle: rebind the module-level name
    global pickle
    import pickle

    # the zmq serializer is optional; patch it only when it is available
    try:
        from IPython.kernel.zmq import serialize
    except ImportError:
        serialize = None
    if serialize is not None:
        serialize.pickle = pickle

    # disable special function handling, let dill take care of it
    can_map.pop(FunctionType, None)
#-------------------------------------------------------------------------------
# Classes
#-------------------------------------------------------------------------------
class CannedObject(object):
    """Wrapper holding a shallow copy of an object with selected attributes canned."""

    def __init__(self, obj, keys=None, hook=None):
        """can an object for safe pickling

        Parameters
        ----------
        obj :
            The object to be canned
        keys : list (optional)
            list of attribute names that will be explicitly canned / uncanned.
            Defaults to no attributes.
        hook : callable (optional)
            An optional extra callable,
            which can do additional processing of the uncanned object.

        Notes
        -----
        large data may be offloaded into the buffers list,
        used for zero-copy transfers.
        """
        # A fresh list per instance: a literal ``[]`` default would be one
        # shared object stored on every instance as ``self.keys``.
        self.keys = [] if keys is None else keys
        # work on a shallow copy so the caller's object keeps its attributes
        self.obj = copy.copy(obj)
        self.hook = can(hook)
        for key in self.keys:
            setattr(self.obj, key, can(getattr(obj, key)))

        self.buffers = []

    def get_object(self, g=None):
        """Uncan the listed attributes in place, run the hook, return the object.

        Parameters
        ----------
        g : dict (optional)
            namespace handed to ``uncan`` and to the hook; a new empty dict
            is used when omitted.
        """
        if g is None:
            g = {}
        obj = self.obj
        for key in self.keys:
            setattr(obj, key, uncan(getattr(obj, key), g))

        if self.hook:
            # the hook was canned in __init__; restore it before calling
            self.hook = uncan(self.hook, g)
            self.hook(obj, g)
        return self.obj
MinRK
better serialization for parallel code...
r7967
MinRK
added dependency decorator
r3546
MinRK
add Reference object
class Reference(CannedObject):
    """object for wrapping a remote reference by name."""

    def __init__(self, name):
        """Store *name*; raise TypeError unless it is a string."""
        if isinstance(name, string_types):
            self.name = name
            self.buffers = []
        else:
            raise TypeError("illegal name: %r"%name)

    def __repr__(self):
        return "<Reference: %r>"%self.name

    def get_object(self, g=None):
        """Evaluate the stored name in namespace *g* (empty dict if omitted)."""
        namespace = {} if g is None else g
        # NOTE(security): the name is evaluated with eval(), so any expression
        # is executed; only uncan References from trusted peers.
        return eval(self.name, namespace)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
added dependency decorator
r3546
MinRK
prep newparallel for rebase...
class CannedFunction(CannedObject):
    """Canned form of a plain function: code object, canned defaults, module, name."""

    def __init__(self, f):
        """Capture the pieces of function *f* needed to rebuild it."""
        self._check_type(f)
        self.code = f.__code__
        if f.__defaults__:
            self.defaults = [can(default) for default in f.__defaults__]
        else:
            self.defaults = None
        self.module = f.__module__ or '__main__'
        self.__name__ = f.__name__
        self.buffers = []

    def _check_type(self, obj):
        assert isinstance(obj, FunctionType), "Not a function type"

    def get_object(self, g=None):
        """Rebuild the function.

        When the recorded module name does not start with ``'__'`` the module
        is imported (an ImportError propagates) and its ``__dict__`` is used
        as the function globals; otherwise *g*, or a new empty dict, is used.
        """
        if not self.module.startswith('__'):
            # try to load function back into its module
            __import__(self.module)
            g = sys.modules[self.module].__dict__
        elif g is None:
            g = {}

        restored = None
        if self.defaults:
            restored = tuple(uncan(cfd, g) for cfd in self.defaults)
        return FunctionType(self.code, g, self.__name__, restored)
MinRK
can classes
class CannedClass(CannedObject):
    """Canned form of a class: name, canned namespace and canned parents."""

    def __init__(self, cls):
        """Can every entry of ``cls.__dict__`` and every class after *cls* in its MRO."""
        self._check_type(cls)
        self.name = cls.__name__
        self.old_style = not isinstance(cls, type)

        # these two slots are skipped rather than canned
        skipped = ('__weakref__', '__dict__')
        self._canned_dict = {}
        for attr, value in cls.__dict__.items():
            if attr in skipped:
                continue
            self._canned_dict[attr] = can(value)

        # classes that are not instances of `type` get no parents recorded
        ancestors = [] if self.old_style else cls.mro()[1:]
        self.parents = [can(base) for base in ancestors]
        self.buffers = []

    def _check_type(self, obj):
        assert isinstance(obj, class_type), "Not a class type"

    def get_object(self, g=None):
        """Recreate the class with ``type()`` from the uncanned parents and namespace."""
        bases = tuple(uncan(parent, g) for parent in self.parents)
        namespace = uncan_dict(self._canned_dict, g=g)
        return type(self.name, bases, namespace)
MinRK
better serialization for parallel code...
r7967
class CannedArray(CannedObject):
    """Canned numpy array: raw memory in ``buffers`` when possible, else a pickle."""

    def __init__(self, obj):
        """Can array *obj*.

        The array is pickled when ``sum(obj.shape) == 0`` or when its dtype
        contains Python objects: an object array's memory only holds
        pointers, which cannot be rebuilt with ``frombuffer``.  Every other
        array is made contiguous and exposed as a buffer.
        """
        # numpy is imported lazily so this module does not require it
        from numpy import ascontiguousarray
        self.shape = obj.shape
        # brief dtype description rather than the full dtype object
        self.dtype = obj.dtype.descr if obj.dtype.fields else obj.dtype.str
        self.pickled = bool(sum(obj.shape) == 0 or obj.dtype.hasobject)
        if self.pickled:
            # just pickle it
            self.buffers = [pickle.dumps(obj, -1)]
        else:
            # ensure contiguous
            obj = ascontiguousarray(obj, dtype=None)
            self.buffers = [buffer(obj)]

    def get_object(self, g=None):
        """Rebuild the array from ``self.buffers[0]``; *g* is unused."""
        from numpy import frombuffer
        data = self.buffers[0]
        # instances canned before `pickled` existed only pickled when the
        # shape summed to zero, so fall back to that rule for them
        pickled = getattr(self, 'pickled', sum(self.shape) == 0)
        if pickled:
            return pickle.loads(data)
        return frombuffer(data, dtype=self.dtype).reshape(self.shape)
MinRK
better serialization for parallel code...
r7967
class CannedBytes(CannedObject):
wrap = bytes
def __init__(self, obj):
self.buffers = [obj]
def get_object(self, g=None):
data = self.buffers[0]
return self.wrap(data)
class CannedBuffer(CannedBytes):
    """CannedBytes variant that rewraps the payload with ``buffer`` on uncanning."""

    # Must be a class, not a function: ``can_map`` calls ``CannedBuffer(obj)``
    # and needs a CannedObject instance back.
    wrap = buffer
#-------------------------------------------------------------------------------
# Functions
#-------------------------------------------------------------------------------
def _logger():
    """Return the logger of the current Application.

    When no Application has been initialized the root logger is returned,
    after calling ``logging.basicConfig()`` if it has no handlers yet.
    """
    if Application.initialized():
        return Application.instance().log

    root = logging.getLogger()
    if not root.handlers:
        logging.basicConfig()
    return root
def _import_mapping(mapping, original=None):
    """Replace string keys of a type mapping with the objects they name.

    Each string key is imported with ``import_item`` and the entry is re-keyed
    under the imported object.  Keys that fail to import are dropped; the
    failure is logged only when *original* is given and does not contain the
    key, i.e. for entries added after the defaults were recorded.
    """
    log = _logger()
    log.debug("Importing canning map")
    # snapshot the string keys first: the mapping is mutated in the loop
    pending = [key for key in list(mapping) if isinstance(key, string_types)]
    for key in pending:
        try:
            cls = import_item(key)
        except Exception:
            if original and key not in original:
                # only message on user-added classes
                log.error("canning class not importable: %r", key, exc_info=True)
            mapping.pop(key)
            continue
        mapping[cls] = mapping.pop(key)
MinRK
prep newparallel for rebase...
r3539
MinRK
add istype to canning...
def istype(obj, check):
    """Strict variant of ``isinstance(obj, check)``.

    Only the exact type of *obj* matches; subclasses do not.  *check* is
    either a single type or a tuple of types.
    """
    exact = type(obj)
    if isinstance(check, tuple):
        return any(exact is cls for cls in check)
    return exact is check
MinRK
better serialization for parallel code...
def can(obj):
    """Prepare an object for pickling.

    ``can_map`` is scanned in order and the first entry whose key is exactly
    the type of *obj* (see ``istype``) supplies the canner.  If a key that is
    still an import string is met first, the string keys are imported and the
    lookup starts over.  Objects with no matching entry are returned as-is.
    """
    for cls, canner in iteritems(can_map):
        if isinstance(cls, string_types):
            # unresolved import string: stop scanning and resolve below
            break
        if istype(obj, cls):
            return canner(obj)
    else:
        # scanned the whole map without a match or a pending import
        return obj

    # perform can_map imports, then try again
    # this will usually only happen once
    _import_mapping(can_map, _original_can_map)
    return can(obj)
MinRK
can classes
def can_class(obj):
    """Can *obj* as a CannedClass if it is a class defined in ``__main__``.

    Anything else is returned unchanged.
    """
    if not isinstance(obj, class_type) or obj.__module__ != '__main__':
        return obj
    return CannedClass(obj)
MinRK
better serialization for parallel code...
def can_dict(obj):
    """can the *values* of a dict

    Only exact ``dict`` instances are processed (a new dict is returned);
    any other object is handed back untouched.
    """
    if not istype(obj, dict):
        return obj
    return dict((key, can(value)) for key, value in iteritems(obj))
MinRK
use istype instead of isinstance for canning tuples/lists...
# container types whose elements are canned / uncanned one by one
sequence_types = (list, tuple, set)

def can_sequence(obj):
    """can the elements of a sequence

    Applies only to exact instances of the types in ``sequence_types``; the
    result has the same type as the input.  Other objects are returned as-is.
    """
    if not istype(obj, sequence_types):
        return obj
    container = type(obj)
    return container([can(item) for item in obj])
def uncan(obj, g=None):
    """Invert canning.

    ``uncan_map`` is scanned in order and the first entry for which *obj* is
    an instance of the key (subclasses included) supplies the uncanner, which
    is called with *obj* and the namespace *g*.  If a key that is still an
    import string is met first, the string keys are imported and the lookup
    starts over.  Objects with no matching entry are returned as-is.
    """
    for cls, uncanner in iteritems(uncan_map):
        if isinstance(cls, string_types):
            # unresolved import string: stop scanning and resolve below
            break
        if isinstance(obj, cls):
            return uncanner(obj, g)
    else:
        # scanned the whole map without a match or a pending import
        return obj

    # perform uncan_map imports, then try again
    # this will usually only happen once
    _import_mapping(uncan_map, _original_uncan_map)
    return uncan(obj, g)
def uncan_dict(obj, g=None):
    """Uncan the *values* of a dict, passing namespace *g* to ``uncan``.

    Only exact ``dict`` instances are processed (a new dict is returned);
    any other object is handed back untouched.
    """
    if not istype(obj, dict):
        return obj
    return dict((key, uncan(value, g)) for key, value in iteritems(obj))
MinRK
better serialization for parallel code...
def uncan_sequence(obj, g=None):
    """Uncan the elements of a sequence, passing namespace *g* to ``uncan``.

    Applies only to exact instances of the types in ``sequence_types``; the
    result has the same type as the input.  Other objects are returned as-is.
    """
    if not istype(obj, sequence_types):
        return obj
    container = type(obj)
    return container([uncan(item, g) for item in obj])
MinRK
use canning hook in dependent...
r9986 def _uncan_dependent_hook(dep, g=None):
dep.check_dependency()
def can_dependent(obj):
    """Can *obj* with its ``f`` and ``df`` attributes canned explicitly.

    ``_uncan_dependent_hook`` is attached so the dependency check runs when
    the object is uncanned.
    """
    canned_attrs = ('f', 'df')
    return CannedObject(obj, keys=canned_attrs, hook=_uncan_dependent_hook)

#-------------------------------------------------------------------------------
# API dictionaries
#-------------------------------------------------------------------------------

# These dicts can be extended for custom serialization of new objects
# Maps a type to the callable that cans objects of exactly that type.
# A key may also be an import string; such keys are replaced by the imported
# object (via _import_mapping) the first time can() runs into one.
can_map = {
    'IPython.parallel.dependent' : can_dependent,
    'numpy.ndarray' : CannedArray,
    FunctionType : CannedFunction,
    bytes : CannedBytes,
    buffer : CannedBuffer,
    class_type : can_class,
}

# Maps a type to the callable (taking the object and a namespace) that
# uncans instances of that type, subclasses included.
uncan_map = {
    CannedObject : lambda obj, g: obj.get_object(g),
}

# for use in _import_mapping:
# snapshots of the default entries, so import failures are only reported
# for keys that were added afterwards
_original_can_map = can_map.copy()
_original_uncan_map = uncan_map.copy()