pickleutil.py
425 lines
| 11.6 KiB
| text/x-python
|
PythonLexer
MinRK
|
r3539 | # encoding: utf-8 | ||
"""Pickle related utilities. Perhaps this should be called 'can'.""" | ||||
MinRK
|
r16506 | # Copyright (c) IPython Development Team. | ||
# Distributed under the terms of the Modified BSD License. | ||||
MinRK
|
r3539 | |||
MinRK
|
r3607 | import copy | ||
MinRK
|
r8034 | import logging | ||
MinRK
|
r3664 | import sys | ||
MinRK
|
r9002 | from types import FunctionType | ||
MinRK
|
r3607 | |||
MinRK
|
r7967 | try: | ||
import cPickle as pickle | ||||
except ImportError: | ||||
import pickle | ||||
Thomas Kluyver
|
r13347 | from . import codeutil # This registers a hook when it's imported | ||
from . import py3compat | ||||
from .importstring import import_item | ||||
Thomas Kluyver
|
r13361 | from .py3compat import string_types, iteritems | ||
MinRK
|
r7967 | |||
MinRK
|
r8034 | from IPython.config import Application | ||
MinRK
|
r17057 | from IPython.utils.log import get_logger | ||
MinRK
|
r8034 | |||
MinRK
|
# Version-dependent aliases used further down in this module.
if not py3compat.PY3:
    # Python 2: classes may be new-style (type) or old-style (ClassType);
    # the builtin ``buffer`` is used as-is.
    from types import ClassType
    class_type = (type, ClassType)
else:
    # Python 3: alias ``buffer`` to memoryview; every class is a ``type``.
    buffer = memoryview
    class_type = type
MinRK
|
r3539 | |||
MinRK
|
# Use the pickle module's DEFAULT_PROTOCOL when it defines one,
# otherwise fall back to HIGHEST_PROTOCOL.
PICKLE_PROTOCOL = getattr(pickle, 'DEFAULT_PROTOCOL', pickle.HIGHEST_PROTOCOL)
MinRK
|
def _get_cell_type(a=None):
    """Return the type of a closure cell.

    The type of a closure cell doesn't seem to be importable, so a
    throwaway closure over ``a`` is created and the type of its first
    cell is returned.
    """
    def closes_over_a():
        return a

    first_cell = py3compat.get_closure(closes_over_a)[0]
    return type(first_cell)


# computed once at import time; used as a key in can_map
cell_type = _get_cell_type()
MinRK
|
r3607 | #------------------------------------------------------------------------------- | ||
MinRK
|
r13636 | # Functions | ||
#------------------------------------------------------------------------------- | ||||
def use_dill():
    """Use dill to expand serialization support.

    Adds support for object methods and closures to serialization.
    """
    global pickle

    # importing dill causes most of the magic
    import dill

    # dill doesn't work with cPickle, so rebind this module's
    # ``pickle`` name to dill ...
    pickle = dill

    # ... and do the same for the zmq serialize module when it is importable
    try:
        from IPython.kernel.zmq import serialize
    except ImportError:
        serialize = None
    if serialize is not None:
        serialize.pickle = dill

    # disable special function handling, let dill take care of it
    can_map.pop(FunctionType, None)
James Porter
|
def use_cloudpickle():
    """Use cloudpickle to expand serialization support.

    Adds support for object methods and closures to serialization.
    """
    global pickle

    from cloud.serialization import cloudpickle

    # rebind this module's ``pickle`` name to cloudpickle
    pickle = cloudpickle

    # do the same for the zmq serialize module when it is importable
    try:
        from IPython.kernel.zmq import serialize
    except ImportError:
        serialize = None
    if serialize is not None:
        serialize.pickle = cloudpickle

    # disable special function handling, let cloudpickle take care of it
    can_map.pop(FunctionType, None)
MinRK
|
r13636 | |||
#------------------------------------------------------------------------------- | ||||
MinRK
|
r3607 | # Classes | ||
#------------------------------------------------------------------------------- | ||||
MinRK
|
class CannedObject(object):
    """Wrap a shallow copy of an object with selected attributes canned."""

    def __init__(self, obj, keys=None, hook=None):
        """can an object for safe pickling

        Parameters
        ----------
        obj :
            The object to be canned. A shallow copy (``copy.copy``) is
            stored; the original is not modified.
        keys : list (optional)
            list of attribute names that will be explicitly canned / uncanned
        hook : callable (optional)
            An optional extra callable,
            which can do additional processing of the uncanned object.
            It is called as ``hook(obj, g)`` by `get_object`.

        large data may be offloaded into the buffers list,
        used for zero-copy transfers.
        """
        # A fresh list per instance: a literal ``[]`` default would be one
        # list object shared by every instance created without ``keys``.
        self.keys = [] if keys is None else keys
        self.obj = copy.copy(obj)
        self.hook = can(hook)
        for key in self.keys:
            # read from the original, write the canned value onto the copy
            setattr(self.obj, key, can(getattr(obj, key)))

        self.buffers = []

    def get_object(self, g=None):
        """Uncan the listed attributes in place and return the stored object.

        Parameters
        ----------
        g : dict (optional)
            Namespace passed through to `uncan` and to the hook;
            a new empty dict is used when omitted.
        """
        if g is None:
            g = {}
        obj = self.obj
        for key in self.keys:
            setattr(obj, key, uncan(getattr(obj, key), g))

        if self.hook:
            # the uncanned hook replaces the canned one on this instance
            self.hook = uncan(self.hook, g)
            self.hook(obj, g)
        return self.obj
MinRK
|
r7967 | |||
MinRK
|
r3546 | |||
MinRK
|
class Reference(CannedObject):
    """object for wrapping a remote reference by name."""

    def __init__(self, name):
        """Store ``name``; raise TypeError unless it is a string."""
        if isinstance(name, string_types):
            self.name = name
            self.buffers = []
        else:
            raise TypeError("illegal name: %r"%name)

    def __repr__(self):
        return "<Reference: %r>"%self.name

    def get_object(self, g=None):
        """Evaluate the stored name in namespace ``g`` and return the result."""
        namespace = {} if g is None else g
        # NOTE(review): this evaluates ``self.name`` as an expression with
        # eval(); presumably the name can arrive from a remote peer, so it
        # should only be used with trusted senders -- confirm against callers.
        return eval(self.name, namespace)
Bernardo B. Marques
|
r4872 | |||
MinRK
|
r3546 | |||
MinRK
|
class CannedCell(CannedObject):
    """Can a closure cell"""

    def __init__(self, cell):
        """Can the contents of ``cell`` and keep only the canned value."""
        self.cell_contents = can(cell.cell_contents)
        # Every other CannedObject subclass in this module exposes a
        # ``buffers`` list; provide one here too so the attribute can be
        # read uniformly on any canned object.
        self.buffers = []

    def get_object(self, g=None):
        """Return a new closure cell holding the uncanned contents."""
        cell_contents = uncan(self.cell_contents, g)

        # build a throwaway closure purely to obtain a cell object
        def inner():
            return cell_contents
        return py3compat.get_closure(inner)[0]
MinRK
|
class CannedFunction(CannedObject):
    """Can a plain function: code object, defaults, closure, module, name."""

    def __init__(self, f):
        self._check_type(f)
        self.code = f.__code__

        # defaults are canned one by one; None when there are none
        fn_defaults = f.__defaults__
        self.defaults = [can(fd) for fd in fn_defaults] if fn_defaults else None

        # closure cells are canned one by one; None when there are none
        cells = py3compat.get_closure(f)
        self.closure = tuple(can(cell) for cell in cells) if cells else None

        self.module = f.__module__ or '__main__'
        self.__name__ = f.__name__
        self.buffers = []

    def _check_type(self, obj):
        assert isinstance(obj, FunctionType), "Not a function type"

    def get_object(self, g=None):
        """Rebuild the function, using ``g`` as globals unless its module loads."""
        # try to load function back into its module:
        if not self.module.startswith('__'):
            __import__(self.module)
            g = sys.modules[self.module].__dict__

        if g is None:
            g = {}

        defaults = (
            tuple(uncan(cfd, g) for cfd in self.defaults)
            if self.defaults else None
        )
        closure = (
            tuple(uncan(cell, g) for cell in self.closure)
            if self.closure else None
        )
        return FunctionType(self.code, g, self.__name__, defaults, closure)
MinRK
|
class CannedClass(CannedObject):
    """Can a class: its name, canned namespace, and canned parent classes."""

    def __init__(self, cls):
        self._check_type(cls)
        self.name = cls.__name__
        self.old_style = not isinstance(cls, type)

        # can every class attribute except the two skipped names
        skipped = ('__weakref__', '__dict__')
        self._canned_dict = {
            k: can(v) for k, v in cls.__dict__.items() if k not in skipped
        }

        # old-style classes get no parents; otherwise everything in the
        # MRO after the class itself
        mro = [] if self.old_style else cls.mro()
        self.parents = [can(c) for c in mro[1:]]
        self.buffers = []

    def _check_type(self, obj):
        assert isinstance(obj, class_type), "Not a class type"

    def get_object(self, g=None):
        """Recreate the class with ``type()`` from the uncanned pieces."""
        bases = tuple(uncan(p, g) for p in self.parents)
        namespace = uncan_dict(self._canned_dict, g=g)
        return type(self.name, bases, namespace)
MinRK
|
r7967 | |||
class CannedArray(CannedObject):
    """Can a numpy array, as a raw buffer when possible, else as a pickle."""

    def __init__(self, obj):
        """Record shape/dtype and store the array data in ``self.buffers``.

        ``self.pickled`` tells `get_object` whether ``buffers[0]`` holds a
        pickle of the whole array or the array's raw memory.
        """
        from numpy import ascontiguousarray
        self.shape = obj.shape
        # structured dtypes are described by their descr, simple ones by str
        self.dtype = obj.dtype.descr if obj.dtype.fields else obj.dtype.str
        self.pickled = False
        fields = obj.dtype.fields
        if sum(obj.shape) == 0:
            # 0-d arrays and arrays whose dimensions are all zero
            self.pickled = True
        elif obj.dtype == 'O':
            # can't handle object dtype with buffer approach
            self.pickled = True
        elif fields and any(field[0] == 'O' for field in fields.values()):
            # Field entries are (dtype, offset) or (dtype, offset, title);
            # index instead of unpacking so titled fields don't raise.
            self.pickled = True
        if self.pickled:
            # just pickle it
            self.buffers = [pickle.dumps(obj, PICKLE_PROTOCOL)]
        else:
            # ensure contiguous
            obj = ascontiguousarray(obj, dtype=None)
            self.buffers = [buffer(obj)]

    def get_object(self, g=None):
        """Rebuild the array from ``self.buffers[0]``."""
        from numpy import frombuffer
        data = self.buffers[0]
        if self.pickled:
            # no shape, we just pickled it
            return pickle.loads(data)
        else:
            return frombuffer(data, dtype=self.dtype).reshape(self.shape)
MinRK
|
r7967 | |||
class CannedBytes(CannedObject):
    """Can a bytes object by placing it directly in the buffers list."""

    # callable applied to the stored buffer when the object is rebuilt
    wrap = bytes

    def __init__(self, obj):
        self.buffers = [obj]

    def get_object(self, g=None):
        """Return the first buffer passed through ``self.wrap``."""
        return self.wrap(self.buffers[0])
class CannedBuffer(CannedBytes):
    """Can a buffer object; identical to CannedBytes but rebuilt via ``buffer``.

    This must be a class: it is registered in ``can_map`` as the canner
    for ``buffer`` and is expected to produce a CannedObject. Declared
    with ``def`` it would be a function that returns None.
    """

    wrap = buffer
MinRK
|
r3607 | #------------------------------------------------------------------------------- | ||
# Functions | ||||
#------------------------------------------------------------------------------- | ||||
MinRK
|
def _import_mapping(mapping, original=None):
    """Import any string keys in a type mapping.

    Each string key is replaced in place by the object it imports to.
    Keys that fail to import are dropped; the failure is logged only
    when ``original`` is given and does not contain the key.
    """
    log = get_logger()
    log.debug("Importing canning map")

    # iterate over a snapshot of the keys: the mapping is mutated below
    for key in list(mapping):
        if not isinstance(key, string_types):
            continue
        try:
            cls = import_item(key)
        except Exception:
            # only message on user-added classes
            if original and key not in original:
                log.error("canning class not importable: %r", key, exc_info=True)
            mapping.pop(key)
        else:
            mapping[cls] = mapping.pop(key)
MinRK
|
r3539 | |||
MinRK
|
def istype(obj, check):
    """Strict variant of isinstance(obj, check).

    Compares the exact type of ``obj``, so instances of subclasses do
    not match. ``check`` may be a single type or a tuple of types.
    """
    exact_type = type(obj)
    if isinstance(check, tuple):
        return any(exact_type is cls for cls in check)
    return exact_type is check
MinRK
|
def can(obj):
    """Prepare an object for pickling.

    The first entry of can_map whose key exactly matches the type of
    ``obj`` supplies the canner; objects with no match are returned
    unchanged.
    """
    needs_import = False
    for cls, canner in iteritems(can_map):
        if isinstance(cls, string_types):
            # an unresolved string key: stop and resolve the map first
            needs_import = True
            break
        if istype(obj, cls):
            return canner(obj)

    if not needs_import:
        return obj

    # perform can_map imports, then try again
    # this will usually only happen once
    _import_mapping(can_map, _original_can_map)
    return can(obj)
MinRK
|
def can_class(obj):
    """Can a class defined in ``__main__``; return anything else unchanged."""
    defined_in_main = isinstance(obj, class_type) and obj.__module__ == '__main__'
    return CannedClass(obj) if defined_in_main else obj
MinRK
|
def can_dict(obj):
    """Can the *values* of a dict.

    Only exact ``dict`` instances are processed (a new dict is
    returned); any other object is returned unchanged.
    """
    if not istype(obj, dict):
        return obj
    return {k: can(v) for k, v in iteritems(obj)}
MinRK
|
# exact container types whose elements are canned / uncanned one by one
sequence_types = (list, tuple, set)


def can_sequence(obj):
    """Can the elements of a sequence.

    Only exact list/tuple/set instances are processed (a new container
    of the same type is returned); any other object is returned unchanged.
    """
    if not istype(obj, sequence_types):
        return obj
    canned_items = [can(item) for item in obj]
    return type(obj)(canned_items)
def uncan(obj, g=None):
    """Invert canning.

    The first entry of uncan_map for which ``obj`` is an instance
    (subclasses included) supplies the uncanner, called as
    ``uncanner(obj, g)``; objects with no match are returned unchanged.
    """
    needs_import = False
    for cls, uncanner in iteritems(uncan_map):
        if isinstance(cls, string_types):
            # an unresolved string key: stop and resolve the map first
            needs_import = True
            break
        if isinstance(obj, cls):
            return uncanner(obj, g)

    if not needs_import:
        return obj

    # perform uncan_map imports, then try again
    # this will usually only happen once
    _import_mapping(uncan_map, _original_uncan_map)
    return uncan(obj, g)
def uncan_dict(obj, g=None):
    """Uncan the *values* of a dict.

    Only exact ``dict`` instances are processed (a new dict is
    returned); any other object is returned unchanged.
    """
    if not istype(obj, dict):
        return obj
    return {k: uncan(v, g) for k, v in iteritems(obj)}
MinRK
|
def uncan_sequence(obj, g=None):
    """Uncan the elements of a sequence.

    Only exact list/tuple/set instances are processed (a new container
    of the same type is returned); any other object is returned unchanged.
    """
    if not istype(obj, sequence_types):
        return obj
    restored_items = [uncan(item, g) for item in obj]
    return type(obj)(restored_items)
MinRK
|
r9986 | def _uncan_dependent_hook(dep, g=None): | ||
dep.check_dependency() | ||||
def can_dependent(obj):
    """Can ``obj`` with its ``f`` and ``df`` attributes canned explicitly.

    The dependency-check hook is attached so it runs when the object
    is uncanned.
    """
    canned_attrs = ('f', 'df')
    return CannedObject(obj, keys=canned_attrs, hook=_uncan_dependent_hook)
MinRK
|
r3539 | |||
MinRK
|
r7967 | #------------------------------------------------------------------------------- | ||
MinRK
|
r8081 | # API dictionaries | ||
MinRK
|
r7967 | #------------------------------------------------------------------------------- | ||
# These dicts can be extended for custom serialization of new objects | ||||
# Maps a type (or a dotted import string, resolved on first use by
# _import_mapping) to the callable that cans objects of exactly that type.
# A key may also be a tuple of types, which istype() accepts.
can_map = {
    'IPython.parallel.dependent': can_dependent,
    'numpy.ndarray': CannedArray,
    FunctionType: CannedFunction,
    bytes: CannedBytes,
    buffer: CannedBuffer,
    cell_type: CannedCell,
    class_type: can_class,
}

# Maps a type to a callable ``(obj, g)`` that restores the original object.
uncan_map = {
    CannedObject: lambda obj, g: obj.get_object(g),
}

# for use in _import_mapping:
_original_can_map = dict(can_map)
_original_uncan_map = dict(uncan_map)