##// END OF EJS Templates
coalesce stream output in the notebook...
coalesce stream output in the notebook This merges consecutive outputs on one stream into a single output. Essentially, it applies the same merging that we do visually to the content stored in the notebook document. This results in a massive performance improvement in load-time and storage size for notebooks that have many calls to `sys.stdout.flush()`.

File last commit:

r17067:4a94456c merge
r17305:bd91397c
Show More
pickleutil.py
425 lines | 11.6 KiB | text/x-python | PythonLexer
MinRK
prep newparallel for rebase...
r3539 # encoding: utf-8
"""Pickle related utilities. Perhaps this should be called 'can'."""
MinRK
handle simple closures in pickleutil...
r16506 # Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
MinRK
prep newparallel for rebase...
r3539
MinRK
Improvements to dependency handling...
r3607 import copy
MinRK
use logger for canning import error
r8034 import logging
MinRK
update API after sagedays29...
r3664 import sys
MinRK
py3compat for class canning
r9002 from types import FunctionType
MinRK
Improvements to dependency handling...
r3607
MinRK
better serialization for parallel code...
r7967 try:
import cPickle as pickle
except ImportError:
import pickle
Thomas Kluyver
Use explicit relative imports...
r13347 from . import codeutil # This registers a hook when it's imported
from . import py3compat
from .importstring import import_item
Thomas Kluyver
Fix references to dict.iteritems and dict.itervalues
r13361 from .py3compat import string_types, iteritems
MinRK
better serialization for parallel code...
r7967
MinRK
use logger for canning import error
r8034 from IPython.config import Application
MinRK
use utils.log.get_logger where appropriate
r17057 from IPython.utils.log import get_logger
MinRK
use logger for canning import error
r8034
MinRK
better serialization for parallel code...
# Python 2/3 compatibility aliases used by the canning code in this module.
if py3compat.PY3:
    # Python 3 has no ``buffer`` builtin; alias it to memoryview.
    buffer = memoryview
    class_type = type
else:
    # Python 2 also has old-style classes, whose type is ClassType.
    from types import ClassType
    class_type = (type, ClassType)
MinRK
prep newparallel for rebase...
r3539
MinRK
add pickleutil.PICKLE_PROTOCOL...
# Pickle protocol passed to ``pickle.dumps`` in this module.
try:
    PICKLE_PROTOCOL = pickle.DEFAULT_PROTOCOL
except AttributeError:
    # pickle implementations without DEFAULT_PROTOCOL: use the highest one
    PICKLE_PROTOCOL = pickle.HIGHEST_PROTOCOL
MinRK
handle simple closures in pickleutil...
def _get_cell_type(a=None):
    """the type of a closure cell doesn't seem to be importable,
    so just create one

    A throwaway inner function closes over ``a``; the type of the first
    cell in its closure (fetched with ``py3compat.get_closure``) is returned.
    """
    def inner():
        return a
    return type(py3compat.get_closure(inner)[0])


# type of closure cells; used as a key in can_map
cell_type = _get_cell_type()
MinRK
Improvements to dependency handling...
r3607 #-------------------------------------------------------------------------------
MinRK
add utils.pickleutil.use_dill...
r13636 # Functions
#-------------------------------------------------------------------------------
def use_dill():
    """use dill to expand serialization support

    adds support for object methods and closures to serialization.

    Side effects: rebinds this module's ``pickle`` global to ``dill``, does
    the same for ``IPython.kernel.zmq.serialize`` when that module can be
    imported, and removes the ``FunctionType`` entry from ``can_map``.
    """
    # import dill causes most of the magic
    import dill

    # dill doesn't work with cPickle,
    # tell the two relevant modules to use plain pickle
    global pickle
    pickle = dill

    try:
        from IPython.kernel.zmq import serialize
    except ImportError:
        # best effort: nothing to patch if serialize is not importable
        pass
    else:
        serialize.pickle = dill

    # disable special function handling, let dill take care of it
    can_map.pop(FunctionType, None)
James Porter
support use_cloudpickle in addition to use_dill
def use_cloudpickle():
    """use cloudpickle to expand serialization support

    adds support for object methods and closures to serialization.

    Side effects: rebinds this module's ``pickle`` global to ``cloudpickle``,
    does the same for ``IPython.kernel.zmq.serialize`` when that module can
    be imported, and removes the ``FunctionType`` entry from ``can_map``.
    """
    from cloud.serialization import cloudpickle

    global pickle
    pickle = cloudpickle

    try:
        from IPython.kernel.zmq import serialize
    except ImportError:
        # best effort: nothing to patch if serialize is not importable
        pass
    else:
        serialize.pickle = cloudpickle

    # disable special function handling, let cloudpickle take care of it
    can_map.pop(FunctionType, None)
MinRK
add utils.pickleutil.use_dill...
r13636
#-------------------------------------------------------------------------------
MinRK
Improvements to dependency handling...
r3607 # Classes
#-------------------------------------------------------------------------------
MinRK
prep newparallel for rebase...
class CannedObject(object):
    """Wrapper holding a shallow copy of an object with selected attributes canned.

    ``get_object`` reverses the canning and returns the wrapped object.
    """

    def __init__(self, obj, keys=None, hook=None):
        """can an object for safe pickling

        Parameters
        ==========

        obj:
            The object to be canned
        keys: list (optional)
            list of attribute names that will be explicitly canned / uncanned
        hook: callable (optional)
            An optional extra callable,
            which can do additional processing of the uncanned object.

        large data may be offloaded into the buffers list,
        used for zero-copy transfers.
        """
        # None means "no keys"; a literal [] default would be a single list
        # object shared by every instance created without ``keys``.
        if keys is None:
            keys = []
        self.keys = keys
        self.obj = copy.copy(obj)
        self.hook = can(hook)
        for key in keys:
            setattr(self.obj, key, can(getattr(obj, key)))

        self.buffers = []

    def get_object(self, g=None):
        """Uncan the stored object in place and return it.

        Each attribute named in ``self.keys`` is replaced by its uncanned
        value. If a hook was given it is uncanned, stored back on
        ``self.hook`` and called as ``hook(obj, g)``.

        ``g`` is the namespace handed to ``uncan``; defaults to a new dict.
        """
        if g is None:
            g = {}
        obj = self.obj
        for key in self.keys:
            setattr(obj, key, uncan(getattr(obj, key), g))

        if self.hook:
            self.hook = uncan(self.hook, g)
            self.hook(obj, g)
        return self.obj
MinRK
better serialization for parallel code...
r7967
MinRK
added dependency decorator
r3546
MinRK
add Reference object
class Reference(CannedObject):
    """object for wrapping a remote reference by name."""

    def __init__(self, name):
        """Store ``name``; raise TypeError if it is not a string."""
        if not isinstance(name, string_types):
            # wrap in a 1-tuple so a tuple ``name`` is shown with %r
            # instead of being unpacked as the format arguments
            raise TypeError("illegal name: %r" % (name,))
        self.name = name
        self.buffers = []

    def __repr__(self):
        return "<Reference: %r>" % (self.name,)

    def get_object(self, g=None):
        """Evaluate the stored name in namespace ``g`` and return the result."""
        if g is None:
            g = {}

        # NOTE(security): eval() runs self.name as an arbitrary expression;
        # a Reference must only ever be uncanned from a trusted source.
        return eval(self.name, g)
Bernardo B. Marques
remove all trailling spaces
r4872
MinRK
added dependency decorator
r3546
MinRK
handle simple closures in pickleutil...
class CannedCell(CannedObject):
    """Can a closure cell"""

    def __init__(self, cell):
        """Store the canned contents of ``cell``."""
        self.cell_contents = can(cell.cell_contents)
        # The other Canned* classes in this file all define ``buffers``;
        # presumably consumers read it on any CannedObject (confirm).
        self.buffers = []

    def get_object(self, g=None):
        """Return a new closure cell holding the uncanned contents."""
        cell_contents = uncan(self.cell_contents, g)

        # closing over the local is how a fresh cell object gets created
        def inner():
            return cell_contents
        return py3compat.get_closure(inner)[0]
MinRK
prep newparallel for rebase...
class CannedFunction(CannedObject):
    """Canned form of a plain Python function.

    Stores the code object, canned defaults, canned closure cells, the
    defining module's name and the function name.
    """

    def __init__(self, f):
        """Can function ``f``; ``f`` must be a ``FunctionType`` instance."""
        self._check_type(f)
        self.code = f.__code__
        if f.__defaults__:
            self.defaults = [can(fd) for fd in f.__defaults__]
        else:
            self.defaults = None

        closure = py3compat.get_closure(f)
        if closure:
            self.closure = tuple(can(cell) for cell in closure)
        else:
            self.closure = None

        self.module = f.__module__ or '__main__'
        self.__name__ = f.__name__
        self.buffers = []

    def _check_type(self, obj):
        assert isinstance(obj, FunctionType), "Not a function type"

    def get_object(self, g=None):
        """Rebuild and return the function.

        When the recorded module name does not start with ``'__'`` the
        module is imported and its ``__dict__`` replaces ``g`` as the
        function's globals; otherwise ``g`` (or a new dict) is used.
        """
        # try to load function back into its module:
        if not self.module.startswith('__'):
            __import__(self.module)
            g = sys.modules[self.module].__dict__

        if g is None:
            g = {}
        if self.defaults:
            defaults = tuple(uncan(cfd, g) for cfd in self.defaults)
        else:
            defaults = None
        if self.closure:
            closure = tuple(uncan(cell, g) for cell in self.closure)
        else:
            closure = None
        newFunc = FunctionType(self.code, g, self.__name__, defaults, closure)
        return newFunc
MinRK
can classes
class CannedClass(CannedObject):
    """Canned form of a class: its name, canned class dict and canned parents."""

    def __init__(self, cls):
        """Can class ``cls``; ``cls`` must be an instance of ``class_type``."""
        self._check_type(cls)
        self.name = cls.__name__
        self.old_style = not isinstance(cls, type)
        self._canned_dict = {}
        for k, v in cls.__dict__.items():
            # these two entries are skipped rather than canned
            if k not in ('__weakref__', '__dict__'):
                self._canned_dict[k] = can(v)
        if self.old_style:
            mro = []
        else:
            mro = cls.mro()

        # mro[0] is the class itself, so only the remainder is kept
        self.parents = [can(c) for c in mro[1:]]
        self.buffers = []

    def _check_type(self, obj):
        assert isinstance(obj, class_type), "Not a class type"

    def get_object(self, g=None):
        """Rebuild the class with ``type(name, parents, dict)`` and return it."""
        parents = tuple(uncan(p, g) for p in self.parents)
        return type(self.name, parents, uncan_dict(self._canned_dict, g=g))
MinRK
better serialization for parallel code...
r7967
class CannedArray(CannedObject):
    """Canned form of a numpy array.

    The data is kept in ``self.buffers[0]``: either a pickle of the whole
    array (``self.pickled`` is True) or a buffer over the contiguous data.
    """

    def __init__(self, obj):
        """Can array ``obj``, choosing between pickling and the raw buffer."""
        from numpy import ascontiguousarray
        self.shape = obj.shape
        self.dtype = obj.dtype.descr if obj.dtype.fields else obj.dtype.str
        self.pickled = False
        if sum(obj.shape) == 0:
            self.pickled = True
        elif obj.dtype == 'O':
            # can't handle object dtype with buffer approach
            self.pickled = True
        elif obj.dtype.fields and any(
            # each entry is (dtype, offset) or (dtype, offset, title), so
            # index instead of unpacking into exactly two names
            field[0] == 'O' for field in obj.dtype.fields.values()
        ):
            self.pickled = True
        if self.pickled:
            # just pickle it
            self.buffers = [pickle.dumps(obj, PICKLE_PROTOCOL)]
        else:
            # ensure contiguous
            obj = ascontiguousarray(obj, dtype=None)
            self.buffers = [buffer(obj)]

    def get_object(self, g=None):
        """Rebuild and return the array from ``self.buffers[0]``."""
        from numpy import frombuffer
        data = self.buffers[0]
        if self.pickled:
            # no shape, we just pickled it
            return pickle.loads(data)
        else:
            return frombuffer(data, dtype=self.dtype).reshape(self.shape)
MinRK
better serialization for parallel code...
r7967
class CannedBytes(CannedObject):
    """Canned form of a bytes-like object; the data is kept in ``buffers``."""

    # callable applied to the stored data in get_object
    wrap = bytes

    def __init__(self, obj):
        """Store ``obj`` as the only entry of ``self.buffers``."""
        self.buffers = [obj]

    def get_object(self, g=None):
        """Return ``self.wrap`` applied to the stored data; ``g`` is unused."""
        data = self.buffers[0]
        return self.wrap(data)
class CannedBuffer(CannedBytes):
    """CannedBytes variant whose ``get_object`` wraps the data with ``buffer``.

    This was previously written with ``def``, which made it a function that
    only bound a local ``wrap`` and returned ``None`` when called.
    """
    wrap = buffer
MinRK
Improvements to dependency handling...
r3607 #-------------------------------------------------------------------------------
# Functions
#-------------------------------------------------------------------------------
MinRK
adjust how canning deals with import strings...
def _import_mapping(mapping, original=None):
    """import any string-keys in a type mapping

    Every string key of ``mapping`` is passed to ``import_item``. On success
    the entry is re-keyed by the imported object; on failure the entry is
    dropped. ``mapping`` is modified in place.

    ``original``, when given, holds the keys that should not be reported:
    an import failure is logged only for keys absent from it.
    """
    log = get_logger()
    log.debug("Importing canning map")
    # iterate over a snapshot of the keys: the mapping is mutated in the loop
    for key in list(mapping):
        if isinstance(key, string_types):
            try:
                cls = import_item(key)
            except Exception:
                if original and key not in original:
                    # only message on user-added classes
                    log.error("canning class not importable: %r", key, exc_info=True)
                mapping.pop(key)
            else:
                mapping[cls] = mapping.pop(key)
MinRK
prep newparallel for rebase...
r3539
MinRK
add istype to canning...
def istype(obj, check):
    """like isinstance(obj, check), but strict

    This won't catch subclasses.

    ``check`` is a single type or a tuple of types. Returns True only when
    ``type(obj)`` is exactly ``check`` or one of the tuple's members.
    """
    if isinstance(check, tuple):
        return any(type(obj) is cls for cls in check)
    return type(obj) is check
MinRK
better serialization for parallel code...
def can(obj):
    """prepare an object for pickling

    Walks ``can_map`` and returns ``canner(obj)`` for the first key whose
    type matches ``type(obj)`` exactly (see ``istype``). If no entry
    matches, ``obj`` is returned unchanged.
    """
    import_needed = False

    for cls, canner in iteritems(can_map):
        if isinstance(cls, string_types):
            # a string key has not been imported yet; stop and resolve them
            import_needed = True
            break
        elif istype(obj, cls):
            return canner(obj)

    if import_needed:
        # perform can_map imports, then try again
        # this will usually only happen once
        _import_mapping(can_map, _original_can_map)
        return can(obj)

    return obj
MinRK
can classes
def can_class(obj):
    """Can a class defined in ``__main__``; return anything else unchanged."""
    if isinstance(obj, class_type) and obj.__module__ == '__main__':
        return CannedClass(obj)
    else:
        return obj
MinRK
better serialization for parallel code...
def can_dict(obj):
    """can the *values* of a dict

    Only an exact ``dict`` (not a subclass) is processed; a new dict with
    the same keys is returned. Any other object is returned unchanged.
    """
    if istype(obj, dict):
        newobj = {}
        for k, v in iteritems(obj):
            newobj[k] = can(v)
        return newobj
    else:
        return obj
MinRK
use istype instead of isinstance for canning tuples/lists...
# container types handled by can_sequence / uncan_sequence (exact types only)
sequence_types = (list, tuple, set)


def can_sequence(obj):
    """can the elements of a sequence

    Only an exact list, tuple or set is processed; a new container of the
    same type is returned. Any other object is returned unchanged.
    """
    if istype(obj, sequence_types):
        t = type(obj)
        return t([can(i) for i in obj])
    else:
        return obj
def uncan(obj, g=None):
    """invert canning

    Walks ``uncan_map`` and returns ``uncanner(obj, g)`` for the first key
    that ``obj`` is an instance of. If no entry matches, ``obj`` is
    returned unchanged.
    """
    import_needed = False

    for cls, uncanner in iteritems(uncan_map):
        if isinstance(cls, string_types):
            # a string key has not been imported yet; stop and resolve them
            import_needed = True
            break
        elif isinstance(obj, cls):
            return uncanner(obj, g)

    if import_needed:
        # perform uncan_map imports, then try again
        # this will usually only happen once
        _import_mapping(uncan_map, _original_uncan_map)
        return uncan(obj, g)

    return obj
def uncan_dict(obj, g=None):
    """uncan the *values* of a dict

    Only an exact ``dict`` (not a subclass) is processed; a new dict with
    the same keys is returned. Any other object is returned unchanged.
    """
    if istype(obj, dict):
        newobj = {}
        for k, v in iteritems(obj):
            newobj[k] = uncan(v, g)
        return newobj
    else:
        return obj
MinRK
better serialization for parallel code...
def uncan_sequence(obj, g=None):
    """uncan the elements of a sequence

    Only an exact list, tuple or set is processed; a new container of the
    same type is returned. Any other object is returned unchanged.
    """
    if istype(obj, sequence_types):
        t = type(obj)
        return t([uncan(i, g) for i in obj])
    else:
        return obj
MinRK
use canning hook in dependent...
def _uncan_dependent_hook(dep, g=None):
    """Uncanning hook used by can_dependent: calls ``dep.check_dependency()``.

    ``g`` is accepted because CannedObject calls hooks as ``hook(obj, g)``;
    it is not used here.
    """
    dep.check_dependency()
def can_dependent(obj):
    """Can ``obj`` as a CannedObject whose ``f`` and ``df`` attributes are canned.

    ``_uncan_dependent_hook`` is attached as the uncanning hook.
    """
    return CannedObject(obj, keys=('f', 'df'), hook=_uncan_dependent_hook)
MinRK
prep newparallel for rebase...
r3539
MinRK
better serialization for parallel code...
r7967 #-------------------------------------------------------------------------------
MinRK
adjust how canning deals with import strings...
r8081 # API dictionaries
MinRK
better serialization for parallel code...
r7967 #-------------------------------------------------------------------------------
# These dicts can be extended for custom serialization of new objects
# Maps a type (or a tuple of types) to the callable that cans its instances.
# String keys are import paths; can() resolves them through
# _import_mapping() the first time it meets one.
can_map = {
    'IPython.parallel.dependent': can_dependent,
    'numpy.ndarray': CannedArray,
    FunctionType: CannedFunction,
    bytes: CannedBytes,
    buffer: CannedBuffer,
    cell_type: CannedCell,
    class_type: can_class,
}
# Maps a canned type to the callable ``(obj, g)`` that rebuilds the original.
uncan_map = {
    CannedObject: lambda obj, g: obj.get_object(g),
}
MinRK
adjust how canning deals with import strings...
# for use in _import_mapping:
# snapshots of the built-in entries, taken before any user additions
_original_can_map = can_map.copy()
_original_uncan_map = uncan_map.copy()