##// END OF EJS Templates
enable non-copying sends in push...
MinRK -
Show More
@@ -1,1069 +1,1069 b''
1 """Views of remote engines.
1 """Views of remote engines.
2
2
3 Authors:
3 Authors:
4
4
5 * Min RK
5 * Min RK
6 """
6 """
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2010-2011 The IPython Development Team
8 # Copyright (C) 2010-2011 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 import imp
18 import imp
19 import sys
19 import sys
20 import warnings
20 import warnings
21 from contextlib import contextmanager
21 from contextlib import contextmanager
22 from types import ModuleType
22 from types import ModuleType
23
23
24 import zmq
24 import zmq
25
25
26 from IPython.testing.skipdoctest import skip_doctest
26 from IPython.testing.skipdoctest import skip_doctest
27 from IPython.utils.traitlets import (
27 from IPython.utils.traitlets import (
28 HasTraits, Any, Bool, List, Dict, Set, Instance, CFloat, Integer
28 HasTraits, Any, Bool, List, Dict, Set, Instance, CFloat, Integer
29 )
29 )
30 from IPython.external.decorator import decorator
30 from IPython.external.decorator import decorator
31
31
32 from IPython.parallel import util
32 from IPython.parallel import util
33 from IPython.parallel.controller.dependency import Dependency, dependent
33 from IPython.parallel.controller.dependency import Dependency, dependent
34
34
35 from . import map as Map
35 from . import map as Map
36 from .asyncresult import AsyncResult, AsyncMapResult
36 from .asyncresult import AsyncResult, AsyncMapResult
37 from .remotefunction import ParallelFunction, parallel, remote, getname
37 from .remotefunction import ParallelFunction, parallel, remote, getname
38
38
39 #-----------------------------------------------------------------------------
39 #-----------------------------------------------------------------------------
40 # Decorators
40 # Decorators
41 #-----------------------------------------------------------------------------
41 #-----------------------------------------------------------------------------
42
42
@decorator
def save_ids(f, self, *args, **kwargs):
    """Keep our history and outstanding attributes up to date after a method call.

    Records any msg_ids the wrapped call appended to ``self.client.history``
    into this view's own ``history`` and ``outstanding`` sets, even if the
    call raises.
    """
    n_previous = len(self.client.history)
    try:
        ret = f(self, *args, **kwargs)
    finally:
        nmsgs = len(self.client.history) - n_previous
        # Guard the zero-message case explicitly: history[-0:] would be the
        # *entire* client history, which would wrongly re-add every past
        # msg_id to this view's history and outstanding sets.
        if nmsgs > 0:
            msg_ids = self.client.history[-nmsgs:]
            self.history.extend(msg_ids)
            # set.update instead of map(): map-for-side-effects builds and
            # discards a useless list
            self.outstanding.update(msg_ids)
    return ret
55
55
@decorator
def sync_results(f, self, *args, **kwargs):
    """sync relevant results from self.client to our results attribute."""
    ret = f(self, *args, **kwargs)
    # anything we consider outstanding that the client no longer does
    # has completed since our last sync
    delta = self.outstanding - self.client.outstanding
    completed = self.outstanding & delta
    self.outstanding = self.outstanding - completed
    for msg_id in completed:
        self.results[msg_id] = self.client.results[msg_id]
    return ret
66
66
@decorator
def spin_after(f, self, *args, **kwargs):
    """call spin after the method."""
    result = f(self, *args, **kwargs)
    self.spin()
    return result
73
73
74 #-----------------------------------------------------------------------------
74 #-----------------------------------------------------------------------------
75 # Classes
75 # Classes
76 #-----------------------------------------------------------------------------
76 #-----------------------------------------------------------------------------
77
77
@skip_doctest
class View(HasTraits):
    """Base View class for more convenient apply(f,*args,**kwargs) syntax via attributes.

    Don't use this class, use subclasses.

    Methods
    -------

    spin
        flushes incoming results and registration state changes
        control methods spin, and requesting `ids` also ensures up to date

    wait
        wait on one or more msg_ids

    execution methods
        apply
        legacy: execute, run

    data movement
        push, pull, scatter, gather

    query methods
        get_result, queue_status, purge_results, result_status

    control methods
        abort, shutdown

    """
    # flags (settable in bulk via set_flags / temp_flags)
    block=Bool(False)   # whether calls wait for results
    track=Bool(True)    # whether to request zmq tracking for safe non-copying sends
    targets = Any()     # the engine target spec this view operates on

    history=List()      # msg_ids submitted through this view
    outstanding = Set() # msg_ids submitted but not yet complete
    results = Dict()    # completed results, keyed by msg_id
    client = Instance('IPython.parallel.Client')

    _socket = Instance('zmq.Socket')
    _flag_names = List(['targets', 'block', 'track'])
    _targets = Any()
    _idents = Any()

    def __init__(self, client=None, socket=None, **flags):
        super(View, self).__init__(client=client, _socket=socket)
        # default blocking behavior is inherited from the client
        self.block = client.block

        # `targets` (and any other flags) are applied here
        self.set_flags(**flags)

        assert not self.__class__ is View, "Don't use base View objects, use subclasses"


    def __repr__(self):
        strtargets = str(self.targets)
        # truncate long target lists so the repr stays readable
        if len(strtargets) > 16:
            strtargets = strtargets[:12]+'...]'
        return "<%s %s>"%(self.__class__.__name__, strtargets)

    def set_flags(self, **kwargs):
        """set my attribute flags by keyword.

        Views determine behavior with a few attributes (`block`, `track`, etc.).
        These attributes can be set all at once by name with this method.

        Parameters
        ----------

        block : bool
            whether to wait for results
        track : bool
            whether to create a MessageTracker to allow the user to
            safely edit after arrays and buffers during non-copying
            sends.

        Raises
        ------

        KeyError : if a keyword is not one of the names in `_flag_names`.
        """
        for name, value in kwargs.iteritems():
            if name not in self._flag_names:
                raise KeyError("Invalid name: %r"%name)
            else:
                setattr(self, name, value)

    @contextmanager
    def temp_flags(self, **kwargs):
        """temporarily set flags, for use in `with` statements.

        See set_flags for permanent setting of flags

        Examples
        --------

        >>> view.track=False
        ...
        >>> with view.temp_flags(track=True):
        ...    ar = view.apply(dostuff, my_big_array)
        ...    ar.tracker.wait() # wait for send to finish
        >>> view.track
        False

        """
        # preflight: save flags, and set temporaries
        saved_flags = {}
        for f in self._flag_names:
            saved_flags[f] = getattr(self, f)
        self.set_flags(**kwargs)
        # yield to the with-statement block
        try:
            yield
        finally:
            # postflight: restore saved flags even if the block raised
            self.set_flags(**saved_flags)


    #----------------------------------------------------------------
    # apply
    #----------------------------------------------------------------

    @sync_results
    @save_ids
    def _really_apply(self, f, args, kwargs, block=None, **options):
        """wrapper for client.send_apply_message — subclasses must implement."""
        raise NotImplementedError("Implement in subclasses")

    def apply(self, f, *args, **kwargs):
        """calls f(*args, **kwargs) on remote engines, returning the result.

        This method sets all apply flags via this View's attributes.

        if self.block is False:
            returns AsyncResult
        else:
            returns actual result of f(*args, **kwargs)
        """
        return self._really_apply(f, args, kwargs)

    def apply_async(self, f, *args, **kwargs):
        """calls f(*args, **kwargs) on remote engines in a nonblocking manner.

        returns AsyncResult
        """
        return self._really_apply(f, args, kwargs, block=False)

    @spin_after
    def apply_sync(self, f, *args, **kwargs):
        """calls f(*args, **kwargs) on remote engines in a blocking manner,
        returning the result.

        returns: actual result of f(*args, **kwargs)
        """
        return self._really_apply(f, args, kwargs, block=True)

    #----------------------------------------------------------------
    # wrappers for client and control methods
    #----------------------------------------------------------------
    @sync_results
    def spin(self):
        """spin the client, and sync"""
        self.client.spin()

    @sync_results
    def wait(self, jobs=None, timeout=-1):
        """waits on one or more `jobs`, for up to `timeout` seconds.

        Parameters
        ----------

        jobs : int, str, or list of ints and/or strs, or one or more AsyncResult objects
            ints are indices to self.history
            strs are msg_ids
            default: wait on all outstanding messages
        timeout : float
            a time in seconds, after which to give up.
            default is -1, which means no timeout

        Returns
        -------

        True : when all msg_ids are done
        False : timeout reached, some msg_ids still outstanding
        """
        if jobs is None:
            # wait on everything ever submitted through this view
            jobs = self.history
        return self.client.wait(jobs, timeout)

    def abort(self, jobs=None, targets=None, block=None):
        """Abort jobs on my engines.

        Parameters
        ----------

        jobs : None, str, list of strs, optional
            if None: abort all jobs.
            else: abort specific msg_id(s).
        """
        block = block if block is not None else self.block
        targets = targets if targets is not None else self.targets
        # default: abort everything this view still has outstanding
        jobs = jobs if jobs is not None else list(self.outstanding)

        return self.client.abort(jobs=jobs, targets=targets, block=block)

    def queue_status(self, targets=None, verbose=False):
        """Fetch the Queue status of my engines"""
        targets = targets if targets is not None else self.targets
        return self.client.queue_status(targets=targets, verbose=verbose)

    def purge_results(self, jobs=[], targets=[]):
        """Instruct the controller to forget specific results.

        NOTE: the default [] arguments are never mutated here, only
        forwarded to client.purge_results.
        """
        if targets is None or targets == 'all':
            targets = self.targets
        return self.client.purge_results(jobs=jobs, targets=targets)

    def shutdown(self, targets=None, restart=False, hub=False, block=None):
        """Terminates one or more engine processes, optionally including the hub.
        """
        block = self.block if block is None else block
        if targets is None or targets == 'all':
            targets = self.targets
        return self.client.shutdown(targets=targets, restart=restart, hub=hub, block=block)

    @spin_after
    def get_result(self, indices_or_msg_ids=None):
        """return one or more results, specified by history index or msg_id.

        See client.get_result for details.

        """

        if indices_or_msg_ids is None:
            # default: the most recent submission
            indices_or_msg_ids = -1
        # translate history indices (ints) into msg_ids before delegating
        if isinstance(indices_or_msg_ids, int):
            indices_or_msg_ids = self.history[indices_or_msg_ids]
        elif isinstance(indices_or_msg_ids, (list,tuple,set)):
            indices_or_msg_ids = list(indices_or_msg_ids)
            for i,index in enumerate(indices_or_msg_ids):
                if isinstance(index, int):
                    indices_or_msg_ids[i] = self.history[index]
        return self.client.get_result(indices_or_msg_ids)

    #-------------------------------------------------------------------
    # Map
    #-------------------------------------------------------------------

    def map(self, f, *sequences, **kwargs):
        """override in subclasses"""
        raise NotImplementedError

    def map_async(self, f, *sequences, **kwargs):
        """Parallel version of builtin `map`, using this view's engines.

        This is equivalent to map(...block=False)

        See `self.map` for details.
        """
        if 'block' in kwargs:
            raise TypeError("map_async doesn't take a `block` keyword argument.")
        kwargs['block'] = False
        return self.map(f,*sequences,**kwargs)

    def map_sync(self, f, *sequences, **kwargs):
        """Parallel version of builtin `map`, using this view's engines.

        This is equivalent to map(...block=True)

        See `self.map` for details.
        """
        if 'block' in kwargs:
            raise TypeError("map_sync doesn't take a `block` keyword argument.")
        kwargs['block'] = True
        return self.map(f,*sequences,**kwargs)

    def imap(self, f, *sequences, **kwargs):
        """Parallel version of `itertools.imap`.

        See `self.map` for details.

        """

        return iter(self.map_async(f,*sequences, **kwargs))

    #-------------------------------------------------------------------
    # Decorators
    #-------------------------------------------------------------------

    def remote(self, block=True, **flags):
        """Decorator for making a RemoteFunction

        NOTE(review): default is block=True here but block=None in
        `parallel` below — presumably intentional, but worth confirming.
        """
        block = self.block if block is None else block
        return remote(self, block=block, **flags)

    def parallel(self, dist='b', block=None, **flags):
        """Decorator for making a ParallelFunction"""
        block = self.block if block is None else block
        return parallel(self, dist=dist, block=block, **flags)
370
370
@skip_doctest
class DirectView(View):
    """Direct Multiplexer View of one or more engines.

    These are created via indexed access to a client:

    >>> dv_1 = client[1]
    >>> dv_all = client[:]
    >>> dv_even = client[::2]
    >>> dv_some = client[1:3]

    This object provides dictionary access to engine namespaces:

    # push a=5:
    >>> dv['a'] = 5
    # pull 'foo':
    >>> dv['foo']

    """

    def __init__(self, client=None, socket=None, targets=None):
        # `targets` is forwarded to View.__init__ as a flag
        super(DirectView, self).__init__(client=client, socket=socket, targets=targets)
393
393
394 @property
394 @property
395 def importer(self):
395 def importer(self):
396 """sync_imports(local=True) as a property.
396 """sync_imports(local=True) as a property.
397
397
398 See sync_imports for details.
398 See sync_imports for details.
399
399
400 """
400 """
401 return self.sync_imports(True)
401 return self.sync_imports(True)
402
402
    @contextmanager
    def sync_imports(self, local=True, quiet=False):
        """Context Manager for performing simultaneous local and remote imports.

        'import x as y' will *not* work.  The 'as y' part will simply be ignored.

        If `local=True`, then the package will also be imported locally.

        If `quiet=True`, no output will be produced when attempting remote
        imports.

        Note that remote-only (`local=False`) imports have not been implemented.

        >>> with view.sync_imports():
        ...    from numpy import recarray
        importing recarray from numpy on engine(s)

        """
        import __builtin__
        local_import = __builtin__.__import__
        modules = set()   # keys of imports already forwarded to the engines
        results = []      # AsyncResults of the remote import calls
        @util.interactive
        def remote_import(name, fromlist, level):
            """the function to be passed to apply, that actually performs the import
            on the engine, and loads up the user namespace.
            """
            import sys
            user_ns = globals()
            mod = __import__(name, fromlist=fromlist, level=level)
            if fromlist:
                # `from name import a, b` — bind each requested attribute
                for key in fromlist:
                    user_ns[key] = getattr(mod, key)
            else:
                # plain `import name` — bind the top-level module
                user_ns[name] = sys.modules[name]

        def view_import(name, globals={}, locals={}, fromlist=[], level=-1):
            """the drop-in replacement for __import__, that optionally imports
            locally as well.
            """
            # don't override nested imports: restore the real __import__
            # for the duration of this call
            save_import = __builtin__.__import__
            __builtin__.__import__ = local_import

            if imp.lock_held():
                # this is a side-effect import, don't do it remotely, or even
                # ignore the local effects
                return local_import(name, globals, locals, fromlist, level)

            imp.acquire_lock()
            if local:
                mod = local_import(name, globals, locals, fromlist, level)
            else:
                raise NotImplementedError("remote-only imports not yet implemented")
            imp.release_lock()

            # forward each unique (name, fromlist) pair to the engines only
            # once, and only for explicit (level == -1) imports
            key = name+':'+','.join(fromlist or [])
            if level == -1 and key not in modules:
                modules.add(key)
                if not quiet:
                    if fromlist:
                        print "importing %s from %s on engine(s)"%(','.join(fromlist), name)
                    else:
                        print "importing %s on engine(s)"%name
                results.append(self.apply_async(remote_import, name, fromlist, level))
            # restore override
            __builtin__.__import__ = save_import

            return mod

        # override __import__ for the body of the with-statement
        __builtin__.__import__ = view_import
        try:
            # enter the block
            yield
        except ImportError:
            if local:
                raise
            else:
                # ignore import errors if not doing local imports
                pass
        finally:
            # always restore __import__
            __builtin__.__import__ = local_import

        for r in results:
            # raise possible remote ImportErrors here
            r.get()
491
491
492
492
493 @sync_results
@sync_results
@save_ids
def _really_apply(self, f, args=None, kwargs=None, targets=None, block=None, track=None):
    """calls f(*args, **kwargs) on remote engines, returning the result.

    This method sets all of `apply`'s flags via this View's attributes.

    Parameters
    ----------

    f : callable

    args : list [default: empty]

    kwargs : dict [default: empty]

    targets : target list [default: self.targets]
        where to run
    block : bool [default: self.block]
        whether to block
    track : bool [default: self.track]
        whether to ask zmq to track the message, for safe non-copying sends

    Returns
    -------

    if self.block is False:
        returns AsyncResult
    else:
        returns actual result of f(*args, **kwargs) on the engine(s)
        This will be a list if self.targets is also a list (even length 1), or
        the single result if self.targets is an integer engine id
    """
    # fall back to this View's flags for anything left unspecified
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    if block is None:
        block = self.block
    if track is None:
        track = self.track
    if targets is None:
        targets = self.targets

    # one apply message per resolved engine identity
    _idents = self.client._build_targets(targets)[0]
    msg_ids = []
    trackers = []
    for ident in _idents:
        msg = self.client.send_apply_message(self._socket, f, args, kwargs,
                                             track=track, ident=ident)
        if track:
            trackers.append(msg['tracker'])
        msg_ids.append(msg['header']['msg_id'])

    # wrap the per-send trackers in a single MessageTracker
    tracker = zmq.MessageTracker(*trackers) if track else None
    ar = AsyncResult(self.client, msg_ids, fname=getname(f),
                     targets=targets, tracker=tracker)
    if not block:
        return ar
    try:
        return ar.get()
    except KeyboardInterrupt:
        # interrupted blocking wait still hands back the AsyncResult
        return ar
549
549
@spin_after
def map(self, f, *sequences, **kwargs):
    """view.map(f, *sequences, block=self.block) => list|AsyncMapResult

    Parallel version of builtin `map`, using this View's `targets`.

    There will be one task per target, so work will be chunked
    if the sequences are longer than `targets`.

    Results can be iterated as they are ready, but will become available in chunks.

    Parameters
    ----------

    f : callable
        function to be mapped
    *sequences: one or more sequences of matching length
        the sequences to be distributed and passed to `f`
    block : bool
        whether to wait for the result or not [default self.block]

    Returns
    -------

    if block=False:
        AsyncMapResult
            An object like AsyncResult, but which reassembles the sequence of results
            into a single list. AsyncMapResults can be iterated through before all
            results are complete.
    else:
        list
            the result of map(f,*sequences)
    """

    block = kwargs.pop('block', self.block)
    # after 'block' is popped, only 'track' is a legal keyword
    for key in kwargs:
        if key not in ('block', 'track'):
            raise TypeError("invalid keyword arg, %r" % key)

    assert len(sequences) > 0, "must have some sequences to map onto!"
    # ParallelFunction does the chunking and submission
    pf = ParallelFunction(self, f, block=block, **kwargs)
    return pf.map(*sequences)
592
592
def execute(self, code, targets=None, block=None):
    """Executes `code` on `targets` in blocking or nonblocking manner.

    ``execute`` is always `bound` (affects engine namespace)

    Parameters
    ----------

    code : str
        the code string to be executed
    block : bool
        whether or not to wait until done to return
        default: self.block
    """
    # delegate to the generic apply path with the remote-exec helper
    return self._really_apply(util._execute, args=(code,),
                              block=block, targets=targets)
608
608
def run(self, filename, targets=None, block=None):
    """Execute contents of `filename` on my engine(s).

    This simply reads the contents of the file and calls `execute`.

    Parameters
    ----------

    filename : str
        The path to the file
    targets : int/str/list of ints/strs
        the engines on which to execute
        default : all
    block : bool
        whether or not to wait until done
        default: self.block

    """
    with open(filename, 'r') as source:
        # a trailing newline guards against SyntaxError caused by
        # trailing indented whitespace in the file
        code = source.read() + '\n'
    return self.execute(code, block=block, targets=targets)
632
632
def update(self, ns):
    """update remote namespace with dict `ns`

    See `push` for details.
    """
    # pure convenience wrapper: push with this View's current flags
    return self.push(ns, block=self.block, track=self.track)
639
639
def push(self, ns, targets=None, block=None, track=None):
    """update remote namespace with dict `ns`

    Parameters
    ----------

    ns : dict
        dict of keys with which to update engine namespace(s)
    targets : target list [default: self.targets]
        where to push
    block : bool [default : self.block]
        whether to wait to be notified of engine receipt
    track : bool [default : self.track]
        whether to ask zmq to track the message, for safe non-copying sends

    """
    block = self.block if block is None else block
    track = self.track if track is None else track
    targets = self.targets if targets is None else targets
    # validate before building any messages
    if not isinstance(ns, dict):
        raise TypeError("Must be a dict, not %s"%type(ns))
    # pass ns as **kwargs (not as a positional tuple) so that buffer-like
    # values can be serialized individually, enabling zmq non-copying sends
    return self._really_apply(util._push, kwargs=ns, block=block,
                              track=track, targets=targets)
660
660
def get(self, key_s):
    """get object(s) by `key_s` from remote namespace

    see `pull` for details.
    """
    # dict-style access is always blocking: the caller expects a value back
    return self.pull(key_s, block=True)
668
668
def pull(self, names, targets=None, block=None):
    """get object(s) by `name` from remote namespace

    will return one object if it is a key.
    can also take a list of keys, in which case it will return a list of objects.
    """
    block = self.block if block is None else block
    targets = self.targets if targets is None else targets
    # validate: either a single name, or a flat collection of names
    if isinstance(names, basestring):
        pass
    elif isinstance(names, (list, tuple, set)):
        for key in names:
            if not isinstance(key, basestring):
                raise TypeError("keys must be str, not type %r"%type(key))
    else:
        raise TypeError("names must be strs, not %r"%names)
    # removed unused local `applier` (self.apply_sync/self.apply_async);
    # this method dispatches through _really_apply, which handles blocking
    return self._really_apply(util._pull, (names,), block=block, targets=targets)
687
687
def scatter(self, key, seq, dist='b', flatten=False, targets=None, block=None, track=None):
    """
    Partition a Python sequence and send the partitions to a set of engines.
    """
    block = self.block if block is None else block
    track = self.track if track is None else track
    targets = self.targets if targets is None else targets

    mapObject = Map.dists[dist]()
    nparts = len(targets)
    msg_ids = []
    trackers = []
    for idx, eid in enumerate(targets):
        partition = mapObject.getPartition(seq, idx, nparts)
        # optionally unwrap a length-1 partition to the bare element
        if flatten and len(partition) == 1:
            ns = {key: partition[0]}
        else:
            ns = {key: partition}
        ar = self.push(ns, block=False, track=track, targets=eid)
        msg_ids.extend(ar.msg_ids)
        if track:
            trackers.append(ar._tracker)

    tracker = zmq.MessageTracker(*trackers) if track else None

    r = AsyncResult(self.client, msg_ids, fname='scatter', targets=targets, tracker=tracker)
    if block:
        # NOTE(review): the blocking branch returns None and wait() does not
        # raise on engine errors — unlike gather(), which blocks via get().
        # Confirm this asymmetry is intended.
        r.wait()
    else:
        return r
721
721
@sync_results
@save_ids
def gather(self, key, dist='b', targets=None, block=None):
    """
    Gather a partitioned sequence on a set of engines as a single local seq.
    """
    block = self.block if block is None else block
    targets = self.targets if targets is None else targets
    mapObject = Map.dists[dist]()
    msg_ids = []

    # one non-blocking pull per engine; AsyncMapResult reassembles
    # the partitions into a single sequence
    for idx, eid in enumerate(targets):
        ar = self.pull(key, block=False, targets=eid)
        msg_ids.extend(ar.msg_ids)

    r = AsyncMapResult(self.client, msg_ids, mapObject, fname='gather')

    if not block:
        return r
    try:
        return r.get()
    except KeyboardInterrupt:
        # interrupted blocking wait still hands back the result object
        return r
744
744
def __getitem__(self, key):
    """dict-style access: ``view[key]`` pulls `key` from the engines (blocking)."""
    return self.get(key)
747
747
def __setitem__(self, key, value):
    """dict-style assignment: ``view[key] = value`` pushes to the engines."""
    self.update({key: value})
750
750
def clear(self, targets=None, block=False):
    """Clear the remote namespaces on my engines."""
    # NOTE(review): with block defaulting to False (not None), the fallback
    # below only fires when a caller passes block=None explicitly; sibling
    # methods default block=None — confirm which default is intended.
    if block is None:
        block = self.block
    if targets is None:
        targets = self.targets
    return self.client.clear(targets=targets, block=block)
756
756
def kill(self, targets=None, block=True):
    """Kill my engines."""
    # NOTE(review): block defaults to True (not None), so the self.block
    # fallback below only applies when block=None is passed explicitly.
    if block is None:
        block = self.block
    if targets is None:
        targets = self.targets
    return self.client.kill(targets=targets, block=block)
762
762
763 #----------------------------------------
763 #----------------------------------------
764 # activate for %px,%autopx magics
764 # activate for %px,%autopx magics
765 #----------------------------------------
765 #----------------------------------------
766 def activate(self):
766 def activate(self):
767 """Make this `View` active for parallel magic commands.
767 """Make this `View` active for parallel magic commands.
768
768
769 IPython has a magic command syntax to work with `MultiEngineClient` objects.
769 IPython has a magic command syntax to work with `MultiEngineClient` objects.
770 In a given IPython session there is a single active one. While
770 In a given IPython session there is a single active one. While
771 there can be many `Views` created and used by the user,
771 there can be many `Views` created and used by the user,
772 there is only one active one. The active `View` is used whenever
772 there is only one active one. The active `View` is used whenever
773 the magic commands %px and %autopx are used.
773 the magic commands %px and %autopx are used.
774
774
775 The activate() method is called on a given `View` to make it
775 The activate() method is called on a given `View` to make it
776 active. Once this has been done, the magic commands can be used.
776 active. Once this has been done, the magic commands can be used.
777 """
777 """
778
778
779 try:
779 try:
780 # This is injected into __builtins__.
780 # This is injected into __builtins__.
781 ip = get_ipython()
781 ip = get_ipython()
782 except NameError:
782 except NameError:
783 print "The IPython parallel magics (%result, %px, %autopx) only work within IPython."
783 print "The IPython parallel magics (%result, %px, %autopx) only work within IPython."
784 else:
784 else:
785 pmagic = ip.plugin_manager.get_plugin('parallelmagic')
785 pmagic = ip.plugin_manager.get_plugin('parallelmagic')
786 if pmagic is None:
786 if pmagic is None:
787 ip.magic_load_ext('parallelmagic')
787 ip.magic_load_ext('parallelmagic')
788 pmagic = ip.plugin_manager.get_plugin('parallelmagic')
788 pmagic = ip.plugin_manager.get_plugin('parallelmagic')
789
789
790 pmagic.active_view = self
790 pmagic.active_view = self
791
791
792
792
@skip_doctest
class LoadBalancedView(View):
    """A load-balancing View that only executes via the Task scheduler.

    Load-balanced views can be created with the client's `view` method:

    >>> v = client.load_balanced_view()

    or targets can be specified, to restrict the potential destinations:

    >>> v = client.client.load_balanced_view([1,3])

    which would restrict loadbalancing to between engines 1 and 3.

    """

    # scheduler-specific flags, in addition to View's targets/block/track
    follow=Any()
    after=Any()
    timeout=CFloat()
    retries = Integer(0)

    _task_scheme = Any()
    _flag_names = List(['targets', 'block', 'track', 'follow', 'after', 'timeout', 'retries'])

    def __init__(self, client=None, socket=None, **flags):
        super(LoadBalancedView, self).__init__(client=client, socket=socket, **flags)
        self._task_scheme=client._task_scheme

    def _validate_dependency(self, dep):
        """validate a dependency.

        For use in `set_flags`.
        """
        if dep is None or isinstance(dep, (basestring, AsyncResult, Dependency)):
            return True
        elif isinstance(dep, (list, set, tuple)):
            # a collection of msg_ids / AsyncResults
            for d in dep:
                if not isinstance(d, (basestring, AsyncResult)):
                    return False
        elif isinstance(dep, dict):
            # must look exactly like a Dependency dict
            if set(dep.keys()) != set(Dependency().as_dict().keys()):
                return False
            if not isinstance(dep['msg_ids'], list):
                return False
            for d in dep['msg_ids']:
                if not isinstance(d, basestring):
                    return False
        else:
            return False

        return True

    def _render_dependency(self, dep):
        """helper for building jsonable dependencies from various input forms."""
        if isinstance(dep, Dependency):
            return dep.as_dict()
        elif isinstance(dep, AsyncResult):
            return dep.msg_ids
        elif dep is None:
            return []
        else:
            # pass to Dependency constructor
            return list(Dependency(dep))

    def set_flags(self, **kwargs):
        """set my attribute flags by keyword.

        A View is a wrapper for the Client's apply method, but with attributes
        that specify keyword arguments, those attributes can be set by keyword
        argument with this method.

        Parameters
        ----------

        block : bool
            whether to wait for results
        track : bool
            whether to create a MessageTracker to allow the user to
            safely edit after arrays and buffers during non-copying
            sends.

        after : Dependency or collection of msg_ids
            Only for load-balanced execution (targets=None)
            Specify a list of msg_ids as a time-based dependency.
            This job will only be run *after* the dependencies
            have been met.

        follow : Dependency or collection of msg_ids
            Only for load-balanced execution (targets=None)
            Specify a list of msg_ids as a location-based dependency.
            This job will only be run on an engine where this dependency
            is met.

        timeout : float/int or None
            Only for load-balanced execution (targets=None)
            Specify an amount of time (in seconds) for the scheduler to
            wait for dependencies to be met before failing with a
            DependencyTimeout.

        retries : int
            Number of times a task will be retried on failure.
        """

        super(LoadBalancedView, self).set_flags(**kwargs)
        for name in ('follow', 'after'):
            if name in kwargs:
                value = kwargs[name]
                if self._validate_dependency(value):
                    setattr(self, name, value)
                else:
                    raise ValueError("Invalid dependency: %r"%value)
        if 'timeout' in kwargs:
            t = kwargs['timeout']
            if not isinstance(t, (int, long, float, type(None))):
                raise TypeError("Invalid type for timeout: %r"%type(t))
            if t is not None:
                if t < 0:
                    raise ValueError("Invalid timeout: %s"%t)
            self.timeout = t

    @sync_results
    @save_ids
    def _really_apply(self, f, args=None, kwargs=None, block=None, track=None,
                        after=None, follow=None, timeout=None,
                        targets=None, retries=None):
        """calls f(*args, **kwargs) on a remote engine, returning the result.

        This method temporarily sets all of `apply`'s flags for a single call.

        Parameters
        ----------

        f : callable

        args : list [default: empty]

        kwargs : dict [default: empty]

        block : bool [default: self.block]
            whether to block
        track : bool [default: self.track]
            whether to ask zmq to track the message, for safe non-copying sends

        after : Dependency or collection of msg_ids [default: self.after]
            time-based dependency: run only after these tasks complete
        follow : Dependency or collection of msg_ids [default: self.follow]
            location-based dependency: run only where these tasks ran
        timeout : float/int or None [default: self.timeout]
            seconds the scheduler waits for dependencies before failing
        targets : target list [default: self.targets]
            restrict the engines eligible for this task
        retries : int [default: self.retries]
            number of times the task is retried on failure

        Returns
        -------

        if self.block is False:
            returns AsyncResult
        else:
            returns actual result of f(*args, **kwargs) on the engine(s)
            This will be a list if self.targets is also a list (even length 1), or
            the single result if self.targets is an integer engine id
        """

        # validate whether we can run
        if self._socket.closed:
            msg = "Task farming is disabled"
            if self._task_scheme == 'pure':
                msg += " because the pure ZMQ scheduler cannot handle"
                msg += " disappearing engines."
            raise RuntimeError(msg)

        if self._task_scheme == 'pure':
            # pure zmq scheme doesn't support extra features
            # (the flag list is part of the message; the original left it as a
            # no-op bare string expression, truncating the error/warning text)
            msg = ("Pure ZMQ scheduler doesn't support the following flags:"
                   " follow, after, retries, targets, timeout")
            if (follow or after or retries or targets or timeout):
                # hard fail on Scheduler flags
                raise RuntimeError(msg)
            if isinstance(f, dependent):
                # soft warn on functional dependencies
                warnings.warn(msg, RuntimeWarning)

        # build args
        args = [] if args is None else args
        kwargs = {} if kwargs is None else kwargs
        block = self.block if block is None else block
        track = self.track if track is None else track
        after = self.after if after is None else after
        retries = self.retries if retries is None else retries
        follow = self.follow if follow is None else follow
        timeout = self.timeout if timeout is None else timeout
        targets = self.targets if targets is None else targets

        if not isinstance(retries, int):
            raise TypeError('retries must be int, not %r'%type(retries))

        if targets is None:
            idents = []
        else:
            idents = self.client._build_targets(targets)[0]
            # ensure *not* bytes
            idents = [ ident.decode() for ident in idents ]

        after = self._render_dependency(after)
        follow = self._render_dependency(follow)
        subheader = dict(after=after, follow=follow, timeout=timeout, targets=idents, retries=retries)

        msg = self.client.send_apply_message(self._socket, f, args, kwargs, track=track,
                                subheader=subheader)
        tracker = None if track is False else msg['tracker']

        ar = AsyncResult(self.client, msg['header']['msg_id'], fname=getname(f), targets=None, tracker=tracker)

        if block:
            try:
                return ar.get()
            except KeyboardInterrupt:
                pass
        return ar

    @spin_after
    @save_ids
    def map(self, f, *sequences, **kwargs):
        """view.map(f, *sequences, block=self.block, chunksize=1, ordered=True) => list|AsyncMapResult

        Parallel version of builtin `map`, load-balanced by this View.

        `block`, and `chunksize` can be specified by keyword only.

        Each `chunksize` elements will be a separate task, and will be
        load-balanced. This lets individual elements be available for iteration
        as soon as they arrive.

        Parameters
        ----------

        f : callable
            function to be mapped
        *sequences: one or more sequences of matching length
            the sequences to be distributed and passed to `f`
        block : bool [default self.block]
            whether to wait for the result or not
        chunksize : int [default 1]
            how many elements should be in each task.
        ordered : bool [default True]
            Whether the results should be gathered as they arrive, or enforce
            the order of submission.

            Only applies when iterating through AsyncMapResult as results arrive.
            Has no effect when block=True.

        Returns
        -------

        if block=False:
            AsyncMapResult
                An object like AsyncResult, but which reassembles the sequence of results
                into a single list. AsyncMapResults can be iterated through before all
                results are complete.
        else:
            the result of map(f,*sequences)

        """

        # default
        block = kwargs.get('block', self.block)
        chunksize = kwargs.get('chunksize', 1)
        ordered = kwargs.get('ordered', True)

        # reject unknown keywords. The original used set.difference_update,
        # which mutates in place and returns None, so `if extra_keys:` never
        # fired and invalid kwargs were silently accepted; 'ordered' is read
        # above and therefore belongs in the allowed set.
        extra_keys = set(kwargs.keys()).difference(set(['block', 'chunksize', 'ordered']))
        if extra_keys:
            raise TypeError("Invalid kwargs: %s"%list(extra_keys))

        assert len(sequences) > 0, "must have some sequences to map onto!"

        pf = ParallelFunction(self, f, block=block, chunksize=chunksize, ordered=ordered)
        return pf.map(*sequences)
1068
1068
# public API of this module: only the concrete View subclasses are exported
__all__ = ['LoadBalancedView', 'DirectView']
@@ -1,476 +1,476 b''
1 """some generic utilities for dealing with classes, urls, and serialization
1 """some generic utilities for dealing with classes, urls, and serialization
2
2
3 Authors:
3 Authors:
4
4
5 * Min RK
5 * Min RK
6 """
6 """
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (C) 2010-2011 The IPython Development Team
8 # Copyright (C) 2010-2011 The IPython Development Team
9 #
9 #
10 # Distributed under the terms of the BSD License. The full license is in
10 # Distributed under the terms of the BSD License. The full license is in
11 # the file COPYING, distributed as part of this software.
11 # the file COPYING, distributed as part of this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 # Standard library imports.
18 # Standard library imports.
19 import logging
19 import logging
20 import os
20 import os
21 import re
21 import re
22 import stat
22 import stat
23 import socket
23 import socket
24 import sys
24 import sys
25 from signal import signal, SIGINT, SIGABRT, SIGTERM
25 from signal import signal, SIGINT, SIGABRT, SIGTERM
26 try:
26 try:
27 from signal import SIGKILL
27 from signal import SIGKILL
28 except ImportError:
28 except ImportError:
29 SIGKILL=None
29 SIGKILL=None
30
30
31 try:
31 try:
32 import cPickle
32 import cPickle
33 pickle = cPickle
33 pickle = cPickle
34 except:
34 except:
35 cPickle = None
35 cPickle = None
36 import pickle
36 import pickle
37
37
38 # System library imports
38 # System library imports
39 import zmq
39 import zmq
40 from zmq.log import handlers
40 from zmq.log import handlers
41
41
42 # IPython imports
42 # IPython imports
43 from IPython.config.application import Application
43 from IPython.config.application import Application
44 from IPython.utils.pickleutil import can, uncan, canSequence, uncanSequence
44 from IPython.utils.pickleutil import can, uncan, canSequence, uncanSequence
45 from IPython.utils.newserialized import serialize, unserialize
45 from IPython.utils.newserialized import serialize, unserialize
46 from IPython.zmq.log import EnginePUBHandler
46 from IPython.zmq.log import EnginePUBHandler
47
47
48 #-----------------------------------------------------------------------------
48 #-----------------------------------------------------------------------------
49 # Classes
49 # Classes
50 #-----------------------------------------------------------------------------
50 #-----------------------------------------------------------------------------
51
51
class Namespace(dict):
    """Subclass of dict for attribute access to keys."""

    def __getattr__(self, key):
        """getattr aliased to getitem"""
        # `key in self` is the direct membership test on a dict's keys —
        # identical to the old `key in self.iterkeys()` without building
        # an intermediate iterator.
        if key in self:
            return self[key]
        else:
            # NOTE: raises NameError (not AttributeError) on purpose;
            # missing keys are treated like undefined names.
            raise NameError(key)

    def __setattr__(self, key, value):
        """setattr aliased to setitem, with strict"""
        # refuse to shadow dict's own attributes (keys, items, update, ...),
        # which would break normal dict usage of this object
        if hasattr(dict, key):
            raise KeyError("Cannot override dict keys %r"%key)
        self[key] = value
67
67
68
68
class ReverseDict(dict):
    """simple double-keyed subset of dict methods.

    Every (key, value) pair is mirrored as (value, key) in an internal
    reverse table, so lookups work in either direction.
    """

    def __init__(self, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)
        # mirror table: value -> key
        self._reverse = dict()
        for key, value in self.items():
            self._reverse[value] = key

    def __getitem__(self, key):
        """Look up by key first, falling back to a reverse (value) lookup."""
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return self._reverse[key]

    def __setitem__(self, key, value):
        # forbid a key that is already registered as a value — it would make
        # __getitem__ ambiguous
        if key in self._reverse:
            raise KeyError("Can't have key %r on both sides!"%key)
        dict.__setitem__(self, key, value)
        self._reverse[value] = key

    def pop(self, key, *default):
        """Remove `key` and its reverse mapping, returning the value.

        Accepts an optional default, matching the ``dict.pop`` contract.
        """
        try:
            value = dict.pop(self, key)
        except KeyError:
            if default:
                return default[0]
            raise
        self._reverse.pop(value)
        return value

    def get(self, key, default=None):
        """Like __getitem__ (both directions), but return `default` on a miss."""
        try:
            return self[key]
        except KeyError:
            return default
100
100
101 #-----------------------------------------------------------------------------
101 #-----------------------------------------------------------------------------
102 # Functions
102 # Functions
103 #-----------------------------------------------------------------------------
103 #-----------------------------------------------------------------------------
104
104
def asbytes(s):
    """ensure that an object is ascii bytes"""
    # unicode input gets ascii-encoded; anything else passes through untouched
    return s.encode('ascii') if isinstance(s, unicode) else s
110
110
def is_url(url):
    """boolean check for whether a string is a zmq url"""
    if '://' not in url:
        return False
    scheme = url.split('://', 1)[0]
    # zmq only speaks these transports
    return scheme.lower() in ('tcp', 'pgm', 'epgm', 'ipc', 'inproc')
119
119
def validate_url(url):
    """validate a url for zeromq

    Raises TypeError for non-strings and AssertionError for malformed urls;
    returns True when the url looks valid.  Only tcp urls are fully checked.
    """
    if not isinstance(url, basestring):
        raise TypeError("url must be a string, not %r"%type(url))
    url = url.lower()

    proto_addr = url.split('://')
    assert len(proto_addr) == 2, 'Invalid url: %r'%url
    proto, addr = proto_addr
    assert proto in ['tcp','pgm','epgm','ipc','inproc'], "Invalid protocol: %r"%proto

    # domain pattern adapted from http://www.regexlib.com/REDetails.aspx?regexp_id=391
    # author: Remi Sabourin
    pat = re.compile(r'^([\w\d]([\w\d\-]{0,61}[\w\d])?\.)*[\w\d]([\w\d\-]{0,61}[\w\d])?$')

    if proto == 'tcp':
        lis = addr.split(':')
        assert len(lis) == 2, 'Invalid url: %r'%url
        addr,s_port = lis
        try:
            port = int(s_port)
        except ValueError:
            # BUGFIX: format s_port, not port — `port` is unbound when
            # int() fails, so this raise used to die with a NameError
            raise AssertionError("Invalid port %r in url: %r"%(s_port, url))

        assert addr == '*' or pat.match(addr) is not None, 'Invalid url: %r'%url

    else:
        # only validate tcp urls currently
        pass

    return True
151
151
152
152
def validate_url_container(container):
    """validate a potentially nested collection of urls."""
    # a bare string is a single url
    if isinstance(container, basestring):
        return validate_url(container)
    if isinstance(container, dict):
        # for dicts, the urls are the values, not the keys
        container = container.itervalues()

    for element in container:
        validate_url_container(element)
163
163
164
164
def split_url(url):
    """split a zmq url (tcp://ip:port) into ('tcp','ip','port')."""
    pieces = url.split('://')
    assert len(pieces) == 2, 'Invalid url: %r'%url
    proto, addr = pieces
    host_port = addr.split(':')
    assert len(host_port) == 2, 'Invalid url: %r'%url
    host, port = host_port
    # port stays a string; callers convert if they need an int
    return proto, host, port
174
174
def disambiguate_ip_address(ip, location=None):
    """turn multi-ip interfaces '0.0.0.0' and '*' into connectable
    ones, based on the location (default interpretation of location is localhost)."""
    # concrete addresses need no disambiguation
    if ip not in ('0.0.0.0', '*'):
        return ip
    try:
        external_ips = socket.gethostbyname_ex(socket.gethostname())[2]
    except (socket.gaierror, IndexError):
        # couldn't identify this machine, assume localhost
        external_ips = []
    if location is None or location in external_ips or not external_ips:
        # If location is unspecified or cannot be determined, assume local
        return '127.0.0.1'
    if location:
        # a remote location we couldn't match to a local interface
        return location
    return ip
190
190
def disambiguate_url(url, location=None):
    """turn multi-ip interfaces '0.0.0.0' and '*' into connectable
    ones, based on the location (default interpretation is localhost).

    This is for zeromq urls, such as tcp://*:10101."""
    try:
        proto, ip, port = split_url(url)
    except AssertionError:
        # probably not tcp url; could be ipc, etc.
        return url
    return "%s://%s:%s"%(proto, disambiguate_ip_address(ip, location), port)
205
205
def _extract_buffer(s, threshold, databuffers):
    """Strip large/raw data out of one serialized wrapper into `databuffers`.

    Any buffer/ndarray data, or data bigger than `threshold`, is appended to
    `databuffers` and cleared from the wrapper so it can travel as a separate
    (possibly zero-copy) message frame instead of being double-pickled.
    """
    if s.typeDescriptor in ('buffer', 'ndarray') or s.getDataSize() > threshold:
        databuffers.append(s.getData())
        s.data = None


def serialize_object(obj, threshold=64e-6):
    """Serialize an object into a list of sendable buffers.

    Parameters
    ----------

    obj : object
        The object to be serialized
    threshold : float
        The threshold for not double-pickling the content.


    Returns
    -------
    ('pmd', [bufs]) :
        where pmd is the pickled metadata wrapper,
        bufs is a list of data buffers
    """
    databuffers = []
    if isinstance(obj, (list, tuple)):
        # serialize each element, stripping out large data buffers
        clist = canSequence(obj)
        slist = map(serialize, clist)
        for s in slist:
            _extract_buffer(s, threshold, databuffers)
        return pickle.dumps(slist,-1), databuffers
    elif isinstance(obj, dict):
        # sort keys so the buffer order is deterministic on both ends
        sobj = {}
        for k in sorted(obj.iterkeys()):
            s = serialize(can(obj[k]))
            _extract_buffer(s, threshold, databuffers)
            sobj[k] = s
        return pickle.dumps(sobj,-1), databuffers
    else:
        s = serialize(can(obj))
        _extract_buffer(s, threshold, databuffers)
        return pickle.dumps(s,-1), databuffers
248
248
249
249
def unserialize_object(bufs):
    """reconstruct an object serialized by serialize_object from data buffers."""
    bufs = list(bufs)
    wrapper = pickle.loads(bufs.pop(0))
    if isinstance(wrapper, (list, tuple)):
        # refill each stripped element from the buffer stream, in order
        for s in wrapper:
            if s.data is None:
                s.data = bufs.pop(0)
        return uncanSequence(map(unserialize, wrapper)), bufs
    if isinstance(wrapper, dict):
        # keys were sorted at serialize time; consume buffers in the same order
        restored = {}
        for k in sorted(wrapper.iterkeys()):
            s = wrapper[k]
            if s.data is None:
                s.data = bufs.pop(0)
            restored[k] = uncan(unserialize(s))
        return restored, bufs
    # single object
    if wrapper.data is None:
        wrapper.data = bufs.pop(0)
    return uncan(unserialize(wrapper)), bufs
271
271
def pack_apply_message(f, args, kwargs, threshold=64e-6):
    """pack up a function, args, and kwargs to be sent over the wire
    as a series of buffers. Any object whose data is larger than `threshold`
    will not have their data copied (currently only numpy arrays support zero-copy)"""
    # frame layout: [pickled f, serialized args, serialized kwargs, *data buffers]
    sargs, arg_bufs = serialize_object(args,threshold)
    skwargs, kwarg_bufs = serialize_object(kwargs,threshold)
    msg = [pickle.dumps(can(f),-1), sargs, skwargs]
    msg.extend(arg_bufs)
    msg.extend(kwarg_bufs)
    return msg
286
286
def _restore_buffer(sa, bufs, copy):
    """Refill one stripped serialized object from the front of `bufs`.

    With copy=False the frames are message objects exposing `.bytes`
    (see the copy handling in unpack_apply_message); with copy=True
    they can be used directly.
    """
    if sa.data is not None:
        return
    m = bufs.pop(0)
    if sa.getTypeDescriptor() in ('buffer', 'ndarray'):
        # always use a buffer, until memoryviews get sorted out
        sa.data = buffer(m)
    elif copy:
        sa.data = m
    else:
        sa.data = m.bytes


def unpack_apply_message(bufs, g=None, copy=True):
    """unpack f,args,kwargs from buffers packed by pack_apply_message()
    Returns: original f,args,kwargs"""
    bufs = list(bufs) # allow us to pop
    assert len(bufs) >= 3, "not enough buffers!"
    if not copy:
        # the first three frames are always unpickled, so extract real bytes
        for i in range(3):
            bufs[i] = bufs[i].bytes
    cf = pickle.loads(bufs.pop(0))
    sargs = list(pickle.loads(bufs.pop(0)))
    skwargs = dict(pickle.loads(bufs.pop(0)))
    f = uncan(cf, g)
    # the identical restore logic for args and kwargs lives in _restore_buffer
    for sa in sargs:
        _restore_buffer(sa, bufs, copy)
    args = uncanSequence(map(unserialize, sargs), g)
    kwargs = {}
    for k in sorted(skwargs.iterkeys()):
        sa = skwargs[k]
        _restore_buffer(sa, bufs, copy)
        kwargs[k] = uncan(unserialize(sa), g)

    return f,args,kwargs
340
340
341 #--------------------------------------------------------------------------
341 #--------------------------------------------------------------------------
342 # helpers for implementing old MEC API via view.apply
342 # helpers for implementing old MEC API via view.apply
343 #--------------------------------------------------------------------------
343 #--------------------------------------------------------------------------
344
344
def interactive(f):
    """decorator for making functions appear as interactively defined.
    This results in the function being linked to the user_ns as globals()
    instead of the module globals().
    """
    setattr(f, '__module__', '__main__')
    return f
352
352
@interactive
def _push(**ns):
    """helper method for implementing `client.push` via `client.apply`"""
    # merge the pushed variables into the user namespace
    for name, value in ns.items():
        globals()[name] = value
357
357
@interactive
def _pull(keys):
    """helper method for implementing `client.pull` via `client.apply`

    `keys` may be a single name or a list/tuple/set of names; a missing
    name raises NameError, mirroring interactive lookup.
    """
    user_ns = globals()
    if isinstance(keys, (list,tuple, set)):
        for key in keys:
            # `in` replaces the long-deprecated dict.has_key — identical check
            if key not in user_ns:
                raise NameError("name '%s' is not defined"%key)
        return map(user_ns.get, keys)
    else:
        if keys not in user_ns:
            raise NameError("name '%s' is not defined"%keys)
        return user_ns.get(keys)
371
371
@interactive
def _execute(code):
    """helper method for implementing `client.execute` via `client.apply`"""
    # function-call form of exec: identical to the Python 2 statement
    # (the tuple form is documented equivalent) and also valid Python 3
    exec(code, globals())
376
376
377 #--------------------------------------------------------------------------
377 #--------------------------------------------------------------------------
378 # extra process management utilities
378 # extra process management utilities
379 #--------------------------------------------------------------------------
379 #--------------------------------------------------------------------------
380
380
# ports handed out so far, so repeated calls never return duplicates
_random_ports = set()

def select_random_ports(n):
    """Selects and return n random ports that are available.

    All n sockets stay bound while selecting, so the returned ports are
    distinct; each port is remembered in _random_ports and never reused.
    """
    sockets = []
    for i in range(n):
        sock = socket.socket()
        sock.bind(('', 0))
        # re-roll if the OS hands us a port we've already given out
        while sock.getsockname()[1] in _random_ports:
            sock.close()
            sock = socket.socket()
            sock.bind(('', 0))
        sockets.append(sock)
    ports = []
    for sock in sockets:
        port = sock.getsockname()[1]
        sock.close()
        ports.append(port)
        _random_ports.add(port)
    return ports
400
400
def signal_children(children):
    """Relay interrupt/term signals to children, for more solid process cleanup.

    Installs a handler for SIGINT/SIGABRT/SIGTERM in *this* process that
    terminates every object in `children` (anything with a ``terminate()``
    method — presumably Popen-like; confirm against callers) and then exits.
    """
    def terminate_children(sig, frame):
        log = Application.instance().log
        log.critical("Got signal %i, terminating children..."%sig)
        for child in children:
            child.terminate()

        # exit status 0 for a user-requested SIGINT, 1 for other signals
        sys.exit(sig != SIGINT)
        # sys.exit(sig)
    for sig in (SIGINT, SIGABRT, SIGTERM):
        signal(sig, terminate_children)
413
413
def generate_exec_key(keyfile):
    """Write a fresh random execution key (a uuid4 string) to `keyfile`."""
    import uuid
    with open(keyfile, 'w') as f:
        f.write(str(uuid.uuid4())+'\n')
    # set user-only RW permissions (0600); this will have no effect on Windows
    os.chmod(keyfile, stat.S_IRUSR|stat.S_IWUSR)
423
423
424
424
def integer_loglevel(loglevel):
    """Coerce a loglevel to an int: int-like values pass through int(),
    level names ('DEBUG', ...) are resolved via the logging module."""
    try:
        return int(loglevel)
    except ValueError:
        pass
    if isinstance(loglevel, str):
        loglevel = getattr(logging, loglevel)
    return loglevel
432
432
def connect_logger(logname, context, iface, root="ip", loglevel=logging.DEBUG):
    """Attach a zmq PUBHandler publishing to `iface` to the named logger."""
    logger = logging.getLogger(logname)
    if any(isinstance(h, handlers.PUBHandler) for h in logger.handlers):
        # already wired up; don't add a second PUBHandler
        return
    loglevel = integer_loglevel(loglevel)
    pub_socket = context.socket(zmq.PUB)
    pub_socket.connect(iface)
    pub_handler = handlers.PUBHandler(pub_socket)
    pub_handler.setLevel(loglevel)
    pub_handler.root_topic = root
    logger.addHandler(pub_handler)
    logger.setLevel(loglevel)
446
446
def connect_engine_logger(context, iface, engine, loglevel=logging.DEBUG):
    """Attach an EnginePUBHandler for `engine` to the root logger.

    Returns the root logger, or None if a PUBHandler was already attached.
    """
    logger = logging.getLogger()
    if any(isinstance(h, handlers.PUBHandler) for h in logger.handlers):
        # already wired up; don't add a second PUBHandler
        return
    loglevel = integer_loglevel(loglevel)
    pub_socket = context.socket(zmq.PUB)
    pub_socket.connect(iface)
    pub_handler = EnginePUBHandler(engine, pub_socket)
    pub_handler.setLevel(loglevel)
    logger.addHandler(pub_handler)
    logger.setLevel(loglevel)
    return logger
460
460
def local_logger(logname, loglevel=logging.DEBUG):
    """Attach a timestamped StreamHandler to the named logger.

    Always returns the logger.  (Previously the "handler already attached"
    branch returned None while the normal path returned the logger, so a
    second call for the same name surprised callers.)
    """
    loglevel = integer_loglevel(loglevel)
    logger = logging.getLogger(logname)
    if any(isinstance(h, logging.StreamHandler) for h in logger.handlers):
        # don't add a second StreamHandler, but still hand back the logger
        return logger
    handler = logging.StreamHandler()
    handler.setLevel(loglevel)
    formatter = logging.Formatter("%(asctime)s.%(msecs).03d [%(name)s] %(message)s",
                datefmt="%Y-%m-%d %H:%M:%S")
    handler.setFormatter(formatter)

    logger.addHandler(handler)
    logger.setLevel(loglevel)
    return logger
476
476
General Comments 0
You need to be logged in to leave comments. Login now