##// END OF EJS Templates
Client -> HasTraits, update examples with API tweaks
MinRK -
Show More
@@ -1,1328 +1,1378 b''
1 """A semi-synchronous Client for the ZMQ controller"""
1 """A semi-synchronous Client for the ZMQ controller"""
2 #-----------------------------------------------------------------------------
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2010 The IPython Development Team
3 # Copyright (C) 2010 The IPython Development Team
4 #
4 #
5 # Distributed under the terms of the BSD License. The full license is in
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8
8
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Imports
10 # Imports
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12
12
13 import os
13 import os
14 import json
14 import json
15 import time
15 import time
16 import warnings
16 import warnings
17 from datetime import datetime
17 from datetime import datetime
18 from getpass import getpass
18 from getpass import getpass
19 from pprint import pprint
19 from pprint import pprint
20
20
21 pjoin = os.path.join
21 pjoin = os.path.join
22
22
23 import zmq
23 import zmq
24 # from zmq.eventloop import ioloop, zmqstream
24 # from zmq.eventloop import ioloop, zmqstream
25
25
26 from IPython.utils.path import get_ipython_dir
26 from IPython.utils.path import get_ipython_dir
27 from IPython.utils.traitlets import (HasTraits, Int, Instance, CUnicode,
28 Dict, List, Bool, Str, Set)
27 from IPython.external.decorator import decorator
29 from IPython.external.decorator import decorator
28 from IPython.external.ssh import tunnel
30 from IPython.external.ssh import tunnel
29
31
30 import error
32 import error
31 import map as Map
33 import map as Map
32 import streamsession as ss
34 import streamsession as ss
33 from asyncresult import AsyncResult, AsyncMapResult
35 from asyncresult import AsyncResult, AsyncMapResult
34 from clusterdir import ClusterDir, ClusterDirError
36 from clusterdir import ClusterDir, ClusterDirError
35 from dependency import Dependency, depend, require, dependent
37 from dependency import Dependency, depend, require, dependent
36 from remotefunction import remote,parallel,ParallelFunction,RemoteFunction
38 from remotefunction import remote,parallel,ParallelFunction,RemoteFunction
37 from util import ReverseDict, disambiguate_url, validate_url
39 from util import ReverseDict, disambiguate_url, validate_url
38 from view import DirectView, LoadBalancedView
40 from view import DirectView, LoadBalancedView
39
41
40 #--------------------------------------------------------------------------
42 #--------------------------------------------------------------------------
41 # helpers for implementing old MEC API via client.apply
43 # helpers for implementing old MEC API via client.apply
42 #--------------------------------------------------------------------------
44 #--------------------------------------------------------------------------
43
45
44 def _push(ns):
46 def _push(ns):
45 """helper method for implementing `client.push` via `client.apply`"""
47 """helper method for implementing `client.push` via `client.apply`"""
46 globals().update(ns)
48 globals().update(ns)
47
49
48 def _pull(keys):
50 def _pull(keys):
49 """helper method for implementing `client.pull` via `client.apply`"""
51 """helper method for implementing `client.pull` via `client.apply`"""
50 g = globals()
52 g = globals()
51 if isinstance(keys, (list,tuple, set)):
53 if isinstance(keys, (list,tuple, set)):
52 for key in keys:
54 for key in keys:
53 if not g.has_key(key):
55 if not g.has_key(key):
54 raise NameError("name '%s' is not defined"%key)
56 raise NameError("name '%s' is not defined"%key)
55 return map(g.get, keys)
57 return map(g.get, keys)
56 else:
58 else:
57 if not g.has_key(keys):
59 if not g.has_key(keys):
58 raise NameError("name '%s' is not defined"%keys)
60 raise NameError("name '%s' is not defined"%keys)
59 return g.get(keys)
61 return g.get(keys)
60
62
61 def _clear():
63 def _clear():
62 """helper method for implementing `client.clear` via `client.apply`"""
64 """helper method for implementing `client.clear` via `client.apply`"""
63 globals().clear()
65 globals().clear()
64
66
65 def _execute(code):
67 def _execute(code):
66 """helper method for implementing `client.execute` via `client.apply`"""
68 """helper method for implementing `client.execute` via `client.apply`"""
67 exec code in globals()
69 exec code in globals()
68
70
69
71
70 #--------------------------------------------------------------------------
72 #--------------------------------------------------------------------------
71 # Decorators for Client methods
73 # Decorators for Client methods
72 #--------------------------------------------------------------------------
74 #--------------------------------------------------------------------------
73
75
@decorator
def spinfirst(f, self, *args, **kwargs):
    """Call spin() to sync state prior to calling the method."""
    # flush incoming results/notifications so `f` sees current state
    self.spin()
    return f(self, *args, **kwargs)
79
81
@decorator
def defaultblock(f, self, *args, **kwargs):
    """Default to self.block; preserve self.block."""
    # honor an explicit block=... kwarg, otherwise fall back to the
    # client's current default
    requested = kwargs.get('block',None)
    if requested is None:
        requested = self.block
    # temporarily install the requested blocking mode, restoring the
    # original even if `f` raises
    previous = self.block
    self.block = requested
    try:
        result = f(self, *args, **kwargs)
    finally:
        self.block = previous
    return result
92
94
93
95
94 #--------------------------------------------------------------------------
96 #--------------------------------------------------------------------------
95 # Classes
97 # Classes
96 #--------------------------------------------------------------------------
98 #--------------------------------------------------------------------------
97
99
class Metadata(dict):
    """Subclass of dict for initializing metadata values.

    Attribute access works on keys.

    These objects have a strict set of keys - errors will raise if you try
    to add new keys.
    """
    def __init__(self, *args, **kwargs):
        """Seed the fixed schema, then apply caller-supplied overrides.

        Overrides are routed through __setitem__, so an unknown key in
        *args/**kwargs raises KeyError.  (The previous implementation used
        dict.update, which silently bypassed the strict check and allowed
        arbitrary keys in, contradicting the class contract.)
        """
        dict.__init__(self)
        # the fixed key schema: these keys define exactly what is allowed
        md = {'msg_id' : None,
              'submitted' : None,
              'started' : None,
              'completed' : None,
              'received' : None,
              'engine_uuid' : None,
              'engine_id' : None,
              'follow' : None,
              'after' : None,
              'status' : None,

              'pyin' : None,
              'pyout' : None,
              'pyerr' : None,
              'stdout' : '',
              'stderr' : '',
            }
        # seed directly (bypassing __setitem__ is fine here: these ARE
        # the allowed keys)
        dict.update(self, md)
        # caller overrides go through __setitem__ for strict enforcement
        for key, value in dict(*args, **kwargs).items():
            self[key] = value

    def __getattr__(self, key):
        """getattr aliased to getitem"""
        # `key in self` replaces py2-only `key in self.iterkeys()`:
        # same semantics, O(1), and portable
        if key in self:
            return self[key]
        else:
            raise AttributeError(key)

    def __setattr__(self, key, value):
        """setattr aliased to setitem, with strict key enforcement"""
        if key in self:
            self[key] = value
        else:
            raise AttributeError(key)

    def __setitem__(self, key, value):
        """strict static key enforcement"""
        if key in self:
            dict.__setitem__(self, key, value)
        else:
            raise KeyError(key)
148
150
149
151
150 class Client(object):
152 class Client(HasTraits):
151 """A semi-synchronous client to the IPython ZMQ controller
153 """A semi-synchronous client to the IPython ZMQ controller
152
154
153 Parameters
155 Parameters
154 ----------
156 ----------
155
157
156 url_or_file : bytes; zmq url or path to ipcontroller-client.json
158 url_or_file : bytes; zmq url or path to ipcontroller-client.json
157 Connection information for the Hub's registration. If a json connector
159 Connection information for the Hub's registration. If a json connector
158 file is given, then likely no further configuration is necessary.
160 file is given, then likely no further configuration is necessary.
159 [Default: use profile]
161 [Default: use profile]
160 profile : bytes
162 profile : bytes
161 The name of the Cluster profile to be used to find connector information.
163 The name of the Cluster profile to be used to find connector information.
162 [Default: 'default']
164 [Default: 'default']
163 context : zmq.Context
165 context : zmq.Context
164 Pass an existing zmq.Context instance, otherwise the client will create its own.
166 Pass an existing zmq.Context instance, otherwise the client will create its own.
165 username : bytes
167 username : bytes
166 set username to be passed to the Session object
168 set username to be passed to the Session object
167 debug : bool
169 debug : bool
168 flag for lots of message printing for debug purposes
170 flag for lots of message printing for debug purposes
169
171
170 #-------------- ssh related args ----------------
172 #-------------- ssh related args ----------------
171 # These are args for configuring the ssh tunnel to be used
173 # These are args for configuring the ssh tunnel to be used
172 # credentials are used to forward connections over ssh to the Controller
174 # credentials are used to forward connections over ssh to the Controller
173 # Note that the ip given in `addr` needs to be relative to sshserver
175 # Note that the ip given in `addr` needs to be relative to sshserver
174 # The most basic case is to leave addr as pointing to localhost (127.0.0.1),
176 # The most basic case is to leave addr as pointing to localhost (127.0.0.1),
175 # and set sshserver as the same machine the Controller is on. However,
177 # and set sshserver as the same machine the Controller is on. However,
176 # the only requirement is that sshserver is able to see the Controller
178 # the only requirement is that sshserver is able to see the Controller
177 # (i.e. is within the same trusted network).
179 # (i.e. is within the same trusted network).
178
180
179 sshserver : str
181 sshserver : str
180 A string of the form passed to ssh, i.e. 'server.tld' or 'user@server.tld:port'
182 A string of the form passed to ssh, i.e. 'server.tld' or 'user@server.tld:port'
181 If keyfile or password is specified, and this is not, it will default to
183 If keyfile or password is specified, and this is not, it will default to
182 the ip given in addr.
184 the ip given in addr.
183 sshkey : str; path to public ssh key file
185 sshkey : str; path to public ssh key file
184 This specifies a key to be used in ssh login, default None.
186 This specifies a key to be used in ssh login, default None.
185 Regular default ssh keys will be used without specifying this argument.
187 Regular default ssh keys will be used without specifying this argument.
186 password : str
188 password : str
187 Your ssh password to sshserver. Note that if this is left None,
189 Your ssh password to sshserver. Note that if this is left None,
188 you will be prompted for it if passwordless key based login is unavailable.
190 you will be prompted for it if passwordless key based login is unavailable.
189 paramiko : bool
191 paramiko : bool
190 flag for whether to use paramiko instead of shell ssh for tunneling.
192 flag for whether to use paramiko instead of shell ssh for tunneling.
191 [default: True on win32, False else]
193 [default: True on win32, False else]
192
194
193 #------- exec authentication args -------
195 #------- exec authentication args -------
194 # If even localhost is untrusted, you can have some protection against
196 # If even localhost is untrusted, you can have some protection against
195 # unauthorized execution by using a key. Messages are still sent
197 # unauthorized execution by using a key. Messages are still sent
196 # as cleartext, so if someone can snoop your loopback traffic this will
198 # as cleartext, so if someone can snoop your loopback traffic this will
197 # not help against malicious attacks.
199 # not help against malicious attacks.
198
200
199 exec_key : str
201 exec_key : str
200 an authentication key or file containing a key
202 an authentication key or file containing a key
201 default: None
203 default: None
202
204
203
205
204 Attributes
206 Attributes
205 ----------
207 ----------
206
208
207 ids : set of int engine IDs
209 ids : set of int engine IDs
208 requesting the ids attribute always synchronizes
210 requesting the ids attribute always synchronizes
209 the registration state. To request ids without synchronization,
211 the registration state. To request ids without synchronization,
210 use semi-private _ids attributes.
212 use semi-private _ids attributes.
211
213
212 history : list of msg_ids
214 history : list of msg_ids
213 a list of msg_ids, keeping track of all the execution
215 a list of msg_ids, keeping track of all the execution
214 messages you have submitted in order.
216 messages you have submitted in order.
215
217
216 outstanding : set of msg_ids
218 outstanding : set of msg_ids
217 a set of msg_ids that have been submitted, but whose
219 a set of msg_ids that have been submitted, but whose
218 results have not yet been received.
220 results have not yet been received.
219
221
220 results : dict
222 results : dict
221 a dict of all our results, keyed by msg_id
223 a dict of all our results, keyed by msg_id
222
224
223 block : bool
225 block : bool
224 determines default behavior when block not specified
226 determines default behavior when block not specified
225 in execution methods
227 in execution methods
226
228
227 Methods
229 Methods
228 -------
230 -------
229
231
230 spin
232 spin
231 flushes incoming results and registration state changes
233 flushes incoming results and registration state changes
232 control methods spin, and requesting `ids` also ensures up to date
234 control methods spin, and requesting `ids` also ensures up to date
233
235
234 barrier
236 barrier
235 wait on one or more msg_ids
237 wait on one or more msg_ids
236
238
237 execution methods
239 execution methods
238 apply
240 apply
239 legacy: execute, run
241 legacy: execute, run
240
242
241 query methods
243 query methods
242 queue_status, get_result, purge
244 queue_status, get_result, purge
243
245
244 control methods
246 control methods
245 abort, shutdown
247 abort, shutdown
246
248
247 """
249 """
248
250
249
251
    # Public traits ------------------------------------------------------
    block = Bool(False)            # default blocking behavior for execution methods
    outstanding = Set()            # msg_ids submitted but not yet resolved
    results = Dict()               # completed results, keyed by msg_id
    metadata = Dict()              # Metadata objects, keyed by msg_id
    history = List()               # all submitted msg_ids, in order
    debug = Bool(False)            # verbose message printing for debugging
    profile = CUnicode('default')  # cluster profile used to find connection info

    # Private traits / internal state ------------------------------------
    _ids = List()                  # registered engine ids (see `ids` property)
    _connected = Bool(False)
    _ssh = Bool(False)
    _context = Instance('zmq.Context')
    _config = Dict()               # connection config (from args / json file)
    _engines = Instance(ReverseDict, (), {})  # engine id <-> uuid mapping
    _registration_socket = Instance('zmq.Socket')
    _query_socket = Instance('zmq.Socket')
    _control_socket = Instance('zmq.Socket')
    _iopub_socket = Instance('zmq.Socket')
    _notification_socket = Instance('zmq.Socket')
    _mux_socket = Instance('zmq.Socket')
    _task_socket = Instance('zmq.Socket')
    _task_scheme = Str()           # task scheduler scheme, e.g. 'pure'
    _balanced_views = Dict()       # cached LoadBalancedView instances
    _direct_views = Dict()         # cached DirectView instances
    _closed = False                # set by close(); plain attr, not a trait
267
277
    def __init__(self, url_or_file=None, profile='default', cluster_dir=None, ipython_dir=None,
            context=None, username=None, debug=False, exec_key=None,
            sshserver=None, sshkey=None, password=None, paramiko=None,
            ):
        """Resolve connection info and connect to the controller.

        See the class docstring for parameter details.
        """
        # HasTraits init: installs the `debug` and `profile` traits
        super(Client, self).__init__(debug=debug, profile=profile)
        if context is None:
            context = zmq.Context()
        self._context = context

        # locate the cluster dir so a bare profile name suffices to connect
        self._setup_cluster_dir(profile, cluster_dir, ipython_dir)
        if self._cd is not None:
            if url_or_file is None:
                url_or_file = pjoin(self._cd.security_dir, 'ipcontroller-client.json')
        assert url_or_file is not None, "I can't find enough information to connect to a controller!"\
            " Please specify at least one of url_or_file or profile."

        # url_or_file may be a zmq url or a path to a json connector file
        try:
            validate_url(url_or_file)
        except AssertionError:
            # not a url: treat it as a connector file, possibly relative
            # to the cluster dir's security directory
            if not os.path.exists(url_or_file):
                if self._cd:
                    url_or_file = os.path.join(self._cd.security_dir, url_or_file)
                assert os.path.exists(url_or_file), "Not a valid connection file or url: %r"%url_or_file
            with open(url_or_file) as f:
                cfg = json.loads(f.read())
        else:
            cfg = {'url':url_or_file}

        # sync defaults from args, json:
        # explicit arguments take precedence over json-file values
        if sshserver:
            cfg['ssh'] = sshserver
        if exec_key:
            cfg['exec_key'] = exec_key
        exec_key = cfg['exec_key']
        sshserver=cfg['ssh']
        url = cfg['url']
        location = cfg.setdefault('location', None)
        cfg['url'] = disambiguate_url(cfg['url'], location)
        url = cfg['url']

        self._config = cfg

        # any ssh credential implies tunneling
        self._ssh = bool(sshserver or sshkey or password)
        if self._ssh and sshserver is None:
            # default to ssh via localhost
            sshserver = url.split('://')[1].split(':')[0]
        if self._ssh and password is None:
            if tunnel.try_passwordless_ssh(sshserver, sshkey, paramiko):
                password=False
            else:
                password = getpass("SSH Password for %s: "%sshserver)
        ssh_kwargs = dict(keyfile=sshkey, password=password, paramiko=paramiko)
        # exec_key may be a path to a keyfile, or the key value itself
        if exec_key is not None and os.path.isfile(exec_key):
            arg = 'keyfile'
        else:
            arg = 'key'
        key_arg = {arg:exec_key}
        if username is None:
            self.session = ss.StreamSession(**key_arg)
        else:
            self.session = ss.StreamSession(username, **key_arg)
        self._registration_socket = self._context.socket(zmq.XREQ)
        # the session id doubles as our zmq socket identity
        self._registration_socket.setsockopt(zmq.IDENTITY, self.session.session)
        if self._ssh:
            tunnel.tunnel_connection(self._registration_socket, url, sshserver, **ssh_kwargs)
        else:
            self._registration_socket.connect(url)

        self.session.debug = self.debug

        # dispatch tables for messages arriving on the notification and
        # queue channels
        self._notification_handlers = {'registration_notification' : self._register_engine,
                                    'unregistration_notification' : self._unregister_engine,
                                    }
        self._queue_handlers = {'execute_reply' : self._handle_execute_reply,
                                'apply_reply' : self._handle_apply_reply}
        self._connect(sshserver, ssh_kwargs)
349
355
350
356
351 def _setup_cluster_dir(self, profile, cluster_dir, ipython_dir):
357 def _setup_cluster_dir(self, profile, cluster_dir, ipython_dir):
352 if ipython_dir is None:
358 if ipython_dir is None:
353 ipython_dir = get_ipython_dir()
359 ipython_dir = get_ipython_dir()
354 if cluster_dir is not None:
360 if cluster_dir is not None:
355 try:
361 try:
356 self._cd = ClusterDir.find_cluster_dir(cluster_dir)
362 self._cd = ClusterDir.find_cluster_dir(cluster_dir)
357 except ClusterDirError:
363 except ClusterDirError:
358 pass
364 pass
359 elif profile is not None:
365 elif profile is not None:
360 try:
366 try:
361 self._cd = ClusterDir.find_cluster_dir_by_profile(
367 self._cd = ClusterDir.find_cluster_dir_by_profile(
362 ipython_dir, profile)
368 ipython_dir, profile)
363 except ClusterDirError:
369 except ClusterDirError:
364 pass
370 pass
365 else:
371 else:
366 self._cd = None
372 self._cd = None
367
373
    @property
    def ids(self):
        """Always up-to-date ids property."""
        # drain pending (un)registration notifications first, so the
        # returned list reflects the controller's current engine set
        self._flush_notifications()
        return self._ids
379
380 def close(self):
381 if self._closed:
382 return
383 snames = filter(lambda n: n.endswith('socket'), dir(self))
384 for socket in map(lambda name: getattr(self, name), snames):
385 socket.close()
386 self._closed = True
373
387
    def _update_engines(self, engines):
        """Update our engines dict and _ids from a dict of the form: {id:uuid}."""
        for k,v in engines.iteritems():
            eid = int(k)
            self._engines[eid] = bytes(v) # force not unicode
            self._ids.append(eid)
        # reassignment (not in-place sort) so the List trait sees the change
        self._ids = sorted(self._ids)
        # a gap in the engine ids means some engine unregistered; the pure
        # ZMQ scheduler cannot survive that, so disable task scheduling
        if sorted(self._engines.keys()) != range(len(self._engines)) and \
                        self._task_scheme == 'pure' and self._task_socket:
            self._stop_scheduling_tasks()
384
398
385 def _stop_scheduling_tasks(self):
399 def _stop_scheduling_tasks(self):
386 """Stop scheduling tasks because an engine has been unregistered
400 """Stop scheduling tasks because an engine has been unregistered
387 from a pure ZMQ scheduler.
401 from a pure ZMQ scheduler.
388 """
402 """
389
403
390 self._task_socket.close()
404 self._task_socket.close()
391 self._task_socket = None
405 self._task_socket = None
392 msg = "An engine has been unregistered, and we are using pure " +\
406 msg = "An engine has been unregistered, and we are using pure " +\
393 "ZMQ task scheduling. Task farming will be disabled."
407 "ZMQ task scheduling. Task farming will be disabled."
394 if self.outstanding:
408 if self.outstanding:
395 msg += " If you were running tasks when this happened, " +\
409 msg += " If you were running tasks when this happened, " +\
396 "some `outstanding` msg_ids may never resolve."
410 "some `outstanding` msg_ids may never resolve."
397 warnings.warn(msg, RuntimeWarning)
411 warnings.warn(msg, RuntimeWarning)
398
412
399 def _build_targets(self, targets):
413 def _build_targets(self, targets):
400 """Turn valid target IDs or 'all' into two lists:
414 """Turn valid target IDs or 'all' into two lists:
401 (int_ids, uuids).
415 (int_ids, uuids).
402 """
416 """
403 if targets is None:
417 if targets is None:
404 targets = self._ids
418 targets = self._ids
405 elif isinstance(targets, str):
419 elif isinstance(targets, str):
406 if targets.lower() == 'all':
420 if targets.lower() == 'all':
407 targets = self._ids
421 targets = self._ids
408 else:
422 else:
409 raise TypeError("%r not valid str target, must be 'all'"%(targets))
423 raise TypeError("%r not valid str target, must be 'all'"%(targets))
410 elif isinstance(targets, int):
424 elif isinstance(targets, int):
411 targets = [targets]
425 targets = [targets]
412 return [self._engines[t] for t in targets], list(targets)
426 return [self._engines[t] for t in targets], list(targets)
413
427
    def _connect(self, sshserver, ssh_kwargs):
        """setup all our socket connections to the controller. This is called from
        __init__."""

        # Maybe allow reconnecting?
        if self._connected:
            return
        self._connected=True

        def connect_socket(s, url):
            # resolve e.g. 0.0.0.0-style urls relative to the controller's
            # advertised location before connecting (possibly via ssh tunnel)
            url = disambiguate_url(url, self._config['location'])
            if self._ssh:
                return tunnel.tunnel_connection(s, url, sshserver, **ssh_kwargs)
            else:
                return s.connect(url)

        # handshake: ask the Hub for the addresses of all its channels
        self.session.send(self._registration_socket, 'connection_request')
        idents,msg = self.session.recv(self._registration_socket,mode=0)
        if self.debug:
            pprint(msg)
        msg = ss.Message(msg)
        content = msg.content
        self._config['registration'] = dict(content)
        if content.status == 'ok':
            # connect one socket per channel the controller advertises
            if content.mux:
                self._mux_socket = self._context.socket(zmq.PAIR)
                self._mux_socket.setsockopt(zmq.IDENTITY, self.session.session)
                connect_socket(self._mux_socket, content.mux)
            if content.task:
                # content.task carries (scheme, address)
                self._task_scheme, task_addr = content.task
                self._task_socket = self._context.socket(zmq.PAIR)
                self._task_socket.setsockopt(zmq.IDENTITY, self.session.session)
                connect_socket(self._task_socket, task_addr)
            if content.notification:
                # SUB socket subscribed to everything: engine (un)registration
                self._notification_socket = self._context.socket(zmq.SUB)
                connect_socket(self._notification_socket, content.notification)
                self._notification_socket.setsockopt(zmq.SUBSCRIBE, "")
            if content.query:
                self._query_socket = self._context.socket(zmq.PAIR)
                self._query_socket.setsockopt(zmq.IDENTITY, self.session.session)
                connect_socket(self._query_socket, content.query)
            if content.control:
                self._control_socket = self._context.socket(zmq.PAIR)
                self._control_socket.setsockopt(zmq.IDENTITY, self.session.session)
                connect_socket(self._control_socket, content.control)
            if content.iopub:
                # SUB socket subscribed to everything: engine stdout/stderr etc.
                self._iopub_socket = self._context.socket(zmq.SUB)
                self._iopub_socket.setsockopt(zmq.SUBSCRIBE, '')
                self._iopub_socket.setsockopt(zmq.IDENTITY, self.session.session)
                connect_socket(self._iopub_socket, content.iopub)
            # seed our engine table from the registration reply
            self._update_engines(dict(content.engines))

        else:
            self._connected = False
            raise Exception("Failed to connect!")
469
483
470 #--------------------------------------------------------------------------
484 #--------------------------------------------------------------------------
471 # handlers and callbacks for incoming messages
485 # handlers and callbacks for incoming messages
472 #--------------------------------------------------------------------------
486 #--------------------------------------------------------------------------
473
487
def _register_engine(self, msg):
    """Handle an engine-registration notification.

    Pulls the new engine's id and queue identity out of the message
    content and folds them into our engine table.
    """
    info = msg['content']
    self._update_engines({info['id']: info['queue']})
480
494
def _unregister_engine(self, msg):
    """Handle an engine-death notification: drop the engine locally.

    If the pure ZMQ task scheduler is in use, task farming is also
    stopped, because that scheduler cannot cope with engines
    disappearing.
    """
    eid = int(msg['content']['id'])
    if eid not in self._ids:
        return
    self._ids.remove(eid)
    self._engines.pop(eid)
    if self._task_socket and self._task_scheme == 'pure':
        self._stop_scheduling_tasks()
490
504
def _extract_metadata(self, header, parent, content):
    """Build the metadata dict for a reply from its header/parent/content.

    Timestamp fields arrive as ISO8601 strings and are parsed into
    datetimes; optional fields are added only when present in the
    message.
    """
    engine_uuid = header.get('engine', None)
    md = dict(
        msg_id=parent['msg_id'],
        received=datetime.now(),
        engine_uuid=engine_uuid,
        follow=parent.get('follow', []),
        after=parent.get('after', []),
        status=content['status'],
    )

    if engine_uuid is not None:
        # map the engine's zmq identity back to its integer id, if known
        md['engine_id'] = self._engines.get(engine_uuid, None)

    # parse the optional ISO8601 timestamps into datetime objects
    for dest_key, src, src_key in (('submitted', parent, 'date'),
                                   ('started', header, 'started'),
                                   ('completed', header, 'date')):
        if src_key in src:
            md[dest_key] = datetime.strptime(src[src_key], ss.ISO8601)
    return md
510
524
def _handle_execute_reply(self, msg):
    """Record the reply to an execute_request in self.results.

    Note: execute replies are never actually used; `apply` replies do
    the real work.
    """
    msg_id = msg['parent_header']['msg_id']
    if msg_id in self.outstanding:
        self.outstanding.remove(msg_id)
        self.results[msg_id] = ss.unwrap_exception(msg['content'])
    elif msg_id in self.history:
        print ("got stale result: %s"%msg_id)
    else:
        print ("got unknown result: %s"%msg_id)
527
541
def _handle_apply_reply(self, msg):
    """Save the reply to an apply_request into our results.

    Updates ``self.metadata[msg_id]`` with timing/engine information,
    and stores either the deserialized result, an AbortedTask marker,
    or the remote exception in ``self.results[msg_id]``.
    """
    parent = msg['parent_header']
    msg_id = parent['msg_id']
    if msg_id not in self.outstanding:
        if msg_id in self.history:
            # fix: these were bare Python-2 `print` statements; use the
            # parenthesized form for consistency with the rest of the file
            print ("got stale result: %s"%msg_id)
            print (self.results[msg_id])
            print (msg)
        else:
            print ("got unknown result: %s"%msg_id)
    else:
        self.outstanding.remove(msg_id)
        content = msg['content']
        header = msg['header']

        # construct metadata:
        md = self.metadata.setdefault(msg_id, Metadata())
        md.update(self._extract_metadata(header, parent, content))
        self.metadata[msg_id] = md

        # construct result:
        if content['status'] == 'ok':
            self.results[msg_id] = ss.unserialize_object(msg['buffers'])[0]
        elif content['status'] == 'aborted':
            self.results[msg_id] = error.AbortedTask(msg_id)
        elif content['status'] == 'resubmitted':
            # TODO: handle resubmission
            pass
        else:
            # remote error: translate the engine uuid into its integer id
            # for a friendlier report, then store the exception object
            e = ss.unwrap_exception(content)
            if e.engine_info:
                e_uuid = e.engine_info['engineid']
                eid = self._engines[e_uuid]
                e.engine_info['engineid'] = eid
            self.results[msg_id] = e
564
578
def _flush_notifications(self):
    """Flush notifications of engine registrations waiting
    in ZMQ queue, dispatching each to its registered handler.

    Raises
    ------
    Exception
        if a notification arrives with no registered handler.
    """
    msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
    while msg is not None:
        if self.debug:
            pprint(msg)
        msg = msg[-1]
        msg_type = msg['msg_type']
        handler = self._notification_handlers.get(msg_type, None)
        if handler is None:
            # BUG FIX: `msg` is a dict here, so `msg.msg_type` raised
            # AttributeError instead of the intended message; report the
            # already-extracted msg_type.
            raise Exception("Unhandled message type: %s"%msg_type)
        else:
            handler(msg)
        msg = self.session.recv(self._notification_socket, mode=zmq.NOBLOCK)
580
594
def _flush_results(self, sock):
    """Flush task or queue results waiting in ZMQ queue,
    dispatching each reply to its registered handler.

    Raises
    ------
    Exception
        if a reply arrives with no registered handler.
    """
    msg = self.session.recv(sock, mode=zmq.NOBLOCK)
    while msg is not None:
        if self.debug:
            pprint(msg)
        msg = msg[-1]
        msg_type = msg['msg_type']
        handler = self._queue_handlers.get(msg_type, None)
        if handler is None:
            # BUG FIX: `msg` is a dict here, so `msg.msg_type` raised
            # AttributeError instead of the intended message; report the
            # already-extracted msg_type.
            raise Exception("Unhandled message type: %s"%msg_type)
        else:
            handler(msg)
        msg = self.session.recv(sock, mode=zmq.NOBLOCK)
595
609
def _flush_control(self, sock):
    """Drain pending replies on the control channel.

    The replies are currently discarded; in debug mode each one is
    printed before being dropped.
    """
    while True:
        reply = self.session.recv(sock, mode=zmq.NOBLOCK)
        if reply is None:
            break
        if self.debug:
            pprint(reply)
606
620
def _flush_iopub(self, sock):
    """Drain the iopub channel, folding stream/pyerr/other output
    into the metadata entry of the originating request.
    """
    while True:
        raw = self.session.recv(sock, mode=zmq.NOBLOCK)
        if raw is None:
            break
        if self.debug:
            pprint(raw)
        message = raw[-1]
        msg_id = message['parent_header']['msg_id']
        content = message['content']
        msg_type = message['msg_type']

        # fetch (or create) the metadata record for this request
        md = self.metadata.setdefault(msg_id, Metadata())

        if msg_type == 'stream':
            # append stream data (e.g. stdout/stderr) to what we have
            name = content['name']
            md[name] = (md[name] or '') + content['data']
        elif msg_type == 'pyerr':
            md.update({'pyerr' : ss.unwrap_exception(content)})
        else:
            md.update({msg_type : content['data']})

        self.metadata[msg_id] = md
637
651
638 #--------------------------------------------------------------------------
652 #--------------------------------------------------------------------------
639 # getitem
653 # len, getitem
640 #--------------------------------------------------------------------------
654 #--------------------------------------------------------------------------
641
655
def __len__(self):
    """Return the number of currently-registered engines."""
    engine_ids = self.ids
    return len(engine_ids)
659
def __getitem__(self, key):
    """Index access returns DirectView multiplexer objects.

    `key` must be an int, slice, or list/tuple/xrange of ints.
    """
    if isinstance(key, (int, slice, tuple, list, xrange)):
        return self.view(key, balanced=False)
    raise TypeError("key by int/slice/iterable of ints only, not %s"%(type(key)))
650
668
651 #--------------------------------------------------------------------------
669 #--------------------------------------------------------------------------
652 # Begin public methods
670 # Begin public methods
653 #--------------------------------------------------------------------------
671 #--------------------------------------------------------------------------
654
672
def spin(self):
    """Flush any registration notifications and execution results
    waiting in the ZMQ queue.
    """
    if self._notification_socket:
        self._flush_notifications()
    # the mux and task channels share one result-flushing routine;
    # order (mux first) matches the original flush sequence
    for sock in (self._mux_socket, self._task_socket):
        if sock:
            self._flush_results(sock)
    if self._control_socket:
        self._flush_control(self._control_socket)
    if self._iopub_socket:
        self._flush_iopub(self._iopub_socket)
669
687
def barrier(self, msg_ids=None, timeout=-1):
    """waits on one or more `msg_ids`, for up to `timeout` seconds.

    Parameters
    ----------

    msg_ids : int, str, or list of ints and/or strs, or one or more AsyncResult objects
        ints are indices to self.history
        strs are msg_ids
        default: wait on all outstanding messages
    timeout : float
        a time in seconds, after which to give up.
        default is -1, which means no timeout

    Returns
    -------

    True : when all msg_ids are done
    False : timeout reached, some msg_ids still outstanding
    """
    tic = time.time()
    if msg_ids is None:
        theids = self.outstanding
    else:
        if isinstance(msg_ids, (int, str, AsyncResult)):
            msg_ids = [msg_ids]
        theids = set()
        for msg_id in msg_ids:
            if isinstance(msg_id, int):
                # int: index into submission history
                msg_id = self.history[msg_id]
            elif isinstance(msg_id, AsyncResult):
                # BUG FIX: map() was used only for its side effect, which
                # is a no-op under a lazy map (Python 3); set.update is
                # explicit and equivalent.
                theids.update(msg_id.msg_ids)
                continue
            theids.add(msg_id)
    if not theids.intersection(self.outstanding):
        # nothing to wait for
        return True
    self.spin()
    # poll (with a short sleep) until done or timed out
    while theids.intersection(self.outstanding):
        if timeout >= 0 and ( time.time()-tic ) > timeout:
            break
        time.sleep(1e-3)
        self.spin()
    return len(theids.intersection(self.outstanding)) == 0
713
731
714 #--------------------------------------------------------------------------
732 #--------------------------------------------------------------------------
715 # Control methods
733 # Control methods
716 #--------------------------------------------------------------------------
734 #--------------------------------------------------------------------------
717
735
@spinfirst
@defaultblock
def clear(self, targets=None, block=None):
    """Clear the namespace in target(s)."""
    engine_idents = self._build_targets(targets)[0]
    for ident in engine_idents:
        self.session.send(self._control_socket, 'clear_request', content={}, ident=ident)
    failure = False
    if self.block:
        # collect one reply per engine; remember the last non-ok reply
        for _ in engine_idents:
            idents, msg = self.session.recv(self._control_socket, 0)
            if self.debug:
                pprint(msg)
            if msg['content']['status'] != 'ok':
                failure = ss.unwrap_exception(msg['content'])
    if failure:
        return failure
735
753
736
754
@spinfirst
@defaultblock
def abort(self, msg_ids=None, targets=None, block=None):
    """Abort the execution queues of target(s)."""
    engine_idents = self._build_targets(targets)[0]
    # a single msg_id may be passed bare; normalize to a list
    if isinstance(msg_ids, basestring):
        msg_ids = [msg_ids]
    request = dict(msg_ids=msg_ids)
    for ident in engine_idents:
        self.session.send(self._control_socket, 'abort_request',
                content=request, ident=ident)
    failure = False
    if self.block:
        # collect one reply per engine; remember the last non-ok reply
        for _ in engine_idents:
            idents, msg = self.session.recv(self._control_socket, 0)
            if self.debug:
                pprint(msg)
            if msg['content']['status'] != 'ok':
                failure = ss.unwrap_exception(msg['content'])
    if failure:
        return failure
758
776
@spinfirst
@defaultblock
def shutdown(self, targets=None, restart=False, controller=False, block=None):
    """Terminates one or more engine processes, optionally including the controller."""
    # shutting down the controller implies shutting down all engines
    if controller:
        targets = 'all'
    engine_idents = self._build_targets(targets)[0]
    for ident in engine_idents:
        self.session.send(self._control_socket, 'shutdown_request',
                    content={'restart':restart}, ident=ident)
    failure = False
    if block or controller:
        # collect one reply per engine; remember the last non-ok reply
        for _ in engine_idents:
            idents, msg = self.session.recv(self._control_socket, 0)
            if self.debug:
                pprint(msg)
            if msg['content']['status'] != 'ok':
                failure = ss.unwrap_exception(msg['content'])

    if controller:
        # brief pause so the engines can go down before the controller
        time.sleep(0.25)
        self.session.send(self._query_socket, 'shutdown_request')
        idents, msg = self.session.recv(self._query_socket, 0)
        if self.debug:
            pprint(msg)
        if msg['content']['status'] != 'ok':
            failure = ss.unwrap_exception(msg['content'])

    if failure:
        raise failure
789
807
790 #--------------------------------------------------------------------------
808 #--------------------------------------------------------------------------
791 # Execution methods
809 # Execution methods
792 #--------------------------------------------------------------------------
810 #--------------------------------------------------------------------------
793
811
@defaultblock
def execute(self, code, targets='all', block=None):
    """Executes `code` on `targets` in blocking or nonblocking manner.

    ``execute`` is always `bound` (affects engine namespace)

    Parameters
    ----------

    code : str
        the code string to be executed
    targets : int/str/list of ints/strs
        the engines on which to execute
        default : all
    block : bool
        whether or not to wait until done to return
        default: self.block
    """
    # thin wrapper: ship the code string through apply() as a bound,
    # multiplexed (non-balanced) call.  NOTE(review): the `block`
    # argument appears to be folded into self.block by @defaultblock,
    # which is why self.block is passed here — confirm against the
    # decorator's definition.
    return self.apply(_execute, (code,), targets=targets,
                      block=self.block, bound=True, balanced=False)
814
832
def run(self, filename, targets='all', block=None):
    """Execute contents of `filename` on engine(s).

    This simply reads the contents of the file and calls `execute`.

    Parameters
    ----------

    filename : str
        The path to the file
    targets : int/str/list of ints/strs
        the engines on which to execute
        default : all
    block : bool
        whether or not to wait until done
        default: self.block

    """
    # read in binary mode and delegate all execution semantics to execute()
    with open(filename, 'rb') as f:
        source = f.read()
    return self.execute(source, targets=targets, block=block)
836
854
def _maybe_raise(self, result):
    """Re-raise `result` if it is a RemoteError; otherwise pass it through."""
    if not isinstance(result, error.RemoteError):
        return result
    raise result
843
861
def _build_dependency(self, dep):
    """Normalize a dependency spec into a JSON-able form.

    Accepts a Dependency, an AsyncResult, None, or anything the
    Dependency constructor understands (e.g. a collection of msg_ids).
    """
    if dep is None:
        return []
    if isinstance(dep, Dependency):
        return dep.as_dict()
    if isinstance(dep, AsyncResult):
        return dep.msg_ids
    # anything else: let the Dependency constructor interpret it
    return list(Dependency(dep))
855
873
@defaultblock
def apply(self, f, args=None, kwargs=None, bound=True, block=None,
                targets=None, balanced=None,
                after=None, follow=None, timeout=None):
    """Call `f(*args, **kwargs)` on a remote engine(s), returning the result.

    This is the central execution command for the client.

    Parameters
    ----------

    f : function
        The fuction to be called remotely
    args : tuple/list
        The positional arguments passed to `f`
    kwargs : dict
        The keyword arguments passed to `f`
    bound : bool (default: True)
        Whether to execute in the Engine(s) namespace, or in a clean
        namespace not affecting the engine.
    block : bool (default: self.block)
        Whether to wait for the result, or return immediately.
        False:
            returns AsyncResult
        True:
            returns actual result(s) of f(*args, **kwargs)
            if multiple targets:
                list of results, matching `targets`
    targets : int,list of ints, 'all', None
        Specify the destination of the job.
        if None:
            Submit via Task queue for load-balancing.
        if 'all':
            Run on all active engines
        if list:
            Run on each specified engine
        if int:
            Run on single engine

    balanced : bool, default None
        whether to load-balance.  This will default to True
        if targets is unspecified, or False if targets is specified.

    The following arguments are only used when balanced is True:
    after : Dependency or collection of msg_ids
        Only for load-balanced execution (targets=None)
        Specify a list of msg_ids as a time-based dependency.
        This job will only be run *after* the dependencies
        have been met.

    follow : Dependency or collection of msg_ids
        Only for load-balanced execution (targets=None)
        Specify a list of msg_ids as a location-based dependency.
        This job will only be run on an engine where this dependency
        is met.

    timeout : float/int or None
        Only for load-balanced execution (targets=None)
        Specify an amount of time (in seconds) for the scheduler to
        wait for dependencies to be met before failing with a
        DependencyTimeout.

    after,follow,timeout only used if `balanced=True`.

    Raises
    ------
    TypeError
        if f is not callable, args is not a tuple/list, or kwargs is
        not a dict.
    ValueError
        if after/follow/timeout are given for a non-balanced call.

    Returns
    -------

    if block is False:
        return AsyncResult wrapping msg_ids
        output of AsyncResult.get() is identical to that of `apply(...block=True)`
    else:
        if single target:
            return result of `f(*args, **kwargs)`
        else:
            return list of results, matching `targets`
    """
    # a closed client must not submit new work
    assert not self._closed, "cannot use me anymore, I'm closed!"
    # defaults:
    block = block if block is not None else self.block
    args = args if args is not None else []
    kwargs = kwargs if kwargs is not None else {}

    # resolve balanced from targets when the caller did not choose:
    if balanced is None:
        if targets is None:
            # default to balanced if targets unspecified
            balanced = True
        else:
            # otherwise default to multiplexing
            balanced = False

    if targets is None and balanced is False:
        # default to all if *not* balanced, and targets is unspecified
        targets = 'all'

    # enforce types of f, args, kwargs before serializing anything
    if not callable(f):
        raise TypeError("f must be callable, not %s"%type(f))
    if not isinstance(args, (tuple, list)):
        raise TypeError("args must be tuple or list, not %s"%type(args))
    if not isinstance(kwargs, dict):
        raise TypeError("kwargs must be dict, not %s"%type(kwargs))

    # options common to both submission paths
    options = dict(bound=bound, block=block, targets=targets)

    # dispatch to the load-balanced or direct (multiplexed) path;
    # dependency arguments are only meaningful when load-balancing
    if balanced:
        return self._apply_balanced(f, args, kwargs, timeout=timeout,
                                    after=after, follow=follow, **options)
    elif follow or after or timeout:
        msg = "follow, after, and timeout args are only used for"
        msg += " load-balanced execution."
        raise ValueError(msg)
    else:
        return self._apply_direct(f, args, kwargs, **options)
968
987
969 def _apply_balanced(self, f, args, kwargs, bound=True, block=None, targets=None,
988 def _apply_balanced(self, f, args, kwargs, bound=None, block=None, targets=None,
970 after=None, follow=None, timeout=None):
989 after=None, follow=None, timeout=None):
971 """call f(*args, **kwargs) remotely in a load-balanced manner.
990 """call f(*args, **kwargs) remotely in a load-balanced manner.
972
991
973 This is a private method, see `apply` for details.
992 This is a private method, see `apply` for details.
974 Not to be called directly!
993 Not to be called directly!
975 """
994 """
976
995
977 for kwarg in (bound, block, targets):
996 loc = locals()
978 assert kwarg is not None, "kwarg %r must be specified!"%kwarg
997 for name in ('bound', 'block'):
998 assert loc[name] is not None, "kwarg %r must be specified!"%name
979
999
980 if self._task_socket is None:
1000 if self._task_socket is None:
981 msg = "Task farming is disabled"
1001 msg = "Task farming is disabled"
982 if self._task_scheme == 'pure':
1002 if self._task_scheme == 'pure':
983 msg += " because the pure ZMQ scheduler cannot handle"
1003 msg += " because the pure ZMQ scheduler cannot handle"
984 msg += " disappearing engines."
1004 msg += " disappearing engines."
985 raise RuntimeError(msg)
1005 raise RuntimeError(msg)
986
1006
987 if self._task_scheme == 'pure':
1007 if self._task_scheme == 'pure':
988 # pure zmq scheme doesn't support dependencies
1008 # pure zmq scheme doesn't support dependencies
989 msg = "Pure ZMQ scheduler doesn't support dependencies"
1009 msg = "Pure ZMQ scheduler doesn't support dependencies"
990 if (follow or after):
1010 if (follow or after):
991 # hard fail on DAG dependencies
1011 # hard fail on DAG dependencies
992 raise RuntimeError(msg)
1012 raise RuntimeError(msg)
993 if isinstance(f, dependent):
1013 if isinstance(f, dependent):
994 # soft warn on functional dependencies
1014 # soft warn on functional dependencies
995 warnings.warn(msg, RuntimeWarning)
1015 warnings.warn(msg, RuntimeWarning)
996
1016
997 # defaults:
1017 # defaults:
998 args = args if args is not None else []
1018 args = args if args is not None else []
999 kwargs = kwargs if kwargs is not None else {}
1019 kwargs = kwargs if kwargs is not None else {}
1000
1020
1001 if targets:
1021 if targets:
1002 idents,_ = self._build_targets(targets)
1022 idents,_ = self._build_targets(targets)
1003 else:
1023 else:
1004 idents = []
1024 idents = []
1005
1025
1006 after = self._build_dependency(after)
1026 after = self._build_dependency(after)
1007 follow = self._build_dependency(follow)
1027 follow = self._build_dependency(follow)
1008 subheader = dict(after=after, follow=follow, timeout=timeout, targets=idents)
1028 subheader = dict(after=after, follow=follow, timeout=timeout, targets=idents)
1009 bufs = ss.pack_apply_message(f,args,kwargs)
1029 bufs = ss.pack_apply_message(f,args,kwargs)
1010 content = dict(bound=bound)
1030 content = dict(bound=bound)
1011
1031
1012 msg = self.session.send(self._task_socket, "apply_request",
1032 msg = self.session.send(self._task_socket, "apply_request",
1013 content=content, buffers=bufs, subheader=subheader)
1033 content=content, buffers=bufs, subheader=subheader)
1014 msg_id = msg['msg_id']
1034 msg_id = msg['msg_id']
1015 self.outstanding.add(msg_id)
1035 self.outstanding.add(msg_id)
1016 self.history.append(msg_id)
1036 self.history.append(msg_id)
1017 ar = AsyncResult(self, [msg_id], fname=f.__name__)
1037 ar = AsyncResult(self, [msg_id], fname=f.__name__)
1018 if block:
1038 if block:
1019 try:
1039 try:
1020 return ar.get()
1040 return ar.get()
1021 except KeyboardInterrupt:
1041 except KeyboardInterrupt:
1022 return ar
1042 return ar
1023 else:
1043 else:
1024 return ar
1044 return ar
1025
1045
1026 def _apply_direct(self, f, args, kwargs, bound=None, block=None, targets=None):
1046 def _apply_direct(self, f, args, kwargs, bound=None, block=None, targets=None):
1027 """Then underlying method for applying functions to specific engines
1047 """Then underlying method for applying functions to specific engines
1028 via the MUX queue.
1048 via the MUX queue.
1029
1049
1030 This is a private method, see `apply` for details.
1050 This is a private method, see `apply` for details.
1031 Not to be called directly!
1051 Not to be called directly!
1032 """
1052 """
1033
1053 loc = locals()
1034 for kwarg in (bound, block, targets):
1054 for name in ('bound', 'block', 'targets'):
1035 assert kwarg is not None, "kwarg %r must be specified!"%kwarg
1055 assert loc[name] is not None, "kwarg %r must be specified!"%name
1036
1056
1037 idents,targets = self._build_targets(targets)
1057 idents,targets = self._build_targets(targets)
1038
1058
1039 subheader = {}
1059 subheader = {}
1040 content = dict(bound=bound)
1060 content = dict(bound=bound)
1041 bufs = ss.pack_apply_message(f,args,kwargs)
1061 bufs = ss.pack_apply_message(f,args,kwargs)
1042
1062
1043 msg_ids = []
1063 msg_ids = []
1044 for ident in idents:
1064 for ident in idents:
1045 msg = self.session.send(self._mux_socket, "apply_request",
1065 msg = self.session.send(self._mux_socket, "apply_request",
1046 content=content, buffers=bufs, ident=ident, subheader=subheader)
1066 content=content, buffers=bufs, ident=ident, subheader=subheader)
1047 msg_id = msg['msg_id']
1067 msg_id = msg['msg_id']
1048 self.outstanding.add(msg_id)
1068 self.outstanding.add(msg_id)
1049 self.history.append(msg_id)
1069 self.history.append(msg_id)
1050 msg_ids.append(msg_id)
1070 msg_ids.append(msg_id)
1051 ar = AsyncResult(self, msg_ids, fname=f.__name__)
1071 ar = AsyncResult(self, msg_ids, fname=f.__name__)
1052 if block:
1072 if block:
1053 try:
1073 try:
1054 return ar.get()
1074 return ar.get()
1055 except KeyboardInterrupt:
1075 except KeyboardInterrupt:
1056 return ar
1076 return ar
1057 else:
1077 else:
1058 return ar
1078 return ar
1059
1079
1060 #--------------------------------------------------------------------------
1080 #--------------------------------------------------------------------------
1061 # decorators
1081 # construct a View object
1062 #--------------------------------------------------------------------------
1082 #--------------------------------------------------------------------------
1063
1083
1064 @defaultblock
1084 @defaultblock
1065 def parallel(self, bound=True, targets='all', block=None):
1085 def remote(self, bound=True, block=None, targets=None, balanced=None):
1066 """Decorator for making a ParallelFunction."""
1086 """Decorator for making a RemoteFunction"""
1067 return parallel(self, bound=bound, targets=targets, block=block)
1087 return remote(self, bound=bound, targets=targets, block=block, balanced=balanced)
1068
1088
1069 @defaultblock
1089 @defaultblock
1070 def remote(self, bound=True, targets='all', block=None):
1090 def parallel(self, dist='b', bound=True, block=None, targets=None, balanced=None):
1071 """Decorator for making a RemoteFunction."""
1091 """Decorator for making a ParallelFunction"""
1072 return remote(self, bound=bound, targets=targets, block=block)
1092 return parallel(self, bound=bound, targets=targets, block=block, balanced=balanced)
1073
1093
1074 def view(self, targets=None, balanced=False):
1094 def _cache_view(self, targets, balanced):
1075 """Method for constructing View objects"""
1095 """save views, so subsequent requests don't create new objects."""
1096 if balanced:
1097 view_class = LoadBalancedView
1098 view_cache = self._balanced_views
1099 else:
1100 view_class = DirectView
1101 view_cache = self._direct_views
1102
1103 # use str, since often targets will be a list
1104 key = str(targets)
1105 if key not in view_cache:
1106 view_cache[key] = view_class(client=self, targets=targets)
1107
1108 return view_cache[key]
1109
1110 def view(self, targets=None, balanced=None):
1111 """Method for constructing View objects.
1112
1113 If no arguments are specified, create a LoadBalancedView
1114 using all engines. If only `targets` specified, it will
1115 be a DirectView. This method is the underlying implementation
1116 of ``client.__getitem__``.
1117
1118 Parameters
1119 ----------
1120
1121 targets: list,slice,int,etc. [default: use all engines]
1122 The engines to use for the View
1123 balanced : bool [default: False if targets specified, True else]
1124 whether to build a LoadBalancedView or a DirectView
1125
1126 """
1127
1128 balanced = (targets is None) if balanced is None else balanced
1129
1076 if targets is None:
1130 if targets is None:
1077 if balanced:
1131 if balanced:
1078 return LoadBalancedView(client=self)
1132 return self._cache_view(None,True)
1079 else:
1133 else:
1080 targets = slice(None)
1134 targets = slice(None)
1081
1135
1082 if balanced:
1083 view_class = LoadBalancedView
1084 else:
1085 view_class = DirectView
1086 if isinstance(targets, int):
1136 if isinstance(targets, int):
1087 if targets not in self.ids:
1137 if targets not in self.ids:
1088 raise IndexError("No such engine: %i"%targets)
1138 raise IndexError("No such engine: %i"%targets)
1089 return view_class(client=self, targets=targets)
1139 return self._cache_view(targets, balanced)
1090
1140
1091 if isinstance(targets, slice):
1141 if isinstance(targets, slice):
1092 indices = range(len(self.ids))[targets]
1142 indices = range(len(self.ids))[targets]
1093 ids = sorted(self._ids)
1143 ids = sorted(self._ids)
1094 targets = [ ids[i] for i in indices ]
1144 targets = [ ids[i] for i in indices ]
1095
1145
1096 if isinstance(targets, (tuple, list, xrange)):
1146 if isinstance(targets, (tuple, list, xrange)):
1097 _,targets = self._build_targets(list(targets))
1147 _,targets = self._build_targets(list(targets))
1098 return view_class(client=self, targets=targets)
1148 return self._cache_view(targets, balanced)
1099 else:
1149 else:
1100 raise TypeError("targets by int/slice/collection of ints only, not %s"%(type(targets)))
1150 raise TypeError("targets by int/slice/collection of ints only, not %s"%(type(targets)))
1101
1151
1102 #--------------------------------------------------------------------------
1152 #--------------------------------------------------------------------------
1103 # Data movement
1153 # Data movement
1104 #--------------------------------------------------------------------------
1154 #--------------------------------------------------------------------------
1105
1155
1106 @defaultblock
1156 @defaultblock
1107 def push(self, ns, targets='all', block=None):
1157 def push(self, ns, targets='all', block=None):
1108 """Push the contents of `ns` into the namespace on `target`"""
1158 """Push the contents of `ns` into the namespace on `target`"""
1109 if not isinstance(ns, dict):
1159 if not isinstance(ns, dict):
1110 raise TypeError("Must be a dict, not %s"%type(ns))
1160 raise TypeError("Must be a dict, not %s"%type(ns))
1111 result = self.apply(_push, (ns,), targets=targets, block=block, bound=True, balanced=False)
1161 result = self.apply(_push, (ns,), targets=targets, block=block, bound=True, balanced=False)
1112 return result
1162 return result
1113
1163
1114 @defaultblock
1164 @defaultblock
1115 def pull(self, keys, targets='all', block=None):
1165 def pull(self, keys, targets='all', block=None):
1116 """Pull objects from `target`'s namespace by `keys`"""
1166 """Pull objects from `target`'s namespace by `keys`"""
1117 if isinstance(keys, str):
1167 if isinstance(keys, str):
1118 pass
1168 pass
1119 elif isinstance(keys, (list,tuple,set)):
1169 elif isinstance(keys, (list,tuple,set)):
1120 for key in keys:
1170 for key in keys:
1121 if not isinstance(key, str):
1171 if not isinstance(key, str):
1122 raise TypeError
1172 raise TypeError
1123 result = self.apply(_pull, (keys,), targets=targets, block=block, bound=True, balanced=False)
1173 result = self.apply(_pull, (keys,), targets=targets, block=block, bound=True, balanced=False)
1124 return result
1174 return result
1125
1175
1126 @defaultblock
1176 @defaultblock
1127 def scatter(self, key, seq, dist='b', flatten=False, targets='all', block=None):
1177 def scatter(self, key, seq, dist='b', flatten=False, targets='all', block=None):
1128 """
1178 """
1129 Partition a Python sequence and send the partitions to a set of engines.
1179 Partition a Python sequence and send the partitions to a set of engines.
1130 """
1180 """
1131 targets = self._build_targets(targets)[-1]
1181 targets = self._build_targets(targets)[-1]
1132 mapObject = Map.dists[dist]()
1182 mapObject = Map.dists[dist]()
1133 nparts = len(targets)
1183 nparts = len(targets)
1134 msg_ids = []
1184 msg_ids = []
1135 for index, engineid in enumerate(targets):
1185 for index, engineid in enumerate(targets):
1136 partition = mapObject.getPartition(seq, index, nparts)
1186 partition = mapObject.getPartition(seq, index, nparts)
1137 if flatten and len(partition) == 1:
1187 if flatten and len(partition) == 1:
1138 r = self.push({key: partition[0]}, targets=engineid, block=False)
1188 r = self.push({key: partition[0]}, targets=engineid, block=False)
1139 else:
1189 else:
1140 r = self.push({key: partition}, targets=engineid, block=False)
1190 r = self.push({key: partition}, targets=engineid, block=False)
1141 msg_ids.extend(r.msg_ids)
1191 msg_ids.extend(r.msg_ids)
1142 r = AsyncResult(self, msg_ids, fname='scatter')
1192 r = AsyncResult(self, msg_ids, fname='scatter')
1143 if block:
1193 if block:
1144 return r.get()
1194 return r.get()
1145 else:
1195 else:
1146 return r
1196 return r
1147
1197
1148 @defaultblock
1198 @defaultblock
1149 def gather(self, key, dist='b', targets='all', block=None):
1199 def gather(self, key, dist='b', targets='all', block=None):
1150 """
1200 """
1151 Gather a partitioned sequence on a set of engines as a single local seq.
1201 Gather a partitioned sequence on a set of engines as a single local seq.
1152 """
1202 """
1153
1203
1154 targets = self._build_targets(targets)[-1]
1204 targets = self._build_targets(targets)[-1]
1155 mapObject = Map.dists[dist]()
1205 mapObject = Map.dists[dist]()
1156 msg_ids = []
1206 msg_ids = []
1157 for index, engineid in enumerate(targets):
1207 for index, engineid in enumerate(targets):
1158 msg_ids.extend(self.pull(key, targets=engineid,block=False).msg_ids)
1208 msg_ids.extend(self.pull(key, targets=engineid,block=False).msg_ids)
1159
1209
1160 r = AsyncMapResult(self, msg_ids, mapObject, fname='gather')
1210 r = AsyncMapResult(self, msg_ids, mapObject, fname='gather')
1161 if block:
1211 if block:
1162 return r.get()
1212 return r.get()
1163 else:
1213 else:
1164 return r
1214 return r
1165
1215
1166 #--------------------------------------------------------------------------
1216 #--------------------------------------------------------------------------
1167 # Query methods
1217 # Query methods
1168 #--------------------------------------------------------------------------
1218 #--------------------------------------------------------------------------
1169
1219
1170 @spinfirst
1220 @spinfirst
1171 def get_results(self, msg_ids, status_only=False):
1221 def get_results(self, msg_ids, status_only=False):
1172 """Returns the result of the execute or task request with `msg_ids`.
1222 """Returns the result of the execute or task request with `msg_ids`.
1173
1223
1174 Parameters
1224 Parameters
1175 ----------
1225 ----------
1176
1226
1177 msg_ids : list of ints or msg_ids
1227 msg_ids : list of ints or msg_ids
1178 if int:
1228 if int:
1179 Passed as index to self.history for convenience.
1229 Passed as index to self.history for convenience.
1180 status_only : bool (default: False)
1230 status_only : bool (default: False)
1181 if False:
1231 if False:
1182 return the actual results
1232 return the actual results
1183
1233
1184 Returns
1234 Returns
1185 -------
1235 -------
1186
1236
1187 results : dict
1237 results : dict
1188 There will always be the keys 'pending' and 'completed', which will
1238 There will always be the keys 'pending' and 'completed', which will
1189 be lists of msg_ids.
1239 be lists of msg_ids.
1190 """
1240 """
1191 if not isinstance(msg_ids, (list,tuple)):
1241 if not isinstance(msg_ids, (list,tuple)):
1192 msg_ids = [msg_ids]
1242 msg_ids = [msg_ids]
1193 theids = []
1243 theids = []
1194 for msg_id in msg_ids:
1244 for msg_id in msg_ids:
1195 if isinstance(msg_id, int):
1245 if isinstance(msg_id, int):
1196 msg_id = self.history[msg_id]
1246 msg_id = self.history[msg_id]
1197 if not isinstance(msg_id, str):
1247 if not isinstance(msg_id, str):
1198 raise TypeError("msg_ids must be str, not %r"%msg_id)
1248 raise TypeError("msg_ids must be str, not %r"%msg_id)
1199 theids.append(msg_id)
1249 theids.append(msg_id)
1200
1250
1201 completed = []
1251 completed = []
1202 local_results = {}
1252 local_results = {}
1203
1253
1204 # comment this block out to temporarily disable local shortcut:
1254 # comment this block out to temporarily disable local shortcut:
1205 for msg_id in list(theids):
1255 for msg_id in list(theids):
1206 if msg_id in self.results:
1256 if msg_id in self.results:
1207 completed.append(msg_id)
1257 completed.append(msg_id)
1208 local_results[msg_id] = self.results[msg_id]
1258 local_results[msg_id] = self.results[msg_id]
1209 theids.remove(msg_id)
1259 theids.remove(msg_id)
1210
1260
1211 if theids: # some not locally cached
1261 if theids: # some not locally cached
1212 content = dict(msg_ids=theids, status_only=status_only)
1262 content = dict(msg_ids=theids, status_only=status_only)
1213 msg = self.session.send(self._query_socket, "result_request", content=content)
1263 msg = self.session.send(self._query_socket, "result_request", content=content)
1214 zmq.select([self._query_socket], [], [])
1264 zmq.select([self._query_socket], [], [])
1215 idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
1265 idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
1216 if self.debug:
1266 if self.debug:
1217 pprint(msg)
1267 pprint(msg)
1218 content = msg['content']
1268 content = msg['content']
1219 if content['status'] != 'ok':
1269 if content['status'] != 'ok':
1220 raise ss.unwrap_exception(content)
1270 raise ss.unwrap_exception(content)
1221 buffers = msg['buffers']
1271 buffers = msg['buffers']
1222 else:
1272 else:
1223 content = dict(completed=[],pending=[])
1273 content = dict(completed=[],pending=[])
1224
1274
1225 content['completed'].extend(completed)
1275 content['completed'].extend(completed)
1226
1276
1227 if status_only:
1277 if status_only:
1228 return content
1278 return content
1229
1279
1230 failures = []
1280 failures = []
1231 # load cached results into result:
1281 # load cached results into result:
1232 content.update(local_results)
1282 content.update(local_results)
1233 # update cache with results:
1283 # update cache with results:
1234 for msg_id in sorted(theids):
1284 for msg_id in sorted(theids):
1235 if msg_id in content['completed']:
1285 if msg_id in content['completed']:
1236 rec = content[msg_id]
1286 rec = content[msg_id]
1237 parent = rec['header']
1287 parent = rec['header']
1238 header = rec['result_header']
1288 header = rec['result_header']
1239 rcontent = rec['result_content']
1289 rcontent = rec['result_content']
1240 iodict = rec['io']
1290 iodict = rec['io']
1241 if isinstance(rcontent, str):
1291 if isinstance(rcontent, str):
1242 rcontent = self.session.unpack(rcontent)
1292 rcontent = self.session.unpack(rcontent)
1243
1293
1244 md = self.metadata.setdefault(msg_id, Metadata())
1294 md = self.metadata.setdefault(msg_id, Metadata())
1245 md.update(self._extract_metadata(header, parent, rcontent))
1295 md.update(self._extract_metadata(header, parent, rcontent))
1246 md.update(iodict)
1296 md.update(iodict)
1247
1297
1248 if rcontent['status'] == 'ok':
1298 if rcontent['status'] == 'ok':
1249 res,buffers = ss.unserialize_object(buffers)
1299 res,buffers = ss.unserialize_object(buffers)
1250 else:
1300 else:
1251 res = ss.unwrap_exception(rcontent)
1301 res = ss.unwrap_exception(rcontent)
1252 failures.append(res)
1302 failures.append(res)
1253
1303
1254 self.results[msg_id] = res
1304 self.results[msg_id] = res
1255 content[msg_id] = res
1305 content[msg_id] = res
1256
1306
1257 error.collect_exceptions(failures, "get_results")
1307 error.collect_exceptions(failures, "get_results")
1258 return content
1308 return content
1259
1309
1260 @spinfirst
1310 @spinfirst
1261 def queue_status(self, targets='all', verbose=False):
1311 def queue_status(self, targets='all', verbose=False):
1262 """Fetch the status of engine queues.
1312 """Fetch the status of engine queues.
1263
1313
1264 Parameters
1314 Parameters
1265 ----------
1315 ----------
1266
1316
1267 targets : int/str/list of ints/strs
1317 targets : int/str/list of ints/strs
1268 the engines whose states are to be queried.
1318 the engines whose states are to be queried.
1269 default : all
1319 default : all
1270 verbose : bool
1320 verbose : bool
1271 Whether to return lengths only, or lists of ids for each element
1321 Whether to return lengths only, or lists of ids for each element
1272 """
1322 """
1273 targets = self._build_targets(targets)[1]
1323 targets = self._build_targets(targets)[1]
1274 content = dict(targets=targets, verbose=verbose)
1324 content = dict(targets=targets, verbose=verbose)
1275 self.session.send(self._query_socket, "queue_request", content=content)
1325 self.session.send(self._query_socket, "queue_request", content=content)
1276 idents,msg = self.session.recv(self._query_socket, 0)
1326 idents,msg = self.session.recv(self._query_socket, 0)
1277 if self.debug:
1327 if self.debug:
1278 pprint(msg)
1328 pprint(msg)
1279 content = msg['content']
1329 content = msg['content']
1280 status = content.pop('status')
1330 status = content.pop('status')
1281 if status != 'ok':
1331 if status != 'ok':
1282 raise ss.unwrap_exception(content)
1332 raise ss.unwrap_exception(content)
1283 return ss.rekey(content)
1333 return ss.rekey(content)
1284
1334
1285 @spinfirst
1335 @spinfirst
1286 def purge_results(self, msg_ids=[], targets=[]):
1336 def purge_results(self, msg_ids=[], targets=[]):
1287 """Tell the controller to forget results.
1337 """Tell the controller to forget results.
1288
1338
1289 Individual results can be purged by msg_id, or the entire
1339 Individual results can be purged by msg_id, or the entire
1290 history of specific targets can be purged.
1340 history of specific targets can be purged.
1291
1341
1292 Parameters
1342 Parameters
1293 ----------
1343 ----------
1294
1344
1295 msg_ids : str or list of strs
1345 msg_ids : str or list of strs
1296 the msg_ids whose results should be forgotten.
1346 the msg_ids whose results should be forgotten.
1297 targets : int/str/list of ints/strs
1347 targets : int/str/list of ints/strs
1298 The targets, by uuid or int_id, whose entire history is to be purged.
1348 The targets, by uuid or int_id, whose entire history is to be purged.
1299 Use `targets='all'` to scrub everything from the controller's memory.
1349 Use `targets='all'` to scrub everything from the controller's memory.
1300
1350
1301 default : None
1351 default : None
1302 """
1352 """
1303 if not targets and not msg_ids:
1353 if not targets and not msg_ids:
1304 raise ValueError
1354 raise ValueError
1305 if targets:
1355 if targets:
1306 targets = self._build_targets(targets)[1]
1356 targets = self._build_targets(targets)[1]
1307 content = dict(targets=targets, msg_ids=msg_ids)
1357 content = dict(targets=targets, msg_ids=msg_ids)
1308 self.session.send(self._query_socket, "purge_request", content=content)
1358 self.session.send(self._query_socket, "purge_request", content=content)
1309 idents, msg = self.session.recv(self._query_socket, 0)
1359 idents, msg = self.session.recv(self._query_socket, 0)
1310 if self.debug:
1360 if self.debug:
1311 pprint(msg)
1361 pprint(msg)
1312 content = msg['content']
1362 content = msg['content']
1313 if content['status'] != 'ok':
1363 if content['status'] != 'ok':
1314 raise ss.unwrap_exception(content)
1364 raise ss.unwrap_exception(content)
1315
1365
1316
1366
1317 __all__ = [ 'Client',
1367 __all__ = [ 'Client',
1318 'depend',
1368 'depend',
1319 'require',
1369 'require',
1320 'remote',
1370 'remote',
1321 'parallel',
1371 'parallel',
1322 'RemoteFunction',
1372 'RemoteFunction',
1323 'ParallelFunction',
1373 'ParallelFunction',
1324 'DirectView',
1374 'DirectView',
1325 'LoadBalancedView',
1375 'LoadBalancedView',
1326 'AsyncResult',
1376 'AsyncResult',
1327 'AsyncMapResult'
1377 'AsyncMapResult'
1328 ]
1378 ]
@@ -1,156 +1,166 b''
1 """Remote Functions and decorators for the client."""
1 """Remote Functions and decorators for the client."""
2 #-----------------------------------------------------------------------------
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2010 The IPython Development Team
3 # Copyright (C) 2010 The IPython Development Team
4 #
4 #
5 # Distributed under the terms of the BSD License. The full license is in
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8
8
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Imports
10 # Imports
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12
12
13 import warnings
14
13 import map as Map
15 import map as Map
14 from asyncresult import AsyncMapResult
16 from asyncresult import AsyncMapResult
15
17
16 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
17 # Decorators
19 # Decorators
18 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
19
21
20 def remote(client, bound=False, block=None, targets=None, balanced=None):
22 def remote(client, bound=True, block=None, targets=None, balanced=None):
21 """Turn a function into a remote function.
23 """Turn a function into a remote function.
22
24
23 This method can be used for map:
25 This method can be used for map:
24
26
25 >>> @remote(client,block=True)
27 >>> @remote(client,block=True)
26 def func(a)
28 def func(a)
27 """
29 """
28 def remote_function(f):
30 def remote_function(f):
29 return RemoteFunction(client, f, bound, block, targets, balanced)
31 return RemoteFunction(client, f, bound, block, targets, balanced)
30 return remote_function
32 return remote_function
31
33
32 def parallel(client, dist='b', bound=False, block=None, targets='all', balanced=None):
34 def parallel(client, dist='b', bound=True, block=None, targets='all', balanced=None):
33 """Turn a function into a parallel remote function.
35 """Turn a function into a parallel remote function.
34
36
35 This method can be used for map:
37 This method can be used for map:
36
38
37 >>> @parallel(client,block=True)
39 >>> @parallel(client,block=True)
38 def func(a)
40 def func(a)
39 """
41 """
40 def parallel_function(f):
42 def parallel_function(f):
41 return ParallelFunction(client, f, dist, bound, block, targets, balanced)
43 return ParallelFunction(client, f, dist, bound, block, targets, balanced)
42 return parallel_function
44 return parallel_function
43
45
44 #--------------------------------------------------------------------------
46 #--------------------------------------------------------------------------
45 # Classes
47 # Classes
46 #--------------------------------------------------------------------------
48 #--------------------------------------------------------------------------
47
49
48 class RemoteFunction(object):
50 class RemoteFunction(object):
49 """Turn an existing function into a remote function.
51 """Turn an existing function into a remote function.
50
52
51 Parameters
53 Parameters
52 ----------
54 ----------
53
55
54 client : Client instance
56 client : Client instance
55 The client to be used to connect to engines
57 The client to be used to connect to engines
56 f : callable
58 f : callable
57 The function to be wrapped into a remote function
59 The function to be wrapped into a remote function
58 bound : bool [default: False]
60 bound : bool [default: False]
59 Whether the affect the remote namespace when called
61 Whether the affect the remote namespace when called
60 block : bool [default: None]
62 block : bool [default: None]
61 Whether to wait for results or not. The default behavior is
63 Whether to wait for results or not. The default behavior is
62 to use the current `block` attribute of `client`
64 to use the current `block` attribute of `client`
63 targets : valid target list [default: all]
65 targets : valid target list [default: all]
64 The targets on which to execute.
66 The targets on which to execute.
65 balanced : bool
67 balanced : bool
66 Whether to load-balance with the Task scheduler or not
68 Whether to load-balance with the Task scheduler or not
67 """
69 """
68
70
69 client = None # the remote connection
71 client = None # the remote connection
70 func = None # the wrapped function
72 func = None # the wrapped function
71 block = None # whether to block
73 block = None # whether to block
72 bound = None # whether to affect the namespace
74 bound = None # whether to affect the namespace
73 targets = None # where to execute
75 targets = None # where to execute
74 balanced = None # whether to load-balance
76 balanced = None # whether to load-balance
75
77
76 def __init__(self, client, f, bound=False, block=None, targets=None, balanced=None):
78 def __init__(self, client, f, bound=False, block=None, targets=None, balanced=None):
77 self.client = client
79 self.client = client
78 self.func = f
80 self.func = f
79 self.block=block
81 self.block=block
80 self.bound=bound
82 self.bound=bound
81 self.targets=targets
83 self.targets=targets
82 if balanced is None:
84 if balanced is None:
83 if targets is None:
85 if targets is None:
84 balanced = True
86 balanced = True
85 else:
87 else:
86 balanced = False
88 balanced = False
87 self.balanced = balanced
89 self.balanced = balanced
88
90
89 def __call__(self, *args, **kwargs):
91 def __call__(self, *args, **kwargs):
90 return self.client.apply(self.func, args=args, kwargs=kwargs,
92 return self.client.apply(self.func, args=args, kwargs=kwargs,
91 block=self.block, targets=self.targets, bound=self.bound, balanced=self.balanced)
93 block=self.block, targets=self.targets, bound=self.bound, balanced=self.balanced)
92
94
93
95
94 class ParallelFunction(RemoteFunction):
96 class ParallelFunction(RemoteFunction):
95 """Class for mapping a function to sequences."""
97 """Class for mapping a function to sequences."""
96 def __init__(self, client, f, dist='b', bound=False, block=None, targets='all', balanced=None):
98 def __init__(self, client, f, dist='b', bound=False, block=None, targets='all', balanced=None, chunk_size=None):
97 super(ParallelFunction, self).__init__(client,f,bound,block,targets,balanced)
99 super(ParallelFunction, self).__init__(client,f,bound,block,targets,balanced)
100 self.chunk_size = chunk_size
101
98 mapClass = Map.dists[dist]
102 mapClass = Map.dists[dist]
99 self.mapObject = mapClass()
103 self.mapObject = mapClass()
100
104
101 def __call__(self, *sequences):
105 def __call__(self, *sequences):
102 len_0 = len(sequences[0])
106 len_0 = len(sequences[0])
103 for s in sequences:
107 for s in sequences:
104 if len(s)!=len_0:
108 if len(s)!=len_0:
105 msg = 'all sequences must have equal length, but %i!=%i'%(len_0,len(s))
109 msg = 'all sequences must have equal length, but %i!=%i'%(len_0,len(s))
106 raise ValueError(msg)
110 raise ValueError(msg)
107
111
108 if self.balanced:
112 if self.balanced:
109 targets = [self.targets]*len_0
113 if self.chunk_size:
114 nparts = len_0/self.chunk_size + int(len_0%self.chunk_size > 0)
115 else:
116 nparts = len_0
117 targets = [self.targets]*nparts
110 else:
118 else:
119 if self.chunk_size:
120 warnings.warn("`chunk_size` is ignored when `balanced=False", UserWarning)
111 # multiplexed:
121 # multiplexed:
112 targets = self.client._build_targets(self.targets)[-1]
122 targets = self.client._build_targets(self.targets)[-1]
123 nparts = len(targets)
113
124
114 nparts = len(targets)
115 msg_ids = []
125 msg_ids = []
116 # my_f = lambda *a: map(self.func, *a)
126 # my_f = lambda *a: map(self.func, *a)
117 for index, t in enumerate(targets):
127 for index, t in enumerate(targets):
118 args = []
128 args = []
119 for seq in sequences:
129 for seq in sequences:
120 part = self.mapObject.getPartition(seq, index, nparts)
130 part = self.mapObject.getPartition(seq, index, nparts)
121 if not part:
131 if not part:
122 continue
132 continue
123 else:
133 else:
124 args.append(part)
134 args.append(part)
125 if not args:
135 if not args:
126 continue
136 continue
127
137
128 # print (args)
138 # print (args)
129 if hasattr(self, '_map'):
139 if hasattr(self, '_map'):
130 f = map
140 f = map
131 args = [self.func]+args
141 args = [self.func]+args
132 else:
142 else:
133 f=self.func
143 f=self.func
134 ar = self.client.apply(f, args=args, block=False, bound=self.bound,
144 ar = self.client.apply(f, args=args, block=False, bound=self.bound,
135 targets=targets, balanced=self.balanced)
145 targets=t, balanced=self.balanced)
136
146
137 msg_ids.append(ar.msg_ids[0])
147 msg_ids.append(ar.msg_ids[0])
138
148
139 r = AsyncMapResult(self.client, msg_ids, self.mapObject, fname=self.func.__name__)
149 r = AsyncMapResult(self.client, msg_ids, self.mapObject, fname=self.func.__name__)
140 if self.block:
150 if self.block:
141 try:
151 try:
142 return r.get()
152 return r.get()
143 except KeyboardInterrupt:
153 except KeyboardInterrupt:
144 return r
154 return r
145 else:
155 else:
146 return r
156 return r
147
157
148 def map(self, *sequences):
158 def map(self, *sequences):
149 """call a function on each element of a sequence remotely."""
159 """call a function on each element of a sequence remotely."""
150 self._map = True
160 self._map = True
151 try:
161 try:
152 ret = self.__call__(*sequences)
162 ret = self.__call__(*sequences)
153 finally:
163 finally:
154 del self._map
164 del self._map
155 return ret
165 return ret
156
166
@@ -1,572 +1,592 b''
1 """Views of remote engines"""
1 """Views of remote engines"""
2 #-----------------------------------------------------------------------------
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2010 The IPython Development Team
3 # Copyright (C) 2010 The IPython Development Team
4 #
4 #
5 # Distributed under the terms of the BSD License. The full license is in
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8
8
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Imports
10 # Imports
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12
12
13 from IPython.utils.traitlets import HasTraits, Bool, List, Dict, Set, Int, Instance
13 from IPython.utils.traitlets import HasTraits, Bool, List, Dict, Set, Int, Instance
14
14
15 from IPython.external.decorator import decorator
15 from IPython.external.decorator import decorator
16 from IPython.zmq.parallel.asyncresult import AsyncResult
16 from IPython.zmq.parallel.asyncresult import AsyncResult
17 from IPython.zmq.parallel.dependency import Dependency
17 from IPython.zmq.parallel.dependency import Dependency
18 from IPython.zmq.parallel.remotefunction import ParallelFunction, parallel
18 from IPython.zmq.parallel.remotefunction import ParallelFunction, parallel
19
19
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21 # Decorators
21 # Decorators
22 #-----------------------------------------------------------------------------
22 #-----------------------------------------------------------------------------
23
23
24 @decorator
24 @decorator
25 def myblock(f, self, *args, **kwargs):
25 def myblock(f, self, *args, **kwargs):
26 """override client.block with self.block during a call"""
26 """override client.block with self.block during a call"""
27 block = self.client.block
27 block = self.client.block
28 self.client.block = self.block
28 self.client.block = self.block
29 try:
29 try:
30 ret = f(self, *args, **kwargs)
30 ret = f(self, *args, **kwargs)
31 finally:
31 finally:
32 self.client.block = block
32 self.client.block = block
33 return ret
33 return ret
34
34
35 @decorator
35 @decorator
36 def save_ids(f, self, *args, **kwargs):
36 def save_ids(f, self, *args, **kwargs):
37 """Keep our history and outstanding attributes up to date after a method call."""
37 """Keep our history and outstanding attributes up to date after a method call."""
38 n_previous = len(self.client.history)
38 n_previous = len(self.client.history)
39 ret = f(self, *args, **kwargs)
39 ret = f(self, *args, **kwargs)
40 nmsgs = len(self.client.history) - n_previous
40 nmsgs = len(self.client.history) - n_previous
41 msg_ids = self.client.history[-nmsgs:]
41 msg_ids = self.client.history[-nmsgs:]
42 self.history.extend(msg_ids)
42 self.history.extend(msg_ids)
43 map(self.outstanding.add, msg_ids)
43 map(self.outstanding.add, msg_ids)
44 return ret
44 return ret
45
45
46 @decorator
46 @decorator
47 def sync_results(f, self, *args, **kwargs):
47 def sync_results(f, self, *args, **kwargs):
48 """sync relevant results from self.client to our results attribute."""
48 """sync relevant results from self.client to our results attribute."""
49 ret = f(self, *args, **kwargs)
49 ret = f(self, *args, **kwargs)
50 delta = self.outstanding.difference(self.client.outstanding)
50 delta = self.outstanding.difference(self.client.outstanding)
51 completed = self.outstanding.intersection(delta)
51 completed = self.outstanding.intersection(delta)
52 self.outstanding = self.outstanding.difference(completed)
52 self.outstanding = self.outstanding.difference(completed)
53 for msg_id in completed:
53 for msg_id in completed:
54 self.results[msg_id] = self.client.results[msg_id]
54 self.results[msg_id] = self.client.results[msg_id]
55 return ret
55 return ret
56
56
57 @decorator
57 @decorator
58 def spin_after(f, self, *args, **kwargs):
58 def spin_after(f, self, *args, **kwargs):
59 """call spin after the method."""
59 """call spin after the method."""
60 ret = f(self, *args, **kwargs)
60 ret = f(self, *args, **kwargs)
61 self.spin()
61 self.spin()
62 return ret
62 return ret
63
63
64 #-----------------------------------------------------------------------------
64 #-----------------------------------------------------------------------------
65 # Classes
65 # Classes
66 #-----------------------------------------------------------------------------
66 #-----------------------------------------------------------------------------
67
67
68 class View(HasTraits):
68 class View(HasTraits):
69 """Base View class for more convenint apply(f,*args,**kwargs) syntax via attributes.
69 """Base View class for more convenint apply(f,*args,**kwargs) syntax via attributes.
70
70
71 Don't use this class, use subclasses.
71 Don't use this class, use subclasses.
72 """
72 """
73 block=Bool(False)
73 block=Bool(False)
74 bound=Bool(False)
74 bound=Bool(False)
75 history=List()
75 history=List()
76 outstanding = Set()
76 outstanding = Set()
77 results = Dict()
77 results = Dict()
78 client = Instance('IPython.zmq.parallel.client.Client')
78 client = Instance('IPython.zmq.parallel.client.Client')
79
79
80 _ntargets = Int(1)
80 _ntargets = Int(1)
81 _balanced = Bool(False)
81 _balanced = Bool(False)
82 _default_names = List(['block', 'bound'])
82 _default_names = List(['block', 'bound'])
83 _targets = None
83 _targets = None
84
84
85 def __init__(self, client=None, targets=None):
85 def __init__(self, client=None, targets=None):
86 super(View, self).__init__(client=client)
86 super(View, self).__init__(client=client)
87 self._targets = targets
87 self._targets = targets
88 self._ntargets = 1 if isinstance(targets, (int,type(None))) else len(targets)
88 self._ntargets = 1 if isinstance(targets, (int,type(None))) else len(targets)
89 self.block = client.block
89 self.block = client.block
90
90
91 for name in self._default_names:
91 for name in self._default_names:
92 setattr(self, name, getattr(self, name, None))
92 setattr(self, name, getattr(self, name, None))
93
93
94
94
95 def __repr__(self):
95 def __repr__(self):
96 strtargets = str(self._targets)
96 strtargets = str(self._targets)
97 if len(strtargets) > 16:
97 if len(strtargets) > 16:
98 strtargets = strtargets[:12]+'...]'
98 strtargets = strtargets[:12]+'...]'
99 return "<%s %s>"%(self.__class__.__name__, strtargets)
99 return "<%s %s>"%(self.__class__.__name__, strtargets)
100
100
101 @property
101 @property
102 def targets(self):
102 def targets(self):
103 return self._targets
103 return self._targets
104
104
105 @targets.setter
105 @targets.setter
106 def targets(self, value):
106 def targets(self, value):
107 raise AttributeError("Cannot set View `targets` after construction!")
107 raise AttributeError("Cannot set View `targets` after construction!")
108
108
109 def _defaults(self, *excludes):
109 def _defaults(self, *excludes):
110 """return dict of our default attributes, excluding names given."""
110 """return dict of our default attributes, excluding names given."""
111 d = dict(balanced=self._balanced, targets=self.targets)
111 d = dict(balanced=self._balanced, targets=self.targets)
112 for name in self._default_names:
112 for name in self._default_names:
113 if name not in excludes:
113 if name not in excludes:
114 d[name] = getattr(self, name)
114 d[name] = getattr(self, name)
115 return d
115 return d
116
116
117 def set_flags(self, **kwargs):
117 def set_flags(self, **kwargs):
118 """set my attribute flags by keyword.
118 """set my attribute flags by keyword.
119
119
120 A View is a wrapper for the Client's apply method, but
120 A View is a wrapper for the Client's apply method, but
121 with attributes that specify keyword arguments, those attributes
121 with attributes that specify keyword arguments, those attributes
122 can be set by keyword argument with this method.
122 can be set by keyword argument with this method.
123
123
124 Parameters
124 Parameters
125 ----------
125 ----------
126
126
127 block : bool
127 block : bool
128 whether to wait for results
128 whether to wait for results
129 bound : bool
129 bound : bool
130 whether to use the client's namespace
130 whether to use the client's namespace
131 """
131 """
132 for key in kwargs:
132 for key in kwargs:
133 if key not in self._default_names:
133 if key not in self._default_names:
134 raise KeyError("Invalid name: %r"%key)
134 raise KeyError("Invalid name: %r"%key)
135 for name in ('block', 'bound'):
135 for name in ('block', 'bound'):
136 if name in kwargs:
136 if name in kwargs:
137 setattr(self, name, kwargs)
137 setattr(self, name, kwargs[name])
138
138
139 #----------------------------------------------------------------
139 #----------------------------------------------------------------
140 # wrappers for client methods:
140 # wrappers for client methods:
141 #----------------------------------------------------------------
141 #----------------------------------------------------------------
142 @sync_results
142 @sync_results
143 def spin(self):
143 def spin(self):
144 """spin the client, and sync"""
144 """spin the client, and sync"""
145 self.client.spin()
145 self.client.spin()
146
146
147 @sync_results
147 @sync_results
148 @save_ids
148 @save_ids
149 def apply(self, f, *args, **kwargs):
149 def apply(self, f, *args, **kwargs):
150 """calls f(*args, **kwargs) on remote engines, returning the result.
150 """calls f(*args, **kwargs) on remote engines, returning the result.
151
151
152 This method does not involve the engine's namespace.
152 This method does not involve the engine's namespace.
153
153
154 if self.block is False:
154 if self.block is False:
155 returns msg_id
155 returns msg_id
156 else:
156 else:
157 returns actual result of f(*args, **kwargs)
157 returns actual result of f(*args, **kwargs)
158 """
158 """
159 return self.client.apply(f, args, kwargs, **self._defaults())
159 return self.client.apply(f, args, kwargs, **self._defaults())
160
160
161 @save_ids
161 @save_ids
162 def apply_async(self, f, *args, **kwargs):
162 def apply_async(self, f, *args, **kwargs):
163 """calls f(*args, **kwargs) on remote engines in a nonblocking manner.
163 """calls f(*args, **kwargs) on remote engines in a nonblocking manner.
164
164
165 This method does not involve the engine's namespace.
165 This method does not involve the engine's namespace.
166
166
167 returns msg_id
167 returns msg_id
168 """
168 """
169 d = self._defaults('block', 'bound')
169 d = self._defaults('block', 'bound')
170 return self.client.apply(f,args,kwargs, block=False, bound=False, **d)
170 return self.client.apply(f,args,kwargs, block=False, bound=False, **d)
171
171
172 @spin_after
172 @spin_after
173 @save_ids
173 @save_ids
174 def apply_sync(self, f, *args, **kwargs):
174 def apply_sync(self, f, *args, **kwargs):
175 """calls f(*args, **kwargs) on remote engines in a blocking manner,
175 """calls f(*args, **kwargs) on remote engines in a blocking manner,
176 returning the result.
176 returning the result.
177
177
178 This method does not involve the engine's namespace.
178 This method does not involve the engine's namespace.
179
179
180 returns: actual result of f(*args, **kwargs)
180 returns: actual result of f(*args, **kwargs)
181 """
181 """
182 d = self._defaults('block', 'bound')
182 d = self._defaults('block', 'bound')
183 return self.client.apply(f,args,kwargs, block=True, bound=False, **d)
183 return self.client.apply(f,args,kwargs, block=True, bound=False, **d)
184
184
185 @sync_results
185 @sync_results
186 @save_ids
186 @save_ids
187 def apply_bound(self, f, *args, **kwargs):
187 def apply_bound(self, f, *args, **kwargs):
188 """calls f(*args, **kwargs) bound to engine namespace(s).
188 """calls f(*args, **kwargs) bound to engine namespace(s).
189
189
190 if self.block is False:
190 if self.block is False:
191 returns msg_id
191 returns msg_id
192 else:
192 else:
193 returns actual result of f(*args, **kwargs)
193 returns actual result of f(*args, **kwargs)
194
194
195 This method has access to the targets' globals
195 This method has access to the targets' globals
196
196
197 """
197 """
198 d = self._defaults('bound')
198 d = self._defaults('bound')
199 return self.client.apply(f, args, kwargs, bound=True, **d)
199 return self.client.apply(f, args, kwargs, bound=True, **d)
200
200
201 @sync_results
201 @sync_results
202 @save_ids
202 @save_ids
203 def apply_async_bound(self, f, *args, **kwargs):
203 def apply_async_bound(self, f, *args, **kwargs):
204 """calls f(*args, **kwargs) bound to engine namespace(s)
204 """calls f(*args, **kwargs) bound to engine namespace(s)
205 in a nonblocking manner.
205 in a nonblocking manner.
206
206
207 returns: msg_id
207 returns: msg_id
208
208
209 This method has access to the targets' globals
209 This method has access to the targets' globals
210
210
211 """
211 """
212 d = self._defaults('block', 'bound')
212 d = self._defaults('block', 'bound')
213 return self.client.apply(f, args, kwargs, block=False, bound=True, **d)
213 return self.client.apply(f, args, kwargs, block=False, bound=True, **d)
214
214
215 @spin_after
215 @spin_after
216 @save_ids
216 @save_ids
217 def apply_sync_bound(self, f, *args, **kwargs):
217 def apply_sync_bound(self, f, *args, **kwargs):
218 """calls f(*args, **kwargs) bound to engine namespace(s), waiting for the result.
218 """calls f(*args, **kwargs) bound to engine namespace(s), waiting for the result.
219
219
220 returns: actual result of f(*args, **kwargs)
220 returns: actual result of f(*args, **kwargs)
221
221
222 This method has access to the targets' globals
222 This method has access to the targets' globals
223
223
224 """
224 """
225 d = self._defaults('block', 'bound')
225 d = self._defaults('block', 'bound')
226 return self.client.apply(f, args, kwargs, block=True, bound=True, **d)
226 return self.client.apply(f, args, kwargs, block=True, bound=True, **d)
227
227
228 def abort(self, msg_ids=None, block=None):
228 def abort(self, msg_ids=None, block=None):
229 """Abort jobs on my engines.
229 """Abort jobs on my engines.
230
230
231 Parameters
231 Parameters
232 ----------
232 ----------
233
233
234 msg_ids : None, str, list of strs, optional
234 msg_ids : None, str, list of strs, optional
235 if None: abort all jobs.
235 if None: abort all jobs.
236 else: abort specific msg_id(s).
236 else: abort specific msg_id(s).
237 """
237 """
238 block = block if block is not None else self.block
238 block = block if block is not None else self.block
239 return self.client.abort(msg_ids=msg_ids, targets=self.targets, block=block)
239 return self.client.abort(msg_ids=msg_ids, targets=self.targets, block=block)
240
240
241 def queue_status(self, verbose=False):
241 def queue_status(self, verbose=False):
242 """Fetch the Queue status of my engines"""
242 """Fetch the Queue status of my engines"""
243 return self.client.queue_status(targets=self.targets, verbose=verbose)
243 return self.client.queue_status(targets=self.targets, verbose=verbose)
244
244
245 def purge_results(self, msg_ids=[], targets=[]):
245 def purge_results(self, msg_ids=[], targets=[]):
246 """Instruct the controller to forget specific results."""
246 """Instruct the controller to forget specific results."""
247 if targets is None or targets == 'all':
247 if targets is None or targets == 'all':
248 targets = self.targets
248 targets = self.targets
249 return self.client.purge_results(msg_ids=msg_ids, targets=targets)
249 return self.client.purge_results(msg_ids=msg_ids, targets=targets)
250
250
251 #-------------------------------------------------------------------
251 #-------------------------------------------------------------------
252 # Map
253 #-------------------------------------------------------------------
254
255 def map(self, f, *sequences, **kwargs):
256 """override in subclasses"""
257 raise NotImplementedError
258
259 def map_async(self, f, *sequences, **kwargs):
260 """Parallel version of builtin `map`, using this view's engines.
261
262 This is equivalent to map(...block=False)
263
264 See `map` for details.
265 """
266 if 'block' in kwargs:
267 raise TypeError("map_async doesn't take a `block` keyword argument.")
268 kwargs['block'] = False
269 return self.map(f,*sequences,**kwargs)
270
271 def map_sync(self, f, *sequences, **kwargs):
272 """Parallel version of builtin `map`, using this view's engines.
273
274 This is equivalent to map(...block=True)
275
276 See `map` for details.
277 """
278 if 'block' in kwargs:
279 raise TypeError("map_sync doesn't take a `block` keyword argument.")
280 kwargs['block'] = True
281 return self.map(f,*sequences,**kwargs)
282
283 #-------------------------------------------------------------------
252 # Decorators
284 # Decorators
253 #-------------------------------------------------------------------
285 #-------------------------------------------------------------------
254 def parallel(self, bound=True, block=True):
255 """Decorator for making a ParallelFunction"""
256 return parallel(self.client, bound=bound, targets=self.targets, block=block, balanced=self._balanced)
257
286
258 def remote(self, bound=True, block=True):
287 def remote(self, bound=True, block=True):
259 """Decorator for making a RemoteFunction"""
288 """Decorator for making a RemoteFunction"""
260 return parallel(self.client, bound=bound, targets=self.targets, block=block, balanced=self._balanced)
289 return remote(self.client, bound=bound, targets=self.targets, block=block, balanced=self._balanced)
261
290
291 def parallel(self, dist='b', bound=True, block=None):
292 """Decorator for making a ParallelFunction"""
293 block = self.block if block is None else block
294 return parallel(self.client, bound=bound, targets=self.targets, block=block, balanced=self._balanced)
262
295
263
296
264 class DirectView(View):
297 class DirectView(View):
265 """Direct Multiplexer View of one or more engines.
298 """Direct Multiplexer View of one or more engines.
266
299
267 These are created via indexed access to a client:
300 These are created via indexed access to a client:
268
301
269 >>> dv_1 = client[1]
302 >>> dv_1 = client[1]
270 >>> dv_all = client[:]
303 >>> dv_all = client[:]
271 >>> dv_even = client[::2]
304 >>> dv_even = client[::2]
272 >>> dv_some = client[1:3]
305 >>> dv_some = client[1:3]
273
306
274 This object provides dictionary access to engine namespaces:
307 This object provides dictionary access to engine namespaces:
275
308
276 # push a=5:
309 # push a=5:
277 >>> dv['a'] = 5
310 >>> dv['a'] = 5
278 # pull 'foo':
311 # pull 'foo':
279 >>> db['foo']
312 >>> db['foo']
280
313
281 """
314 """
282
315
283 def __init__(self, client=None, targets=None):
316 def __init__(self, client=None, targets=None):
284 super(DirectView, self).__init__(client=client, targets=targets)
317 super(DirectView, self).__init__(client=client, targets=targets)
285 self._balanced = False
318 self._balanced = False
286
319
287 @spin_after
320 @spin_after
288 @save_ids
321 @save_ids
289 def map(self, f, *sequences, **kwargs):
322 def map(self, f, *sequences, **kwargs):
290 """Parallel version of builtin `map`, using this View's `targets`.
323 """Parallel version of builtin `map`, using this View's `targets`.
291
324
292 There will be one task per target, so work will be chunked
325 There will be one task per target, so work will be chunked
293 if the sequences are longer than `targets`.
326 if the sequences are longer than `targets`.
294
327
295 Results can be iterated as they are ready, but will become available in chunks.
328 Results can be iterated as they are ready, but will become available in chunks.
296
329
297 Parameters
330 Parameters
298 ----------
331 ----------
299
332
300 f : callable
333 f : callable
301 function to be mapped
334 function to be mapped
302 *sequences: one or more sequences of matching length
335 *sequences: one or more sequences of matching length
303 the sequences to be distributed and passed to `f`
336 the sequences to be distributed and passed to `f`
304 block : bool
337 block : bool
305 whether to wait for the result or not [default self.block]
338 whether to wait for the result or not [default self.block]
306 bound : bool
339 bound : bool
307 whether to wait for the result or not [default self.bound]
340 whether to wait for the result or not [default self.bound]
308
341
309 Returns
342 Returns
310 -------
343 -------
311
344
312 if block=False:
345 if block=False:
313 AsyncMapResult
346 AsyncMapResult
314 An object like AsyncResult, but which reassembles the sequence of results
347 An object like AsyncResult, but which reassembles the sequence of results
315 into a single list. AsyncMapResults can be iterated through before all
348 into a single list. AsyncMapResults can be iterated through before all
316 results are complete.
349 results are complete.
317 else:
350 else:
318 the result of map(f,*sequences)
351 the result of map(f,*sequences)
319 """
352 """
320
353
321 block = kwargs.get('block', self.block)
354 block = kwargs.get('block', self.block)
322 bound = kwargs.get('bound', self.bound)
355 bound = kwargs.get('bound', self.bound)
323 for k in kwargs.keys():
356 for k in kwargs.keys():
324 if k not in ['block', 'bound']:
357 if k not in ['block', 'bound']:
325 raise TypeError("invalid keyword arg, %r"%k)
358 raise TypeError("invalid keyword arg, %r"%k)
326
359
327 assert len(sequences) > 0, "must have some sequences to map onto!"
360 assert len(sequences) > 0, "must have some sequences to map onto!"
328 pf = ParallelFunction(self.client, f, block=block,
361 pf = ParallelFunction(self.client, f, block=block, bound=bound,
329 bound=bound, targets=self.targets, balanced=False)
362 targets=self.targets, balanced=False)
330 return pf.map(*sequences)
363 return pf.map(*sequences)
331
364
332 def map_async(self, f, *sequences, **kwargs):
333 """Parallel version of builtin `map`, using this view's engines."""
334 if 'block' in kwargs:
335 raise TypeError("map_async doesn't take a `block` keyword argument.")
336 kwargs['block'] = True
337 return self.map(f,*sequences,**kwargs)
338
339 @sync_results
365 @sync_results
340 @save_ids
366 @save_ids
341 def execute(self, code, block=True):
367 def execute(self, code, block=True):
342 """execute some code on my targets."""
368 """execute some code on my targets."""
343 return self.client.execute(code, block=block, targets=self.targets)
369 return self.client.execute(code, block=block, targets=self.targets)
344
370
345 def update(self, ns):
371 def update(self, ns):
346 """update remote namespace with dict `ns`"""
372 """update remote namespace with dict `ns`"""
347 return self.client.push(ns, targets=self.targets, block=self.block)
373 return self.client.push(ns, targets=self.targets, block=self.block)
348
374
349 push = update
375 push = update
350
376
351 def get(self, key_s):
377 def get(self, key_s):
352 """get object(s) by `key_s` from remote namespace
378 """get object(s) by `key_s` from remote namespace
353 will return one object if it is a key.
379 will return one object if it is a key.
354 It also takes a list of keys, and will return a list of objects."""
380 It also takes a list of keys, and will return a list of objects."""
355 # block = block if block is not None else self.block
381 # block = block if block is not None else self.block
356 return self.client.pull(key_s, block=True, targets=self.targets)
382 return self.client.pull(key_s, block=True, targets=self.targets)
357
383
358 @sync_results
384 @sync_results
359 @save_ids
385 @save_ids
360 def pull(self, key_s, block=True):
386 def pull(self, key_s, block=True):
361 """get object(s) by `key_s` from remote namespace
387 """get object(s) by `key_s` from remote namespace
362 will return one object if it is a key.
388 will return one object if it is a key.
363 It also takes a list of keys, and will return a list of objects."""
389 It also takes a list of keys, and will return a list of objects."""
364 block = block if block is not None else self.block
390 block = block if block is not None else self.block
365 return self.client.pull(key_s, block=block, targets=self.targets)
391 return self.client.pull(key_s, block=block, targets=self.targets)
366
392
367 def scatter(self, key, seq, dist='b', flatten=False, targets=None, block=None):
393 def scatter(self, key, seq, dist='b', flatten=False, targets=None, block=None):
368 """
394 """
369 Partition a Python sequence and send the partitions to a set of engines.
395 Partition a Python sequence and send the partitions to a set of engines.
370 """
396 """
371 block = block if block is not None else self.block
397 block = block if block is not None else self.block
372 targets = targets if targets is not None else self.targets
398 targets = targets if targets is not None else self.targets
373
399
374 return self.client.scatter(key, seq, dist=dist, flatten=flatten,
400 return self.client.scatter(key, seq, dist=dist, flatten=flatten,
375 targets=targets, block=block)
401 targets=targets, block=block)
376
402
377 @sync_results
403 @sync_results
378 @save_ids
404 @save_ids
379 def gather(self, key, dist='b', targets=None, block=None):
405 def gather(self, key, dist='b', targets=None, block=None):
380 """
406 """
381 Gather a partitioned sequence on a set of engines as a single local seq.
407 Gather a partitioned sequence on a set of engines as a single local seq.
382 """
408 """
383 block = block if block is not None else self.block
409 block = block if block is not None else self.block
384 targets = targets if targets is not None else self.targets
410 targets = targets if targets is not None else self.targets
385
411
386 return self.client.gather(key, dist=dist, targets=targets, block=block)
412 return self.client.gather(key, dist=dist, targets=targets, block=block)
387
413
388 def __getitem__(self, key):
414 def __getitem__(self, key):
389 return self.get(key)
415 return self.get(key)
390
416
391 def __setitem__(self,key, value):
417 def __setitem__(self,key, value):
392 self.update({key:value})
418 self.update({key:value})
393
419
394 def clear(self, block=False):
420 def clear(self, block=False):
395 """Clear the remote namespaces on my engines."""
421 """Clear the remote namespaces on my engines."""
396 block = block if block is not None else self.block
422 block = block if block is not None else self.block
397 return self.client.clear(targets=self.targets, block=block)
423 return self.client.clear(targets=self.targets, block=block)
398
424
399 def kill(self, block=True):
425 def kill(self, block=True):
400 """Kill my engines."""
426 """Kill my engines."""
401 block = block if block is not None else self.block
427 block = block if block is not None else self.block
402 return self.client.kill(targets=self.targets, block=block)
428 return self.client.kill(targets=self.targets, block=block)
403
429
404 #----------------------------------------
430 #----------------------------------------
405 # activate for %px,%autopx magics
431 # activate for %px,%autopx magics
406 #----------------------------------------
432 #----------------------------------------
407 def activate(self):
433 def activate(self):
408 """Make this `View` active for parallel magic commands.
434 """Make this `View` active for parallel magic commands.
409
435
410 IPython has a magic command syntax to work with `MultiEngineClient` objects.
436 IPython has a magic command syntax to work with `MultiEngineClient` objects.
411 In a given IPython session there is a single active one. While
437 In a given IPython session there is a single active one. While
412 there can be many `Views` created and used by the user,
438 there can be many `Views` created and used by the user,
413 there is only one active one. The active `View` is used whenever
439 there is only one active one. The active `View` is used whenever
414 the magic commands %px and %autopx are used.
440 the magic commands %px and %autopx are used.
415
441
416 The activate() method is called on a given `View` to make it
442 The activate() method is called on a given `View` to make it
417 active. Once this has been done, the magic commands can be used.
443 active. Once this has been done, the magic commands can be used.
418 """
444 """
419
445
420 try:
446 try:
421 # This is injected into __builtins__.
447 # This is injected into __builtins__.
422 ip = get_ipython()
448 ip = get_ipython()
423 except NameError:
449 except NameError:
424 print "The IPython parallel magics (%result, %px, %autopx) only work within IPython."
450 print "The IPython parallel magics (%result, %px, %autopx) only work within IPython."
425 else:
451 else:
426 pmagic = ip.plugin_manager.get_plugin('parallelmagic')
452 pmagic = ip.plugin_manager.get_plugin('parallelmagic')
427 if pmagic is not None:
453 if pmagic is not None:
428 pmagic.active_multiengine_client = self
454 pmagic.active_multiengine_client = self
429 else:
455 else:
430 print "You must first load the parallelmagic extension " \
456 print "You must first load the parallelmagic extension " \
431 "by doing '%load_ext parallelmagic'"
457 "by doing '%load_ext parallelmagic'"
432
458
433
459
434 class LoadBalancedView(View):
460 class LoadBalancedView(View):
435 """An load-balancing View that only executes via the Task scheduler.
461 """An load-balancing View that only executes via the Task scheduler.
436
462
437 Load-balanced views can be created with the client's `view` method:
463 Load-balanced views can be created with the client's `view` method:
438
464
439 >>> v = client.view(balanced=True)
465 >>> v = client.view(balanced=True)
440
466
441 or targets can be specified, to restrict the potential destinations:
467 or targets can be specified, to restrict the potential destinations:
442
468
443 >>> v = client.view([1,3],balanced=True)
469 >>> v = client.view([1,3],balanced=True)
444
470
445 which would restrict loadbalancing to between engines 1 and 3.
471 which would restrict loadbalancing to between engines 1 and 3.
446
472
447 """
473 """
448
474
449 _apply_name = 'apply_balanced'
450 _default_names = ['block', 'bound', 'follow', 'after', 'timeout']
475 _default_names = ['block', 'bound', 'follow', 'after', 'timeout']
451
476
452 def __init__(self, client=None, targets=None):
477 def __init__(self, client=None, targets=None):
453 super(LoadBalancedView, self).__init__(client=client, targets=targets)
478 super(LoadBalancedView, self).__init__(client=client, targets=targets)
454 self._ntargets = 1
479 self._ntargets = 1
480 self._balanced = True
455
481
456 def _validate_dependency(self, dep):
482 def _validate_dependency(self, dep):
457 """validate a dependency.
483 """validate a dependency.
458
484
459 For use in `set_flags`.
485 For use in `set_flags`.
460 """
486 """
461 if dep is None or isinstance(dep, (str, AsyncResult, Dependency)):
487 if dep is None or isinstance(dep, (str, AsyncResult, Dependency)):
462 return True
488 return True
463 elif isinstance(dep, (list,set, tuple)):
489 elif isinstance(dep, (list,set, tuple)):
464 for d in dep:
490 for d in dep:
465 if not isinstance(d, str, AsyncResult):
491 if not isinstance(d, str, AsyncResult):
466 return False
492 return False
467 elif isinstance(dep, dict):
493 elif isinstance(dep, dict):
468 if set(dep.keys()) != set(Dependency().as_dict().keys()):
494 if set(dep.keys()) != set(Dependency().as_dict().keys()):
469 return False
495 return False
470 if not isinstance(dep['msg_ids'], list):
496 if not isinstance(dep['msg_ids'], list):
471 return False
497 return False
472 for d in dep['msg_ids']:
498 for d in dep['msg_ids']:
473 if not isinstance(d, str):
499 if not isinstance(d, str):
474 return False
500 return False
475 else:
501 else:
476 return False
502 return False
477
503
478 def set_flags(self, **kwargs):
504 def set_flags(self, **kwargs):
479 """set my attribute flags by keyword.
505 """set my attribute flags by keyword.
480
506
481 A View is a wrapper for the Client's apply method, but
507 A View is a wrapper for the Client's apply method, but
482 with attributes that specify keyword arguments, those attributes
508 with attributes that specify keyword arguments, those attributes
483 can be set by keyword argument with this method.
509 can be set by keyword argument with this method.
484
510
485 Parameters
511 Parameters
486 ----------
512 ----------
487
513
488 block : bool
514 block : bool
489 whether to wait for results
515 whether to wait for results
490 bound : bool
516 bound : bool
491 whether to use the engine's namespace
517 whether to use the engine's namespace
492 follow : Dependency, list, msg_id, AsyncResult
518 follow : Dependency, list, msg_id, AsyncResult
493 the location dependencies of tasks
519 the location dependencies of tasks
494 after : Dependency, list, msg_id, AsyncResult
520 after : Dependency, list, msg_id, AsyncResult
495 the time dependencies of tasks
521 the time dependencies of tasks
496 timeout : int,None
522 timeout : int,None
497 the timeout to be used for tasks
523 the timeout to be used for tasks
498 """
524 """
499
525
500 super(LoadBalancedView, self).set_flags(**kwargs)
526 super(LoadBalancedView, self).set_flags(**kwargs)
501 for name in ('follow', 'after'):
527 for name in ('follow', 'after'):
502 if name in kwargs:
528 if name in kwargs:
503 value = kwargs[name]
529 value = kwargs[name]
504 if self._validate_dependency(value):
530 if self._validate_dependency(value):
505 setattr(self, name, value)
531 setattr(self, name, value)
506 else:
532 else:
507 raise ValueError("Invalid dependency: %r"%value)
533 raise ValueError("Invalid dependency: %r"%value)
508 if 'timeout' in kwargs:
534 if 'timeout' in kwargs:
509 t = kwargs['timeout']
535 t = kwargs['timeout']
510 if not isinstance(t, (int, long, float, None)):
536 if not isinstance(t, (int, long, float, None)):
511 raise TypeError("Invalid type for timeout: %r"%type(t))
537 raise TypeError("Invalid type for timeout: %r"%type(t))
512 if t is not None:
538 if t is not None:
513 if t < 0:
539 if t < 0:
514 raise ValueError("Invalid timeout: %s"%t)
540 raise ValueError("Invalid timeout: %s"%t)
515 self.timeout = t
541 self.timeout = t
516
542
517 @spin_after
543 @spin_after
518 @save_ids
544 @save_ids
519 def map(self, f, *sequences, **kwargs):
545 def map(self, f, *sequences, **kwargs):
520 """Parallel version of builtin `map`, load-balanced by this View.
546 """Parallel version of builtin `map`, load-balanced by this View.
521
547
522 Each element will be a separate task, and will be load-balanced. This
548 Each element will be a separate task, and will be load-balanced. This
523 lets individual elements be available for iteration as soon as they arrive.
549 lets individual elements be available for iteration as soon as they arrive.
524
550
525 Parameters
551 Parameters
526 ----------
552 ----------
527
553
528 f : callable
554 f : callable
529 function to be mapped
555 function to be mapped
530 *sequences: one or more sequences of matching length
556 *sequences: one or more sequences of matching length
531 the sequences to be distributed and passed to `f`
557 the sequences to be distributed and passed to `f`
532 block : bool
558 block : bool
533 whether to wait for the result or not [default self.block]
559 whether to wait for the result or not [default self.block]
534 bound : bool
560 bound : bool
535 whether to use the engine's namespace
561 whether to use the engine's namespace
536
562
537 Returns
563 Returns
538 -------
564 -------
539
565
540 if block=False:
566 if block=False:
541 AsyncMapResult
567 AsyncMapResult
542 An object like AsyncResult, but which reassembles the sequence of results
568 An object like AsyncResult, but which reassembles the sequence of results
543 into a single list. AsyncMapResults can be iterated through before all
569 into a single list. AsyncMapResults can be iterated through before all
544 results are complete.
570 results are complete.
545 else:
571 else:
546 the result of map(f,*sequences)
572 the result of map(f,*sequences)
547
573
548 """
574 """
549
575
576 # default
550 block = kwargs.get('block', self.block)
577 block = kwargs.get('block', self.block)
551 bound = kwargs.get('bound', self.bound)
578 bound = kwargs.get('bound', self.bound)
579 chunk_size = kwargs.get('chunk_size', 1)
580
581 keyset = set(kwargs.keys())
582 extra_keys = keyset.difference_update(set(['block', 'bound', 'chunk_size']))
583 if extra_keys:
584 raise TypeError("Invalid kwargs: %s"%list(extra_keys))
552
585
553 assert len(sequences) > 0, "must have some sequences to map onto!"
586 assert len(sequences) > 0, "must have some sequences to map onto!"
554
587
555 pf = ParallelFunction(self.client, f, block=block, bound=bound,
588 pf = ParallelFunction(self.client, f, block=block, bound=bound,
556 targets=self.targets, balanced=True)
589 targets=self.targets, balanced=True,
590 chunk_size=chunk_size)
557 return pf.map(*sequences)
591 return pf.map(*sequences)
558
592
559 def map_async(self, f, *sequences, **kwargs):
560 """Parallel version of builtin `map`, using this view's engines.
561
562 This is equivalent to map(...block=False)
563
564 See `map` for details.
565 """
566
567 if 'block' in kwargs:
568 raise TypeError("map_async doesn't take a `block` keyword argument.")
569 kwargs['block'] = True
570 return self.map(f,*sequences,**kwargs)
571
572
@@ -1,110 +1,119 b''
1 """Example for generating an arbitrary DAG as a dependency map.
1 """Example for generating an arbitrary DAG as a dependency map.
2
2
3 This demo uses networkx to generate the graph.
3 This demo uses networkx to generate the graph.
4
4
5 Authors
5 Authors
6 -------
6 -------
7 * MinRK
7 * MinRK
8 """
8 """
9 import networkx as nx
9 import networkx as nx
10 from random import randint, random
10 from random import randint, random
11 from IPython.zmq.parallel import client as cmod
11 from IPython.zmq.parallel import client as cmod
12
12
13 def randomwait():
13 def randomwait():
14 import time
14 import time
15 from random import random
15 from random import random
16 time.sleep(random())
16 time.sleep(random())
17 return time.time()
17 return time.time()
18
18
19
19
20 def random_dag(nodes, edges):
20 def random_dag(nodes, edges):
21 """Generate a random Directed Acyclic Graph (DAG) with a given number of nodes and edges."""
21 """Generate a random Directed Acyclic Graph (DAG) with a given number of nodes and edges."""
22 G = nx.DiGraph()
22 G = nx.DiGraph()
23 for i in range(nodes):
23 for i in range(nodes):
24 G.add_node(i)
24 G.add_node(i)
25 while edges > 0:
25 while edges > 0:
26 a = randint(0,nodes-1)
26 a = randint(0,nodes-1)
27 b=a
27 b=a
28 while b==a:
28 while b==a:
29 b = randint(0,nodes-1)
29 b = randint(0,nodes-1)
30 G.add_edge(a,b)
30 G.add_edge(a,b)
31 if nx.is_directed_acyclic_graph(G):
31 if nx.is_directed_acyclic_graph(G):
32 edges -= 1
32 edges -= 1
33 else:
33 else:
34 # we closed a loop!
34 # we closed a loop!
35 G.remove_edge(a,b)
35 G.remove_edge(a,b)
36 return G
36 return G
37
37
38 def add_children(G, parent, level, n=2):
38 def add_children(G, parent, level, n=2):
39 """Add children recursively to a binary tree."""
39 """Add children recursively to a binary tree."""
40 if level == 0:
40 if level == 0:
41 return
41 return
42 for i in range(n):
42 for i in range(n):
43 child = parent+str(i)
43 child = parent+str(i)
44 G.add_node(child)
44 G.add_node(child)
45 G.add_edge(parent,child)
45 G.add_edge(parent,child)
46 add_children(G, child, level-1, n)
46 add_children(G, child, level-1, n)
47
47
48 def make_bintree(levels):
48 def make_bintree(levels):
49 """Make a symmetrical binary tree with @levels"""
49 """Make a symmetrical binary tree with @levels"""
50 G = nx.DiGraph()
50 G = nx.DiGraph()
51 root = '0'
51 root = '0'
52 G.add_node(root)
52 G.add_node(root)
53 add_children(G, root, levels, 2)
53 add_children(G, root, levels, 2)
54 return G
54 return G
55
55
56 def submit_jobs(client, G, jobs):
56 def submit_jobs(client, G, jobs):
57 """Submit jobs via client where G describes the time dependencies."""
57 """Submit jobs via client where G describes the time dependencies."""
58 results = {}
58 results = {}
59 for node in nx.topological_sort(G):
59 for node in nx.topological_sort(G):
60 deps = [ results[n] for n in G.predecessors(node) ]
60 deps = [ results[n] for n in G.predecessors(node) ]
61 results[node] = client.apply(jobs[node], after=deps)
61 results[node] = client.apply(jobs[node], after=deps)
62 return results
62 return results
63
63
64 def validate_tree(G, results):
64 def validate_tree(G, results):
65 """Validate that jobs executed after their dependencies."""
65 """Validate that jobs executed after their dependencies."""
66 for node in G:
66 for node in G:
67 started = results[node].metadata.started
67 started = results[node].metadata.started
68 for parent in G.predecessors(node):
68 for parent in G.predecessors(node):
69 finished = results[parent].metadata.completed
69 finished = results[parent].metadata.completed
70 assert started > finished, "%s should have happened after %s"%(node, parent)
70 assert started > finished, "%s should have happened after %s"%(node, parent)
71
71
72 def main(nodes, edges):
72 def main(nodes, edges):
73 """Generate a random graph, submit jobs, then validate that the
73 """Generate a random graph, submit jobs, then validate that the
74 dependency order was enforced.
74 dependency order was enforced.
75 Finally, plot the graph, with time on the x-axis, and
75 Finally, plot the graph, with time on the x-axis, and
76 in-degree on the y (just for spread). All arrows must
76 in-degree on the y (just for spread). All arrows must
77 point at least slightly to the right if the graph is valid.
77 point at least slightly to the right if the graph is valid.
78 """
78 """
79 import pylab
79 from matplotlib.dates import date2num
80 from matplotlib.dates import date2num
80 from matplotlib.cm import gist_rainbow
81 from matplotlib.cm import gist_rainbow
81 print "building DAG"
82 print "building DAG"
82 G = random_dag(nodes, edges)
83 G = random_dag(nodes, edges)
83 jobs = {}
84 jobs = {}
84 pos = {}
85 pos = {}
85 colors = {}
86 colors = {}
86 for node in G:
87 for node in G:
87 jobs[node] = randomwait
88 jobs[node] = randomwait
88
89
89 client = cmod.Client()
90 client = cmod.Client()
90 print "submitting %i tasks with %i dependencies"%(nodes,edges)
91 print "submitting %i tasks with %i dependencies"%(nodes,edges)
91 results = submit_jobs(client, G, jobs)
92 results = submit_jobs(client, G, jobs)
92 print "waiting for results"
93 print "waiting for results"
93 client.barrier()
94 client.barrier()
94 print "done"
95 print "done"
95 for node in G:
96 for node in G:
96 md = results[node].metadata
97 md = results[node].metadata
97 start = date2num(md.started)
98 start = date2num(md.started)
98 runtime = date2num(md.completed) - start
99 runtime = date2num(md.completed) - start
99 pos[node] = (start, runtime)
100 pos[node] = (start, runtime)
100 colors[node] = md.engine_id
101 colors[node] = md.engine_id
101 validate_tree(G, results)
102 validate_tree(G, results)
102 nx.draw(G, pos, node_list = colors.keys(), node_color=colors.values(), cmap=gist_rainbow)
103 nx.draw(G, pos, node_list=colors.keys(), node_color=colors.values(), cmap=gist_rainbow,
104 with_labels=False)
105 x,y = zip(*pos.values())
106 xmin,ymin = map(min, (x,y))
107 xmax,ymax = map(max, (x,y))
108 xscale = xmax-xmin
109 yscale = ymax-ymin
110 pylab.xlim(xmin-xscale*.1,xmax+xscale*.1)
111 pylab.ylim(ymin-yscale*.1,ymax+yscale*.1)
103 return G,results
112 return G,results
104
113
105 if __name__ == '__main__':
114 if __name__ == '__main__':
106 import pylab
115 import pylab
107 # main(5,10)
116 # main(5,10)
108 main(32,96)
117 main(32,96)
109 pylab.show()
118 pylab.show()
110 No newline at end of file
119
@@ -1,144 +1,144 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """Run a Monte-Carlo options pricer in parallel."""
2 """Run a Monte-Carlo options pricer in parallel."""
3
3
4 #-----------------------------------------------------------------------------
4 #-----------------------------------------------------------------------------
5 # Imports
5 # Imports
6 #-----------------------------------------------------------------------------
6 #-----------------------------------------------------------------------------
7
7
8 import sys
8 import sys
9 import time
9 import time
10 from IPython.zmq.parallel import client
10 from IPython.zmq.parallel import client
11 import numpy as np
11 import numpy as np
12 from mcpricer import price_options
12 from mcpricer import price_options
13 from matplotlib import pyplot as plt
13 from matplotlib import pyplot as plt
14
14
15 #-----------------------------------------------------------------------------
15 #-----------------------------------------------------------------------------
16 # Setup parameters for the run
16 # Setup parameters for the run
17 #-----------------------------------------------------------------------------
17 #-----------------------------------------------------------------------------
18
18
19 def ask_question(text, the_type, default):
19 def ask_question(text, the_type, default):
20 s = '%s [%r]: ' % (text, the_type(default))
20 s = '%s [%r]: ' % (text, the_type(default))
21 result = raw_input(s)
21 result = raw_input(s)
22 if result:
22 if result:
23 return the_type(result)
23 return the_type(result)
24 else:
24 else:
25 return the_type(default)
25 return the_type(default)
26
26
27 cluster_profile = ask_question("Cluster profile", str, "default")
27 cluster_profile = ask_question("Cluster profile", str, "default")
28 price = ask_question("Initial price", float, 100.0)
28 price = ask_question("Initial price", float, 100.0)
29 rate = ask_question("Interest rate", float, 0.05)
29 rate = ask_question("Interest rate", float, 0.05)
30 days = ask_question("Days to expiration", int, 260)
30 days = ask_question("Days to expiration", int, 260)
31 paths = ask_question("Number of MC paths", int, 10000)
31 paths = ask_question("Number of MC paths", int, 10000)
32 n_strikes = ask_question("Number of strike values", int, 5)
32 n_strikes = ask_question("Number of strike values", int, 5)
33 min_strike = ask_question("Min strike price", float, 90.0)
33 min_strike = ask_question("Min strike price", float, 90.0)
34 max_strike = ask_question("Max strike price", float, 110.0)
34 max_strike = ask_question("Max strike price", float, 110.0)
35 n_sigmas = ask_question("Number of volatility values", int, 5)
35 n_sigmas = ask_question("Number of volatility values", int, 5)
36 min_sigma = ask_question("Min volatility", float, 0.1)
36 min_sigma = ask_question("Min volatility", float, 0.1)
37 max_sigma = ask_question("Max volatility", float, 0.4)
37 max_sigma = ask_question("Max volatility", float, 0.4)
38
38
39 strike_vals = np.linspace(min_strike, max_strike, n_strikes)
39 strike_vals = np.linspace(min_strike, max_strike, n_strikes)
40 sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)
40 sigma_vals = np.linspace(min_sigma, max_sigma, n_sigmas)
41
41
42 #-----------------------------------------------------------------------------
42 #-----------------------------------------------------------------------------
43 # Setup for parallel calculation
43 # Setup for parallel calculation
44 #-----------------------------------------------------------------------------
44 #-----------------------------------------------------------------------------
45
45
46 # The Client is used to setup the calculation and works with all
46 # The Client is used to setup the calculation and works with all
47 # engines.
47 # engines.
48 c = client.Client(profile=cluster_profile)
48 c = client.Client(profile=cluster_profile)
49
49
50 # A LoadBalancedView is an interface to the engines that provides dynamic load
50 # A LoadBalancedView is an interface to the engines that provides dynamic load
51 # balancing at the expense of not knowing which engine will execute the code.
51 # balancing at the expense of not knowing which engine will execute the code.
52 view = c[None]
52 view = c.view()
53
53
54 # Initialize the common code on the engines. This Python module has the
54 # Initialize the common code on the engines. This Python module has the
55 # price_options function that prices the options.
55 # price_options function that prices the options.
56
56
57 #-----------------------------------------------------------------------------
57 #-----------------------------------------------------------------------------
58 # Perform parallel calculation
58 # Perform parallel calculation
59 #-----------------------------------------------------------------------------
59 #-----------------------------------------------------------------------------
60
60
61 print "Running parallel calculation over strike prices and volatilities..."
61 print "Running parallel calculation over strike prices and volatilities..."
62 print "Strike prices: ", strike_vals
62 print "Strike prices: ", strike_vals
63 print "Volatilities: ", sigma_vals
63 print "Volatilities: ", sigma_vals
64 sys.stdout.flush()
64 sys.stdout.flush()
65
65
66 # Submit tasks to the TaskClient for each (strike, sigma) pair as a MapTask.
66 # Submit tasks to the TaskClient for each (strike, sigma) pair as a MapTask.
67 t1 = time.time()
67 t1 = time.time()
68 async_results = []
68 async_results = []
69 for strike in strike_vals:
69 for strike in strike_vals:
70 for sigma in sigma_vals:
70 for sigma in sigma_vals:
71 ar = view.apply_async(price_options, price, strike, sigma, rate, days, paths)
71 ar = view.apply_async(price_options, price, strike, sigma, rate, days, paths)
72 async_results.append(ar)
72 async_results.append(ar)
73
73
74 print "Submitted tasks: ", len(async_results)
74 print "Submitted tasks: ", len(async_results)
75 sys.stdout.flush()
75 sys.stdout.flush()
76
76
77 # Block until all tasks are completed.
77 # Block until all tasks are completed.
78 c.barrier(async_results)
78 c.barrier(async_results)
79 t2 = time.time()
79 t2 = time.time()
80 t = t2-t1
80 t = t2-t1
81
81
82 print "Parallel calculation completed, time = %s s" % t
82 print "Parallel calculation completed, time = %s s" % t
83 print "Collecting results..."
83 print "Collecting results..."
84
84
85 # Get the results using TaskClient.get_task_result.
85 # Get the results using TaskClient.get_task_result.
86 results = [ar.get() for ar in async_results]
86 results = [ar.get() for ar in async_results]
87
87
88 # Assemble the result into a structured NumPy array.
88 # Assemble the result into a structured NumPy array.
89 prices = np.empty(n_strikes*n_sigmas,
89 prices = np.empty(n_strikes*n_sigmas,
90 dtype=[('ecall',float),('eput',float),('acall',float),('aput',float)]
90 dtype=[('ecall',float),('eput',float),('acall',float),('aput',float)]
91 )
91 )
92
92
93 for i, price in enumerate(results):
93 for i, price in enumerate(results):
94 prices[i] = tuple(price)
94 prices[i] = tuple(price)
95
95
96 prices.shape = (n_strikes, n_sigmas)
96 prices.shape = (n_strikes, n_sigmas)
97 strike_mesh, sigma_mesh = np.meshgrid(strike_vals, sigma_vals)
97 strike_mesh, sigma_mesh = np.meshgrid(strike_vals, sigma_vals)
98
98
99 print "Results are available: strike_mesh, sigma_mesh, prices"
99 print "Results are available: strike_mesh, sigma_mesh, prices"
100 print "To plot results type 'plot_options(sigma_mesh, strike_mesh, prices)'"
100 print "To plot results type 'plot_options(sigma_mesh, strike_mesh, prices)'"
101
101
102 #-----------------------------------------------------------------------------
102 #-----------------------------------------------------------------------------
103 # Utilities
103 # Utilities
104 #-----------------------------------------------------------------------------
104 #-----------------------------------------------------------------------------
105
105
106 def plot_options(sigma_mesh, strike_mesh, prices):
106 def plot_options(sigma_mesh, strike_mesh, prices):
107 """
107 """
108 Make a contour plot of the option price in (sigma, strike) space.
108 Make a contour plot of the option price in (sigma, strike) space.
109 """
109 """
110 plt.figure(1)
110 plt.figure(1)
111
111
112 plt.subplot(221)
112 plt.subplot(221)
113 plt.contourf(sigma_mesh, strike_mesh, prices['ecall'])
113 plt.contourf(sigma_mesh, strike_mesh, prices['ecall'])
114 plt.axis('tight')
114 plt.axis('tight')
115 plt.colorbar()
115 plt.colorbar()
116 plt.title('European Call')
116 plt.title('European Call')
117 plt.ylabel("Strike Price")
117 plt.ylabel("Strike Price")
118
118
119 plt.subplot(222)
119 plt.subplot(222)
120 plt.contourf(sigma_mesh, strike_mesh, prices['acall'])
120 plt.contourf(sigma_mesh, strike_mesh, prices['acall'])
121 plt.axis('tight')
121 plt.axis('tight')
122 plt.colorbar()
122 plt.colorbar()
123 plt.title("Asian Call")
123 plt.title("Asian Call")
124
124
125 plt.subplot(223)
125 plt.subplot(223)
126 plt.contourf(sigma_mesh, strike_mesh, prices['eput'])
126 plt.contourf(sigma_mesh, strike_mesh, prices['eput'])
127 plt.axis('tight')
127 plt.axis('tight')
128 plt.colorbar()
128 plt.colorbar()
129 plt.title("European Put")
129 plt.title("European Put")
130 plt.xlabel("Volatility")
130 plt.xlabel("Volatility")
131 plt.ylabel("Strike Price")
131 plt.ylabel("Strike Price")
132
132
133 plt.subplot(224)
133 plt.subplot(224)
134 plt.contourf(sigma_mesh, strike_mesh, prices['aput'])
134 plt.contourf(sigma_mesh, strike_mesh, prices['aput'])
135 plt.axis('tight')
135 plt.axis('tight')
136 plt.colorbar()
136 plt.colorbar()
137 plt.title("Asian Put")
137 plt.title("Asian Put")
138 plt.xlabel("Volatility")
138 plt.xlabel("Volatility")
139
139
140
140
141
141
142
142
143
143
144
144
@@ -1,63 +1,64 b''
1 """Calculate statistics on the digits of pi in parallel.
1 """Calculate statistics on the digits of pi in parallel.
2
2
3 This program uses the functions in :file:`pidigits.py` to calculate
3 This program uses the functions in :file:`pidigits.py` to calculate
4 the frequencies of 2 digit sequences in the digits of pi. The
4 the frequencies of 2 digit sequences in the digits of pi. The
5 results are plotted using matplotlib.
5 results are plotted using matplotlib.
6
6
7 To run, text files from http://www.super-computing.org/
7 To run, text files from http://www.super-computing.org/
8 must be installed in the working directory of the IPython engines.
8 must be installed in the working directory of the IPython engines.
9 The actual filenames to be used can be set with the ``filestring``
9 The actual filenames to be used can be set with the ``filestring``
10 variable below.
10 variable below.
11
11
12 The dataset we have been using for this is the 200 million digit one here:
12 The dataset we have been using for this is the 200 million digit one here:
13 ftp://pi.super-computing.org/.2/pi200m/
13 ftp://pi.super-computing.org/.2/pi200m/
14
14
15 and the files used will be downloaded if they are not in the working directory
15 and the files used will be downloaded if they are not in the working directory
16 of the IPython engines.
16 of the IPython engines.
17 """
17 """
18
18
19 from IPython.zmq.parallel import client
19 from IPython.zmq.parallel import client
20 from matplotlib import pyplot as plt
20 from matplotlib import pyplot as plt
21 import numpy as np
21 import numpy as np
22 from pidigits import *
22 from pidigits import *
23 from timeit import default_timer as clock
23 from timeit import default_timer as clock
24
24
25 # Files with digits of pi (10m digits each)
25 # Files with digits of pi (10m digits each)
26 filestring = 'pi200m.ascii.%(i)02dof20'
26 filestring = 'pi200m.ascii.%(i)02dof20'
27 files = [filestring % {'i':i} for i in range(1,16)]
27 files = [filestring % {'i':i} for i in range(1,16)]
28
28
29 # Connect to the IPython cluster
29 # Connect to the IPython cluster
30 c = client.Client()
30 c = client.Client(profile='edison')
31 c.run('pidigits.py')
31 c.run('pidigits.py')
32
32
33 # the number of engines
33 # the number of engines
34 n = len(c.ids)
34 n = len(c)
35 id0 = list(c.ids)[0]
35 id0 = c.ids[0]
36 v = c[:]
37 v.set_flags(bound=True,block=True)
36 # fetch the pi-files
38 # fetch the pi-files
37 print "downloading %i files of pi"%n
39 print "downloading %i files of pi"%n
38 c.map(fetch_pi_file, files[:n])
40 v.map(fetch_pi_file, files[:n])
39 print "done"
41 print "done"
40
42
41 # Run 10m digits on 1 engine
43 # Run 10m digits on 1 engine
42 t1 = clock()
44 t1 = clock()
43 freqs10m = c[id0].apply_sync_bound(compute_two_digit_freqs, files[0])
45 freqs10m = c[id0].apply_sync_bound(compute_two_digit_freqs, files[0])
44 t2 = clock()
46 t2 = clock()
45 digits_per_second1 = 10.0e6/(t2-t1)
47 digits_per_second1 = 10.0e6/(t2-t1)
46 print "Digits per second (1 core, 10m digits): ", digits_per_second1
48 print "Digits per second (1 core, 10m digits): ", digits_per_second1
47
49
48
50
49 # Run n*10m digits on all engines
51 # Run n*10m digits on all engines
50 t1 = clock()
52 t1 = clock()
51 c.block=True
53 freqs_all = v.map(compute_two_digit_freqs, files[:n])
52 freqs_all = c.map(compute_two_digit_freqs, files[:n])
53 freqs150m = reduce_freqs(freqs_all)
54 freqs150m = reduce_freqs(freqs_all)
54 t2 = clock()
55 t2 = clock()
55 digits_per_second8 = n*10.0e6/(t2-t1)
56 digits_per_second8 = n*10.0e6/(t2-t1)
56 print "Digits per second (%i engines, %i0m digits): "%(n,n), digits_per_second8
57 print "Digits per second (%i engines, %i0m digits): "%(n,n), digits_per_second8
57
58
58 print "Speedup: ", digits_per_second8/digits_per_second1
59 print "Speedup: ", digits_per_second8/digits_per_second1
59
60
60 plot_two_digit_freqs(freqs150m)
61 plot_two_digit_freqs(freqs150m)
61 plt.title("2 digit sequences in %i0m digits of pi"%n)
62 plt.title("2 digit sequences in %i0m digits of pi"%n)
62 plt.show()
63 plt.show()
63
64
@@ -1,159 +1,157 b''
1 """Compute statistics on the digits of pi.
1 """Compute statistics on the digits of pi.
2
2
3 This uses precomputed digits of pi from the website
3 This uses precomputed digits of pi from the website
4 of Professor Yasumasa Kanada at the University of
4 of Professor Yasumasa Kanada at the University of
5 Tokoyo: http://www.super-computing.org/
5 Tokoyo: http://www.super-computing.org/
6
6
7 Currently, there are only functions to read the
7 Currently, there are only functions to read the
8 .txt (non-compressed, non-binary) files, but adding
8 .txt (non-compressed, non-binary) files, but adding
9 support for compression and binary files would be
9 support for compression and binary files would be
10 straightforward.
10 straightforward.
11
11
12 This focuses on computing the number of times that
12 This focuses on computing the number of times that
13 all 1, 2, n digits sequences occur in the digits of pi.
13 all 1, 2, n digits sequences occur in the digits of pi.
14 If the digits of pi are truly random, these frequencies
14 If the digits of pi are truly random, these frequencies
15 should be equal.
15 should be equal.
16 """
16 """
17
17
18 # Import statements
18 # Import statements
19 from __future__ import division, with_statement
19 from __future__ import division, with_statement
20
20
21 import os
22 import urllib
23
24 import numpy as np
21 import numpy as np
25 from matplotlib import pyplot as plt
22 from matplotlib import pyplot as plt
26
23
27 # Top-level functions
24 # Top-level functions
28
25
29 def fetch_pi_file(filename):
26 def fetch_pi_file(filename):
30 """This will download a segment of pi from super-computing.org
27 """This will download a segment of pi from super-computing.org
31 if the file is not already present.
28 if the file is not already present.
32 """
29 """
30 import os, urllib
33 ftpdir="ftp://pi.super-computing.org/.2/pi200m/"
31 ftpdir="ftp://pi.super-computing.org/.2/pi200m/"
34 if os.path.exists(filename):
32 if os.path.exists(filename):
35 # we already have it
33 # we already have it
36 return
34 return
37 else:
35 else:
38 # download it
36 # download it
39 urllib.urlretrieve(ftpdir+filename,filename)
37 urllib.urlretrieve(ftpdir+filename,filename)
40
38
41 def compute_one_digit_freqs(filename):
39 def compute_one_digit_freqs(filename):
42 """
40 """
43 Read digits of pi from a file and compute the 1 digit frequencies.
41 Read digits of pi from a file and compute the 1 digit frequencies.
44 """
42 """
45 d = txt_file_to_digits(filename)
43 d = txt_file_to_digits(filename)
46 freqs = one_digit_freqs(d)
44 freqs = one_digit_freqs(d)
47 return freqs
45 return freqs
48
46
49 def compute_two_digit_freqs(filename):
47 def compute_two_digit_freqs(filename):
50 """
48 """
51 Read digits of pi from a file and compute the 2 digit frequencies.
49 Read digits of pi from a file and compute the 2 digit frequencies.
52 """
50 """
53 d = txt_file_to_digits(filename)
51 d = txt_file_to_digits(filename)
54 freqs = two_digit_freqs(d)
52 freqs = two_digit_freqs(d)
55 return freqs
53 return freqs
56
54
57 def reduce_freqs(freqlist):
55 def reduce_freqs(freqlist):
58 """
56 """
59 Add up a list of freq counts to get the total counts.
57 Add up a list of freq counts to get the total counts.
60 """
58 """
61 allfreqs = np.zeros_like(freqlist[0])
59 allfreqs = np.zeros_like(freqlist[0])
62 for f in freqlist:
60 for f in freqlist:
63 allfreqs += f
61 allfreqs += f
64 return allfreqs
62 return allfreqs
65
63
66 def compute_n_digit_freqs(filename, n):
64 def compute_n_digit_freqs(filename, n):
67 """
65 """
68 Read digits of pi from a file and compute the n digit frequencies.
66 Read digits of pi from a file and compute the n digit frequencies.
69 """
67 """
70 d = txt_file_to_digits(filename)
68 d = txt_file_to_digits(filename)
71 freqs = n_digit_freqs(d, n)
69 freqs = n_digit_freqs(d, n)
72 return freqs
70 return freqs
73
71
74 # Read digits from a txt file
72 # Read digits from a txt file
75
73
76 def txt_file_to_digits(filename, the_type=str):
74 def txt_file_to_digits(filename, the_type=str):
77 """
75 """
78 Yield the digits of pi read from a .txt file.
76 Yield the digits of pi read from a .txt file.
79 """
77 """
80 with open(filename, 'r') as f:
78 with open(filename, 'r') as f:
81 for line in f.readlines():
79 for line in f.readlines():
82 for c in line:
80 for c in line:
83 if c != '\n' and c!= ' ':
81 if c != '\n' and c!= ' ':
84 yield the_type(c)
82 yield the_type(c)
85
83
86 # Actual counting functions
84 # Actual counting functions
87
85
88 def one_digit_freqs(digits, normalize=False):
86 def one_digit_freqs(digits, normalize=False):
89 """
87 """
90 Consume digits of pi and compute 1 digit freq. counts.
88 Consume digits of pi and compute 1 digit freq. counts.
91 """
89 """
92 freqs = np.zeros(10, dtype='i4')
90 freqs = np.zeros(10, dtype='i4')
93 for d in digits:
91 for d in digits:
94 freqs[int(d)] += 1
92 freqs[int(d)] += 1
95 if normalize:
93 if normalize:
96 freqs = freqs/freqs.sum()
94 freqs = freqs/freqs.sum()
97 return freqs
95 return freqs
98
96
99 def two_digit_freqs(digits, normalize=False):
97 def two_digit_freqs(digits, normalize=False):
100 """
98 """
101 Consume digits of pi and compute 2 digits freq. counts.
99 Consume digits of pi and compute 2 digits freq. counts.
102 """
100 """
103 freqs = np.zeros(100, dtype='i4')
101 freqs = np.zeros(100, dtype='i4')
104 last = digits.next()
102 last = digits.next()
105 this = digits.next()
103 this = digits.next()
106 for d in digits:
104 for d in digits:
107 index = int(last + this)
105 index = int(last + this)
108 freqs[index] += 1
106 freqs[index] += 1
109 last = this
107 last = this
110 this = d
108 this = d
111 if normalize:
109 if normalize:
112 freqs = freqs/freqs.sum()
110 freqs = freqs/freqs.sum()
113 return freqs
111 return freqs
114
112
115 def n_digit_freqs(digits, n, normalize=False):
113 def n_digit_freqs(digits, n, normalize=False):
116 """
114 """
117 Consume digits of pi and compute n digits freq. counts.
115 Consume digits of pi and compute n digits freq. counts.
118
116
119 This should only be used for 1-6 digits.
117 This should only be used for 1-6 digits.
120 """
118 """
121 freqs = np.zeros(pow(10,n), dtype='i4')
119 freqs = np.zeros(pow(10,n), dtype='i4')
122 current = np.zeros(n, dtype=int)
120 current = np.zeros(n, dtype=int)
123 for i in range(n):
121 for i in range(n):
124 current[i] = digits.next()
122 current[i] = digits.next()
125 for d in digits:
123 for d in digits:
126 index = int(''.join(map(str, current)))
124 index = int(''.join(map(str, current)))
127 freqs[index] += 1
125 freqs[index] += 1
128 current[0:-1] = current[1:]
126 current[0:-1] = current[1:]
129 current[-1] = d
127 current[-1] = d
130 if normalize:
128 if normalize:
131 freqs = freqs/freqs.sum()
129 freqs = freqs/freqs.sum()
132 return freqs
130 return freqs
133
131
134 # Plotting functions
132 # Plotting functions
135
133
136 def plot_two_digit_freqs(f2):
134 def plot_two_digit_freqs(f2):
137 """
135 """
138 Plot two digits frequency counts using matplotlib.
136 Plot two digits frequency counts using matplotlib.
139 """
137 """
140 f2_copy = f2.copy()
138 f2_copy = f2.copy()
141 f2_copy.shape = (10,10)
139 f2_copy.shape = (10,10)
142 ax = plt.matshow(f2_copy)
140 ax = plt.matshow(f2_copy)
143 plt.colorbar()
141 plt.colorbar()
144 for i in range(10):
142 for i in range(10):
145 for j in range(10):
143 for j in range(10):
146 plt.text(i-0.2, j+0.2, str(j)+str(i))
144 plt.text(i-0.2, j+0.2, str(j)+str(i))
147 plt.ylabel('First digit')
145 plt.ylabel('First digit')
148 plt.xlabel('Second digit')
146 plt.xlabel('Second digit')
149 return ax
147 return ax
150
148
151 def plot_one_digit_freqs(f1):
149 def plot_one_digit_freqs(f1):
152 """
150 """
153 Plot one digit frequency counts using matplotlib.
151 Plot one digit frequency counts using matplotlib.
154 """
152 """
155 ax = plt.plot(f1,'bo-')
153 ax = plt.plot(f1,'bo-')
156 plt.title('Single digit counts in pi')
154 plt.title('Single digit counts in pi')
157 plt.xlabel('Digit')
155 plt.xlabel('Digit')
158 plt.ylabel('Count')
156 plt.ylabel('Count')
159 return ax
157 return ax
@@ -1,283 +1,284 b''
1 =================
1 =================
2 Parallel examples
2 Parallel examples
3 =================
3 =================
4
4
5 .. note::
5 .. note::
6
6
7 Performance numbers from ``IPython.kernel``, not newparallel.
7 Performance numbers from ``IPython.kernel``, not newparallel.
8
8
9 In this section we describe two more involved examples of using an IPython
9 In this section we describe two more involved examples of using an IPython
10 cluster to perform a parallel computation. In these examples, we will be using
10 cluster to perform a parallel computation. In these examples, we will be using
11 IPython's "pylab" mode, which enables interactive plotting using the
11 IPython's "pylab" mode, which enables interactive plotting using the
12 Matplotlib package. IPython can be started in this mode by typing::
12 Matplotlib package. IPython can be started in this mode by typing::
13
13
14 ipython --pylab
14 ipython --pylab
15
15
16 at the system command line.
16 at the system command line.
17
17
18 150 million digits of pi
18 150 million digits of pi
19 ========================
19 ========================
20
20
21 In this example we would like to study the distribution of digits in the
21 In this example we would like to study the distribution of digits in the
22 number pi (in base 10). While it is not known if pi is a normal number (a
22 number pi (in base 10). While it is not known if pi is a normal number (a
23 number is normal in base 10 if 0-9 occur with equal likelihood) numerical
23 number is normal in base 10 if 0-9 occur with equal likelihood) numerical
24 investigations suggest that it is. We will begin with a serial calculation on
24 investigations suggest that it is. We will begin with a serial calculation on
25 10,000 digits of pi and then perform a parallel calculation involving 150
25 10,000 digits of pi and then perform a parallel calculation involving 150
26 million digits.
26 million digits.
27
27
28 In both the serial and parallel calculation we will be using functions defined
28 In both the serial and parallel calculation we will be using functions defined
29 in the :file:`pidigits.py` file, which is available in the
29 in the :file:`pidigits.py` file, which is available in the
30 :file:`docs/examples/newparallel` directory of the IPython source distribution.
30 :file:`docs/examples/newparallel` directory of the IPython source distribution.
31 These functions provide basic facilities for working with the digits of pi and
31 These functions provide basic facilities for working with the digits of pi and
32 can be loaded into IPython by putting :file:`pidigits.py` in your current
32 can be loaded into IPython by putting :file:`pidigits.py` in your current
33 working directory and then doing:
33 working directory and then doing:
34
34
35 .. sourcecode:: ipython
35 .. sourcecode:: ipython
36
36
37 In [1]: run pidigits.py
37 In [1]: run pidigits.py
38
38
39 Serial calculation
39 Serial calculation
40 ------------------
40 ------------------
41
41
42 For the serial calculation, we will use `SymPy <http://www.sympy.org>`_ to
42 For the serial calculation, we will use `SymPy <http://www.sympy.org>`_ to
43 calculate 10,000 digits of pi and then look at the frequencies of the digits
43 calculate 10,000 digits of pi and then look at the frequencies of the digits
44 0-9. Out of 10,000 digits, we expect each digit to occur 1,000 times. While
44 0-9. Out of 10,000 digits, we expect each digit to occur 1,000 times. While
45 SymPy is capable of calculating many more digits of pi, our purpose here is to
45 SymPy is capable of calculating many more digits of pi, our purpose here is to
46 set the stage for the much larger parallel calculation.
46 set the stage for the much larger parallel calculation.
47
47
48 In this example, we use two functions from :file:`pidigits.py`:
48 In this example, we use two functions from :file:`pidigits.py`:
49 :func:`one_digit_freqs` (which calculates how many times each digit occurs)
49 :func:`one_digit_freqs` (which calculates how many times each digit occurs)
50 and :func:`plot_one_digit_freqs` (which uses Matplotlib to plot the result).
50 and :func:`plot_one_digit_freqs` (which uses Matplotlib to plot the result).
51 Here is an interactive IPython session that uses these functions with
51 Here is an interactive IPython session that uses these functions with
52 SymPy:
52 SymPy:
53
53
54 .. sourcecode:: ipython
54 .. sourcecode:: ipython
55
55
56 In [7]: import sympy
56 In [7]: import sympy
57
57
58 In [8]: pi = sympy.pi.evalf(40)
58 In [8]: pi = sympy.pi.evalf(40)
59
59
60 In [9]: pi
60 In [9]: pi
61 Out[9]: 3.141592653589793238462643383279502884197
61 Out[9]: 3.141592653589793238462643383279502884197
62
62
63 In [10]: pi = sympy.pi.evalf(10000)
63 In [10]: pi = sympy.pi.evalf(10000)
64
64
65 In [11]: digits = (d for d in str(pi)[2:]) # create a sequence of digits
65 In [11]: digits = (d for d in str(pi)[2:]) # create a sequence of digits
66
66
67 In [12]: run pidigits.py # load one_digit_freqs/plot_one_digit_freqs
67 In [12]: run pidigits.py # load one_digit_freqs/plot_one_digit_freqs
68
68
69 In [13]: freqs = one_digit_freqs(digits)
69 In [13]: freqs = one_digit_freqs(digits)
70
70
71 In [14]: plot_one_digit_freqs(freqs)
71 In [14]: plot_one_digit_freqs(freqs)
72 Out[14]: [<matplotlib.lines.Line2D object at 0x18a55290>]
72 Out[14]: [<matplotlib.lines.Line2D object at 0x18a55290>]
73
73
74 The resulting plot of the single digit counts shows that each digit occurs
74 The resulting plot of the single digit counts shows that each digit occurs
75 approximately 1,000 times, but that with only 10,000 digits the
75 approximately 1,000 times, but that with only 10,000 digits the
76 statistical fluctuations are still rather large:
76 statistical fluctuations are still rather large:
77
77
78 .. image:: ../parallel/single_digits.*
78 .. image:: ../parallel/single_digits.*
79
79
80 It is clear that to reduce the relative fluctuations in the counts, we need
80 It is clear that to reduce the relative fluctuations in the counts, we need
81 to look at many more digits of pi. That brings us to the parallel calculation.
81 to look at many more digits of pi. That brings us to the parallel calculation.
82
82
83 Parallel calculation
83 Parallel calculation
84 --------------------
84 --------------------
85
85
86 Calculating many digits of pi is a challenging computational problem in itself.
86 Calculating many digits of pi is a challenging computational problem in itself.
87 Because we want to focus on the distribution of digits in this example, we
87 Because we want to focus on the distribution of digits in this example, we
88 will use pre-computed digits of pi from the website of Professor Yasumasa
88 will use pre-computed digits of pi from the website of Professor Yasumasa
89 Kanada at the University of Tokyo (http://www.super-computing.org). These
89 Kanada at the University of Tokyo (http://www.super-computing.org). These
90 digits come in a set of text files (ftp://pi.super-computing.org/.2/pi200m/)
90 digits come in a set of text files (ftp://pi.super-computing.org/.2/pi200m/)
91 that each have 10 million digits of pi.
91 that each have 10 million digits of pi.
92
92
93 For the parallel calculation, we have copied these files to the local hard
93 For the parallel calculation, we have copied these files to the local hard
94 drives of the compute nodes. A total of 15 of these files will be used, for a
94 drives of the compute nodes. A total of 15 of these files will be used, for a
95 total of 150 million digits of pi. To make things a little more interesting we
95 total of 150 million digits of pi. To make things a little more interesting we
96 will calculate the frequencies of all 2-digit sequences (00-99) and then plot
96 will calculate the frequencies of all 2-digit sequences (00-99) and then plot
97 the result using a 2D matrix in Matplotlib.
97 the result using a 2D matrix in Matplotlib.
98
98
99 The overall idea of the calculation is simple: each IPython engine will
99 The overall idea of the calculation is simple: each IPython engine will
100 compute the two digit counts for the digits in a single file. Then in a final
100 compute the two digit counts for the digits in a single file. Then in a final
101 step the counts from each engine will be added up. To perform this
101 step the counts from each engine will be added up. To perform this
102 calculation, we will need two top-level functions from :file:`pidigits.py`:
102 calculation, we will need two top-level functions from :file:`pidigits.py`:
103
103
104 .. literalinclude:: ../../examples/newparallel/pidigits.py
104 .. literalinclude:: ../../examples/newparallel/pidigits.py
105 :language: python
105 :language: python
106 :lines: 41-56
106 :lines: 41-56
107
107
108 We will also use the :func:`plot_two_digit_freqs` function to plot the
108 We will also use the :func:`plot_two_digit_freqs` function to plot the
109 results. The code to run this calculation in parallel is contained in
109 results. The code to run this calculation in parallel is contained in
110 :file:`docs/examples/newparallel/parallelpi.py`. This code can be run in parallel
110 :file:`docs/examples/newparallel/parallelpi.py`. This code can be run in parallel
111 using IPython by following these steps:
111 using IPython by following these steps:
112
112
113 1. Use :command:`ipclusterz` to start 15 engines. We used an 8 core (2 quad
113 1. Use :command:`ipclusterz` to start 15 engines. We used an 8 core (2 quad
114 core CPUs) cluster with hyperthreading enabled which makes the 8 cores
114 core CPUs) cluster with hyperthreading enabled which makes the 8 cores
115 look like 16 (1 controller + 15 engines) in the OS. However, the maximum
115 look like 16 (1 controller + 15 engines) in the OS. However, the maximum
116 speedup we can observe is still only 8x.
116 speedup we can observe is still only 8x.
117 2. With the file :file:`parallelpi.py` in your current working directory, open
117 2. With the file :file:`parallelpi.py` in your current working directory, open
118 up IPython in pylab mode and type ``run parallelpi.py``. This will download
118 up IPython in pylab mode and type ``run parallelpi.py``. This will download
119 the pi files via ftp the first time you run it, if they are not
119 the pi files via ftp the first time you run it, if they are not
120 present in the Engines' working directory.
120 present in the Engines' working directory.
121
121
122 When run on our 8 core cluster, we observe a speedup of 7.7x. This is slightly
122 When run on our 8 core cluster, we observe a speedup of 7.7x. This is slightly
123 less than linear scaling (8x) because the controller is also running on one of
123 less than linear scaling (8x) because the controller is also running on one of
124 the cores.
124 the cores.
125
125
126 To emphasize the interactive nature of IPython, we now show how the
126 To emphasize the interactive nature of IPython, we now show how the
127 calculation can also be run by simply typing the commands from
127 calculation can also be run by simply typing the commands from
128 :file:`parallelpi.py` interactively into IPython:
128 :file:`parallelpi.py` interactively into IPython:
129
129
130 .. sourcecode:: ipython
130 .. sourcecode:: ipython
131
131
132 In [1]: from IPython.zmq.parallel import client
132 In [1]: from IPython.zmq.parallel import client
133
133
134 # The Client allows us to use the engines interactively.
134 # The Client allows us to use the engines interactively.
135 # We simply pass Client the name of the cluster profile we
135 # We simply pass Client the name of the cluster profile we
136 # are using.
136 # are using.
137 In [2]: c = client.Client(profile='mycluster')
137 In [2]: c = client.Client(profile='mycluster')
138 In [3]: view = c.view(balanced=True)
138
139
139 In [3]: c.ids
140 In [3]: c.ids
140 Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
141 Out[3]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
141
142
142 In [4]: run pidigits.py
143 In [4]: run pidigits.py
143
144
144 In [5]: filestring = 'pi200m.ascii.%(i)02dof20'
145 In [5]: filestring = 'pi200m.ascii.%(i)02dof20'
145
146
146 # Create the list of files to process.
147 # Create the list of files to process.
147 In [6]: files = [filestring % {'i':i} for i in range(1,16)]
148 In [6]: files = [filestring % {'i':i} for i in range(1,16)]
148
149
149 In [7]: files
150 In [7]: files
150 Out[7]:
151 Out[7]:
151 ['pi200m.ascii.01of20',
152 ['pi200m.ascii.01of20',
152 'pi200m.ascii.02of20',
153 'pi200m.ascii.02of20',
153 'pi200m.ascii.03of20',
154 'pi200m.ascii.03of20',
154 'pi200m.ascii.04of20',
155 'pi200m.ascii.04of20',
155 'pi200m.ascii.05of20',
156 'pi200m.ascii.05of20',
156 'pi200m.ascii.06of20',
157 'pi200m.ascii.06of20',
157 'pi200m.ascii.07of20',
158 'pi200m.ascii.07of20',
158 'pi200m.ascii.08of20',
159 'pi200m.ascii.08of20',
159 'pi200m.ascii.09of20',
160 'pi200m.ascii.09of20',
160 'pi200m.ascii.10of20',
161 'pi200m.ascii.10of20',
161 'pi200m.ascii.11of20',
162 'pi200m.ascii.11of20',
162 'pi200m.ascii.12of20',
163 'pi200m.ascii.12of20',
163 'pi200m.ascii.13of20',
164 'pi200m.ascii.13of20',
164 'pi200m.ascii.14of20',
165 'pi200m.ascii.14of20',
165 'pi200m.ascii.15of20']
166 'pi200m.ascii.15of20']
166
167
167 # download the data files if they don't already exist:
168 # download the data files if they don't already exist:
168 In [8]: c.map(fetch_pi_file, files)
169 In [8]: c.map(fetch_pi_file, files)
169
170
170 # This is the parallel calculation using the Client.map method
171 # This is the parallel calculation using the Client.map method
171 # which applies compute_two_digit_freqs to each file in files in parallel.
172 # which applies compute_two_digit_freqs to each file in files in parallel.
172 In [9]: freqs_all = c.map(compute_two_digit_freqs, files)
173 In [9]: freqs_all = c.map(compute_two_digit_freqs, files)
173
174
174 # Add up the frequencies from each engine.
175 # Add up the frequencies from each engine.
175 In [10]: freqs = reduce_freqs(freqs_all)
176 In [10]: freqs = reduce_freqs(freqs_all)
176
177
177 In [11]: plot_two_digit_freqs(freqs)
178 In [11]: plot_two_digit_freqs(freqs)
178 Out[11]: <matplotlib.image.AxesImage object at 0x18beb110>
179 Out[11]: <matplotlib.image.AxesImage object at 0x18beb110>
179
180
180 In [12]: plt.title('2 digit counts of 150m digits of pi')
181 In [12]: plt.title('2 digit counts of 150m digits of pi')
181 Out[12]: <matplotlib.text.Text object at 0x18d1f9b0>
182 Out[12]: <matplotlib.text.Text object at 0x18d1f9b0>
182
183
183 The resulting plot generated by Matplotlib is shown below. The colors indicate
184 The resulting plot generated by Matplotlib is shown below. The colors indicate
184 which two digit sequences are more (red) or less (blue) likely to occur in the
185 which two digit sequences are more (red) or less (blue) likely to occur in the
185 first 150 million digits of pi. We clearly see that the sequence "41" is
186 first 150 million digits of pi. We clearly see that the sequence "41" is
186 most likely and that "06" and "07" are least likely. Further analysis would
187 most likely and that "06" and "07" are least likely. Further analysis would
187 show that the relative size of the statistical fluctuations has decreased
188 show that the relative size of the statistical fluctuations has decreased
188 compared to the 10,000 digit calculation.
189 compared to the 10,000 digit calculation.
189
190
190 .. image:: ../parallel/two_digit_counts.*
191 .. image:: ../parallel/two_digit_counts.*
191
192
192
193
193 Parallel options pricing
194 Parallel options pricing
194 ========================
195 ========================
195
196
196 An option is a financial contract that gives the buyer of the contract the
197 An option is a financial contract that gives the buyer of the contract the
197 right to buy (a "call") or sell (a "put") a secondary asset (a stock for
198 right to buy (a "call") or sell (a "put") a secondary asset (a stock for
198 example) at a particular date in the future (the expiration date) for a
199 example) at a particular date in the future (the expiration date) for a
199 pre-agreed upon price (the strike price). For this right, the buyer pays the
200 pre-agreed upon price (the strike price). For this right, the buyer pays the
200 seller a premium (the option price). There are a wide variety of flavors of
201 seller a premium (the option price). There are a wide variety of flavors of
201 options (American, European, Asian, etc.) that are useful for different
202 options (American, European, Asian, etc.) that are useful for different
202 purposes: hedging against risk, speculation, etc.
203 purposes: hedging against risk, speculation, etc.
203
204
204 Much of modern finance is driven by the need to price these contracts
205 Much of modern finance is driven by the need to price these contracts
205 accurately based on what is known about the properties (such as volatility) of
206 accurately based on what is known about the properties (such as volatility) of
206 the underlying asset. One method of pricing options is to use a Monte Carlo
207 the underlying asset. One method of pricing options is to use a Monte Carlo
207 simulation of the underlying asset price. In this example we use this approach
208 simulation of the underlying asset price. In this example we use this approach
208 to price both European and Asian (path dependent) options for various strike
209 to price both European and Asian (path dependent) options for various strike
209 prices and volatilities.
210 prices and volatilities.
210
211
211 The code for this example can be found in the :file:`docs/examples/newparallel`
212 The code for this example can be found in the :file:`docs/examples/newparallel`
212 directory of the IPython source. The function :func:`price_options` in
213 directory of the IPython source. The function :func:`price_options` in
213 :file:`mcpricer.py` implements the basic Monte Carlo pricing algorithm using
214 :file:`mcpricer.py` implements the basic Monte Carlo pricing algorithm using
214 the NumPy package and is shown here:
215 the NumPy package and is shown here:
215
216
216 .. literalinclude:: ../../examples/newparallel/mcpricer.py
217 .. literalinclude:: ../../examples/newparallel/mcpricer.py
217 :language: python
218 :language: python
218
219
219 To run this code in parallel, we will use IPython's :class:`LoadBalancedView` class,
220 To run this code in parallel, we will use IPython's :class:`LoadBalancedView` class,
220 which distributes work to the engines using dynamic load balancing. This
221 which distributes work to the engines using dynamic load balancing. This
221 view is a wrapper of the :class:`Client` class shown in
222 view is a wrapper of the :class:`Client` class shown in
222 the previous example. The parallel calculation using :class:`LoadBalancedView` can
223 the previous example. The parallel calculation using :class:`LoadBalancedView` can
223 be found in the file :file:`mcpricer.py`. The code in this file creates a
224 be found in the file :file:`mcpricer.py`. The code in this file creates a
224 :class:`TaskClient` instance and then submits a set of tasks using
225 :class:`TaskClient` instance and then submits a set of tasks using
225 :meth:`TaskClient.run` that calculate the option prices for different
226 :meth:`TaskClient.run` that calculate the option prices for different
226 volatilities and strike prices. The results are then plotted as a 2D contour
227 volatilities and strike prices. The results are then plotted as a 2D contour
227 plot using Matplotlib.
228 plot using Matplotlib.
228
229
229 .. literalinclude:: ../../examples/newparallel/mcdriver.py
230 .. literalinclude:: ../../examples/newparallel/mcdriver.py
230 :language: python
231 :language: python
231
232
232 To use this code, start an IPython cluster using :command:`ipclusterz`, open
233 To use this code, start an IPython cluster using :command:`ipclusterz`, open
233 IPython in the pylab mode with the file :file:`mcdriver.py` in your current
234 IPython in the pylab mode with the file :file:`mcdriver.py` in your current
234 working directory and then type:
235 working directory and then type:
235
236
236 .. sourcecode:: ipython
237 .. sourcecode:: ipython
237
238
238 In [7]: run mcdriver.py
239 In [7]: run mcdriver.py
239 Submitted tasks: [0, 1, 2, ...]
240 Submitted tasks: [0, 1, 2, ...]
240
241
241 Once all the tasks have finished, the results can be plotted using the
242 Once all the tasks have finished, the results can be plotted using the
242 :func:`plot_options` function. Here we make contour plots of the Asian
243 :func:`plot_options` function. Here we make contour plots of the Asian
243 call and Asian put options as a function of the volatility and strike price:
244 call and Asian put options as a function of the volatility and strike price:
244
245
245 .. sourcecode:: ipython
246 .. sourcecode:: ipython
246
247
247 In [8]: plot_options(sigma_vals, K_vals, prices['acall'])
248 In [8]: plot_options(sigma_vals, K_vals, prices['acall'])
248
249
249 In [9]: plt.figure()
250 In [9]: plt.figure()
250 Out[9]: <matplotlib.figure.Figure object at 0x18c178d0>
251 Out[9]: <matplotlib.figure.Figure object at 0x18c178d0>
251
252
252 In [10]: plot_options(sigma_vals, K_vals, prices['aput'])
253 In [10]: plot_options(sigma_vals, K_vals, prices['aput'])
253
254
254 These results are shown in the two figures below. On an 8 core cluster the
255 These results are shown in the two figures below. On an 8 core cluster the
255 entire calculation (10 strike prices, 10 volatilities, 100,000 paths for each)
256 entire calculation (10 strike prices, 10 volatilities, 100,000 paths for each)
256 took 30 seconds in parallel, giving a speedup of 7.7x, which is comparable
257 took 30 seconds in parallel, giving a speedup of 7.7x, which is comparable
257 to the speedup observed in our previous example.
258 to the speedup observed in our previous example.
258
259
259 .. image:: ../parallel/asian_call.*
260 .. image:: ../parallel/asian_call.*
260
261
261 .. image:: ../parallel/asian_put.*
262 .. image:: ../parallel/asian_put.*
262
263
263 Conclusion
264 Conclusion
264 ==========
265 ==========
265
266
266 To conclude these examples, we summarize the key features of IPython's
267 To conclude these examples, we summarize the key features of IPython's
267 parallel architecture that have been demonstrated:
268 parallel architecture that have been demonstrated:
268
269
269 * Serial code can often be parallelized with only a few extra lines of code.
270 * Serial code can often be parallelized with only a few extra lines of code.
270 We have used the :class:`DirectView` and :class:`LoadBalancedView` classes
271 We have used the :class:`DirectView` and :class:`LoadBalancedView` classes
271 for this purpose.
272 for this purpose.
272 * The resulting parallel code can be run without ever leaving IPython's
273 * The resulting parallel code can be run without ever leaving IPython's
273 interactive shell.
274 interactive shell.
274 * Any data computed in parallel can be explored interactively through
275 * Any data computed in parallel can be explored interactively through
275 visualization or further numerical calculations.
276 visualization or further numerical calculations.
276 * We have run these examples on a cluster running Windows HPC Server 2008.
277 * We have run these examples on a cluster running Windows HPC Server 2008.
277 IPython's built in support for the Windows HPC job scheduler makes it
278 IPython's built in support for the Windows HPC job scheduler makes it
278 easy to get started with IPython's parallel capabilities.
279 easy to get started with IPython's parallel capabilities.
279
280
280 .. note::
281 .. note::
281
282
282 The newparallel code has never been run on Windows HPC Server, so the last
283 The newparallel code has never been run on Windows HPC Server, so the last
283 conclusion is untested.
284 conclusion is untested.
General Comments 0
You need to be logged in to leave comments. Login now