# encoding: utf-8 # -*- test-case-name: IPython.kernel.test.test_multiengineclient -*- """General Classes for IMultiEngine clients.""" __docformat__ = "restructuredtext en" #------------------------------------------------------------------------------- # Copyright (C) 2008 The IPython Development Team # # Distributed under the terms of the BSD License. The full license is in # the file COPYING, distributed as part of this software. #------------------------------------------------------------------------------- #------------------------------------------------------------------------------- # Imports #------------------------------------------------------------------------------- import sys import cPickle as pickle from types import FunctionType import linecache from twisted.internet import reactor from twisted.python import components, log from twisted.python.failure import Failure from zope.interface import Interface, implements, Attribute from IPython.ColorANSI import TermColors from IPython.kernel.twistedutil import blockingCallFromThread from IPython.kernel import error from IPython.kernel.parallelfunction import ParallelFunction from IPython.kernel import map as Map from IPython.kernel import multiengine as me from IPython.kernel.multiengine import (IFullMultiEngine, IFullSynchronousMultiEngine) #------------------------------------------------------------------------------- # Pending Result things #------------------------------------------------------------------------------- class IPendingResult(Interface): """A representation of a result that is pending. This class is similar to Twisted's `Deferred` object, but is designed to be used in a synchronous context. """ result_id=Attribute("ID of the deferred on the other side") client=Attribute("A client that I came from") r=Attribute("An attribute that is a property that calls and returns get_result") def get_result(default=None, block=True): """ Get a result that is pending. 
:Parameters: default The value to return if the result is not ready. block : boolean Should I block for the result. :Returns: The actual result or the default value. """ def add_callback(f, *args, **kwargs): """ Add a callback that is called with the result. If the original result is foo, adding a callback will cause f(foo, *args, **kwargs) to be returned instead. If multiple callbacks are registered, they are chained together: the result of one is passed to the next and so on. Unlike Twisted's Deferred object, there is no errback chain. Thus any exception raised will not be caught and handled. User must catch these by hand when calling `get_result`. """ class PendingResult(object): """A representation of a result that is not yet ready. A user should not create a `PendingResult` instance by hand. Methods ======= * `get_result` * `add_callback` Properties ========== * `r` """ def __init__(self, client, result_id): """Create a PendingResult with a result_id and a client instance. The client should implement `_getPendingResult(result_id, block)`. """ self.client = client self.result_id = result_id self.called = False self.raised = False self.callbacks = [] def get_result(self, default=None, block=True): """Get a result that is pending. This method will connect to an IMultiEngine adapted controller and see if the result is ready. If the action triggers an exception raise it and record it. This method records the result/exception once it is retrieved. Calling `get_result` again will get this cached result or will re-raise the exception. The .r attribute is a property that calls `get_result` with block=True. :Parameters: default The value to return if the result is not ready. block : boolean Should I block for the result. :Returns: The actual result or the default value. 
""" if self.called: if self.raised: raise self.result[0], self.result[1], self.result[2] else: return self.result try: result = self.client.get_pending_deferred(self.result_id, block) except error.ResultNotCompleted: return default except: # Reraise other error, but first record them so they can be reraised # later if .r or get_result is called again. self.result = sys.exc_info() self.called = True self.raised = True raise else: for cb in self.callbacks: result = cb[0](result, *cb[1], **cb[2]) self.result = result self.called = True return result def add_callback(self, f, *args, **kwargs): """Add a callback that is called with the result. If the original result is result, adding a callback will cause f(result, *args, **kwargs) to be returned instead. If multiple callbacks are registered, they are chained together: the result of one is passed to the next and so on. Unlike Twisted's Deferred object, there is no errback chain. Thus any exception raised will not be caught and handled. User must catch these by hand when calling `get_result`. 
""" assert callable(f) self.callbacks.append((f, args, kwargs)) def __cmp__(self, other): if self.result_id < other.result_id: return -1 else: return 1 def _get_r(self): return self.get_result(block=True) r = property(_get_r) """This property is a shortcut to a `get_result(block=True)`.""" #------------------------------------------------------------------------------- # Pretty printing wrappers for certain lists #------------------------------------------------------------------------------- class ResultList(list): """A subclass of list that pretty prints the output of `execute`/`get_result`.""" def __repr__(self): output = [] blue = TermColors.Blue normal = TermColors.Normal red = TermColors.Red green = TermColors.Green output.append("<Results List>\n") for cmd in self: if isinstance(cmd, Failure): output.append(cmd) else: target = cmd.get('id',None) cmd_num = cmd.get('number',None) cmd_stdin = cmd.get('input',{}).get('translated','No Input') cmd_stdout = cmd.get('stdout', None) cmd_stderr = cmd.get('stderr', None) output.append("%s[%i]%s In [%i]:%s %s\n" % \ (green, target, blue, cmd_num, normal, cmd_stdin)) if cmd_stdout: output.append("%s[%i]%s Out[%i]:%s %s\n" % \ (green, target, red, cmd_num, normal, cmd_stdout)) if cmd_stderr: output.append("%s[%i]%s Err[%i]:\n%s %s" % \ (green, target, red, cmd_num, normal, cmd_stderr)) return ''.join(output) def wrapResultList(result): """A function that wraps the output of `execute`/`get_result` -> `ResultList`.""" if len(result) == 0: result = [result] return ResultList(result) class QueueStatusList(list): """A subclass of list that pretty prints the output of `queue_status`.""" def __repr__(self): output = [] output.append("<Queue Status List>\n") for e in self: output.append("Engine: %s\n" % repr(e[0])) output.append(" Pending: %s\n" % repr(e[1]['pending'])) for q in e[1]['queue']: output.append(" Command: %s\n" % repr(q)) return ''.join(output) 
#-------------------------------------------------------------------------------
# InteractiveMultiEngineClient
#-------------------------------------------------------------------------------

class InteractiveMultiEngineClient(object):
    """A mixin class that add a few methods to a multiengine client.

    The methods in this mixin class are designed for interactive usage.
    The mixin relies on the concrete class providing `push`, `pull`,
    `execute`, `get_ids` and `_findTargetsAndBlock`.
    """

    def activate(self):
        """Make this `MultiEngineClient` active for parallel magic commands.

        IPython has a magic command syntax to work with `MultiEngineClient`
        objects.  In a given IPython session there is a single active one.
        While there can be many `MultiEngineClient` created and used by the
        user, there is only one active one.  The active `MultiEngineClient`
        is used whenever the magic commands %px and %autopx are used.

        The activate() method is called on a given `MultiEngineClient` to
        make it active.  Once this has been done, the magic commands can be
        used.
        """
        try:
            __IPYTHON__.activeController = self
        except NameError:
            # __IPYTHON__ only exists inside an IPython session.
            print("The IPython Controller magics only work within IPython.")

    def __setitem__(self, key, value):
        """Add a dictionary interface for pushing/pulling.

        This functions as a shorthand for `push`.

        :Parameters:
            key : str
                What to call the remote object.
            value : object
                The local Python object to push.
        """
        targets, block = self._findTargetsAndBlock()
        return self.push({key: value}, targets=targets, block=block)

    def __getitem__(self, key):
        """Add a dictionary interface for pushing/pulling.

        This functions as a shorthand to `pull`.

        :Parameters:
         - `key`: A string representing the key.

        :Raises: TypeError if `key` is not a str.
        """
        if isinstance(key, str):
            targets, block = self._findTargetsAndBlock()
            return self.pull(key, targets=targets, block=block)
        else:
            raise TypeError("__getitem__ only takes strs")

    def __len__(self):
        """Return the number of available engines."""
        return len(self.get_ids())

    def parallelize(self, func, targets=None, block=None):
        """Build a `ParallelFunction` object for `func` on engines.

        The returned object is intended to be a parallel version of `func`
        that takes a local sequence as its only argument and calls (in
        parallel) `func` on each element of that sequence.  The
        `ParallelFunction` object has a `targets` attribute that controls
        which engines the function is run on.

        :Parameters:
            func
                The function to run on the engines.  It is passed to
                `ParallelFunction` unchanged.
                NOTE(review): the accepted forms (callable vs. source
                string) are defined by `ParallelFunction` -- confirm there.
            targets : int, list or 'all'
                The engine ids the action will apply to.  Call `get_ids` to
                see a list of currently available engines.  If None, the
                client's `targets` attribute is used.
            block : boolean
                If None, the client's `block` attribute is used.

        :Returns:  A `ParallelFunction` object.

        Examples
        ========

        >>> psin = rc.parallelize('lambda x:sin(x)', targets='all')
        >>> psin(range(10000))
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return ParallelFunction(func, self, targets, block)

    #---------------------------------------------------------------------------
    # Make this a context manager for with
    #---------------------------------------------------------------------------

    def findsource_file(self, f):
        """Return the dedented source that follows frame `f`'s current line.

        The source is looked up with `inspect.findsource` for the frame's
        code object and handed to `strip_whitespace`.
        """
        # `findsource` is not imported at module level, so import it here.
        from inspect import findsource
        linecache.checkcache()
        s = findsource(f.f_code)
        # s[0] is the list of source lines; keep everything after the line
        # the frame is currently executing (f_lineno is 1-based).
        wsource = s[0][f.f_lineno:]
        return strip_whitespace(wsource)

    def findsource_ipython(self, f):
        """Return the dedented body of the last raw IPython input.

        The first line of the input is dropped and the remainder is handed
        to `strip_whitespace`.  The frame `f` is not used.
        """
        from IPython import ipapi
        self.ip = ipapi.get()
        wsource = [l+'\n' for l in
                   self.ip.IP.input_hist_raw[-1].splitlines()[1:]]
        return strip_whitespace(wsource)

    def __enter__(self):
        """Extract the source of the `with` body and execute it remotely.

        The result of `execute` is stored in `self._with_context_result`.
        """
        f = sys._getframe(1)
        if f.f_code.co_filename == '<ipython console>':
            s = self.findsource_ipython(f)
        else:
            s = self.findsource_file(f)

        self._with_context_result = self.execute(s)

    def __exit__(self, etype, value, tb):
        """Swallow `error.StopLocalExecution`; let anything else propagate."""
        # etype is None when the body finished without raising, and
        # issubclass() rejects None, so check for that first.
        return etype is not None and issubclass(etype, error.StopLocalExecution)


def remote():
    """Raise `error.StopLocalExecution` to stop local execution of the code."""
    m = 'Special exception to stop local execution of parallel code.'
    raise error.StopLocalExecution(m)


def strip_whitespace(source):
    """Extract and dedent the body of a `with` block from `source`.

    :Parameters:
        source : list of str
            Source lines (normally newline terminated) starting at the first
            line of the block body.

    :Returns: The dedented code that follows the ``remote()`` call, as a
        single string.  Extraction stops before the first line that is
        indented less than the first code line.

    :Raises: ValueError if the first non-comment line does not contain
        ``remote()``, or if `source` contains no code at all.

    WARNING: comments that are misindented are tolerated, but triple quoted
    strings that have flush left text are NOT detected.
    """
    missing = 'remote() call missing at the start of code'

    # Expand tabs to avoid any confusion.
    wsource = [l.expandtabs(4) for l in source]

    # Detect the indentation level from the first line that contains code.
    col = None
    for line in wsource:
        stripped = line.lstrip(' ')
        if stripped and not stripped.isspace():
            col = len(line) - len(stripped)
            break
    if col is None:
        # Empty or all-blank input: there is no remote() call to find.
        raise ValueError(missing)

    # Find the first line with less indentation than `col`; the block ends
    # just before it.  Blank lines and misindented comments do not end it.
    end = len(wsource)
    for lno, line in enumerate(wsource):
        lead = line[:col]
        if not lead or lead.isspace():
            continue
        if lead.lstrip().startswith('#'):
            continue
        end = lno
        break

    # Dedent.  A misindented comment has non-blank text inside its first
    # `col` columns; slicing would cut it mid-comment and turn the remainder
    # into code, so it is left-stripped instead.
    src_lines = []
    for line in wsource[:end]:
        lead = line[:col]
        if lead and not lead.isspace():
            src_lines.append(line.lstrip())
        else:
            src_lines.append(line[col:])

    # Finally, check that the source's first non-comment line contains the
    # special call 'remote()'.
    for nline, line in enumerate(src_lines):
        if line.isspace() or line.startswith('#'):
            continue
        if 'remote()' in line:
            break
        raise ValueError(missing)
    else:
        raise ValueError(missing)

    return ''.join(src_lines[nline+1:])


#-------------------------------------------------------------------------------
# The top-level MultiEngine client adaptor
#-------------------------------------------------------------------------------

class IFullBlockingMultiEngineClient(Interface):
    pass


class FullBlockingMultiEngineClient(InteractiveMultiEngineClient):
    """
    A blocking client to the `IMultiEngine` controller interface.

    This class allows users to use a set of engines for a parallel
    computation through the `IMultiEngine` interface.  In this interface,
    each engine has a specific id (an int) that is used to refer to the
    engine, run code on it, etc.

    Most methods take `targets` and `block` keyword arguments; when either
    is None the instance attributes `targets` (initially 'all') and `block`
    (initially True) are used instead.
    """

    implements(IFullBlockingMultiEngineClient)

    def __init__(self, smultiengine):
        self.smultiengine = smultiengine
        self.block = True
        self.targets = 'all'

    def _findBlock(self, block=None):
        """Return `block`, or `self.block` when `block` is None.

        :Raises: ValueError if `block` is neither None, True nor False.
        """
        if block is None:
            return self.block
        else:
            if block in (True, False):
                return block
            else:
                raise ValueError("block must be True or False")

    def _findTargets(self, targets=None):
        """Return `targets`, or `self.targets` when `targets` is None.

        :Raises: ValueError if `targets` is not a str, list, tuple or int.
        """
        if targets is None:
            return self.targets
        else:
            if not isinstance(targets, (str,list,tuple,int)):
                raise ValueError("targets must be a str, list, tuple or int")
            return targets

    def _findTargetsAndBlock(self, targets=None, block=None):
        """Resolve both `targets` and `block` against the instance defaults."""
        return self._findTargets(targets), self._findBlock(block)

    def _blockFromThread(self, function, *args, **kwargs):
        """Call `function` via `blockingCallFromThread` and wrap the result.

        `kwargs` must contain a non-None 'block'.  When `block` is false the
        value returned by the call is used as the result id of a new
        `PendingResult`; otherwise it is returned directly.

        :Raises: error.MissingBlockArgument if 'block' is missing or None.
        """
        block = kwargs.get('block', None)
        if block is None:
            raise error.MissingBlockArgument("'block' keyword argument is missing")
        result = blockingCallFromThread(function, *args, **kwargs)
        if not block:
            result = PendingResult(self, result)
        return result

    def get_pending_deferred(self, deferredID, block):
        """Fetch the result stored under `deferredID` from the multiengine."""
        return blockingCallFromThread(self.smultiengine.get_pending_deferred, deferredID, block)

    def barrier(self, pendingResults):
        """Synchronize a set of `PendingResults`.

        This method is a synchronization primitive that waits for a set of
        `PendingResult` objects to complete.  More specifically, barrier does
        the following.

        * The `PendingResult`s are sorted by result_id.
        * The `get_result` method is called for each `PendingResult`
          sequentially with block=True.
        * If a `PendingResult` gets a result that is an exception, it is
          trapped and can be re-raised later by calling `get_result` again.
        * The `PendingResult`s are flushed from the controller.

        After barrier has been called on a `PendingResult`, its results can
        be retrieved by calling `get_result` again or accessing the `r`
        attribute of the instance.

        :Raises: error.NotAPendingResult if any item is not a
            `PendingResult`.
        """

        # Convert to list for sorting and check class type
        prList = list(pendingResults)
        for pr in prList:
            if not isinstance(pr, PendingResult):
                raise error.NotAPendingResult("Objects passed to barrier must be PendingResult instances")

        # Sort the PendingResults so they are in order
        prList.sort()
        # Block on each PendingResult object
        for pr in prList:
            try:
                pr.get_result(block=True)
            except Exception:
                # Deliberately ignored: the PendingResult has cached the
                # exception and re-raises it on the next get_result()/.r.
                pass

    def flush(self):
        """
        Clear all pending deferreds/results from the controller.

        For each `PendingResult` that is created by this client, the
        controller holds on to the result for that `PendingResult`.  This
        can be a problem if there are a large number of `PendingResult`
        objects that are created.

        Once the result of the `PendingResult` has been retrieved, the
        result is removed from the controller, but if a user doesn't get a
        result (they just ignore the `PendingResult`) the result is kept
        forever on the controller.  This method allows the user to clear out
        all un-retrieved results on the controller.
        """
        r = blockingCallFromThread(self.smultiengine.clear_pending_deferreds)
        return r

    clear_pending_results = flush

    #---------------------------------------------------------------------------
    # IEngineMultiplexer related methods
    #---------------------------------------------------------------------------

    def execute(self, lines, targets=None, block=None):
        """
        Execute code on a set of engines.

        :Parameters:
            lines : str
                The Python code to execute as a string
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result (as a
                `ResultList`).  If False, a `PendingResult` is returned
                which can be used to get the result at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        result = blockingCallFromThread(self.smultiengine.execute, lines,
            targets=targets, block=block)
        if block:
            result = ResultList(result)
        else:
            result = PendingResult(self, result)
            # Wrap the eventual value for pretty printing as well.
            result.add_callback(wrapResultList)
        return result

    def push(self, namespace, targets=None, block=None):
        """
        Push a dictionary of keys and values to engines namespace.

        Each engine has a persistent namespace.  This method is used to push
        Python objects into that namespace.

        The objects in the namespace must be pickleable.

        :Parameters:
            namespace : dict
                A dict that contains Python objects to be injected into
                the engine persistent namespace.
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result.  If
                False, a `PendingResult` is returned which can be used to
                get the result at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.push, namespace,
            targets=targets, block=block)

    def pull(self, keys, targets=None, block=None):
        """
        Pull Python objects by key out of engines namespaces.

        :Parameters:
            keys : str or list of str
                The names of the variables to be pulled
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result.  If
                False, a `PendingResult` is returned which can be used to
                get the result at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.pull, keys, targets=targets, block=block)

    def push_function(self, namespace, targets=None, block=None):
        """
        Push a Python function to an engine.

        This method is used to push a Python function to an engine.  This
        method can then be used in code on the engines.  Closures are not
        supported.

        :Parameters:
            namespace : dict
                A dict whose values are the functions to be pushed.  The
                keys give that names that the function will appear as in
                the engines namespace.
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result.  If
                False, a `PendingResult` is returned which can be used to
                get the result at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.push_function, namespace,
            targets=targets, block=block)

    def pull_function(self, keys, targets=None, block=None):
        """
        Pull a Python function from an engine.

        This method is used to pull a Python function from an engine.
        Closures are not supported.

        :Parameters:
            keys : str or list of str
                The names of the functions to be pulled
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result.  If
                False, a `PendingResult` is returned which can be used to
                get the result at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.pull_function, keys,
            targets=targets, block=block)

    def push_serialized(self, namespace, targets=None, block=None):
        """Forward to the multiengine's `push_serialized` (see `push`)."""
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.push_serialized, namespace,
            targets=targets, block=block)

    def pull_serialized(self, keys, targets=None, block=None):
        """Forward to the multiengine's `pull_serialized` (see `pull`)."""
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.pull_serialized, keys,
            targets=targets, block=block)

    def get_result(self, i=None, targets=None, block=None):
        """
        Get a previous result.

        When code is executed in an engine, a dict is created and returned.
        This method retrieves that dict for previous commands.

        :Parameters:
            i : int
                The number of the result to get
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result (as a
                `ResultList`).  If False, a `PendingResult` is returned
                which can be used to get the result at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        result = blockingCallFromThread(self.smultiengine.get_result, i,
            targets=targets, block=block)
        if block:
            result = ResultList(result)
        else:
            result = PendingResult(self, result)
            # Wrap the eventual value for pretty printing as well.
            result.add_callback(wrapResultList)
        return result

    def reset(self, targets=None, block=None):
        """
        Reset an engine.

        This method clears out the namespace of an engine.

        :Parameters:
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result.  If
                False, a `PendingResult` is returned which can be used to
                get the result at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.reset, targets=targets, block=block)

    def keys(self, targets=None, block=None):
        """
        Get a list of all the variables in an engine's namespace.

        :Parameters:
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result.  If
                False, a `PendingResult` is returned which can be used to
                get the result at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.keys, targets=targets, block=block)

    def kill(self, controller=False, targets=None, block=None):
        """
        Kill the engines and controller.

        This method is used to stop the engine and controller by calling
        `reactor.stop`.

        :Parameters:
            controller : boolean
                If True, kill the engines and controller.  If False, just
                the engines
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result.  If
                False, a `PendingResult` is returned which can be used to
                get the result at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.kill, controller, targets=targets, block=block)

    def clear_queue(self, targets=None, block=None):
        """
        Clear out the controller's queue for an engine.

        The controller maintains a queue for each engine.  This clears it
        out.

        :Parameters:
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result.  If
                False, a `PendingResult` is returned which can be used to
                get the result at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.clear_queue, targets=targets, block=block)

    def queue_status(self, targets=None, block=None):
        """
        Get the status of an engines queue.

        :Parameters:
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result.  If
                False, a `PendingResult` is returned which can be used to
                get the result at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.queue_status, targets=targets, block=block)

    def set_properties(self, properties, targets=None, block=None):
        """Forward to the multiengine's `set_properties`."""
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.set_properties, properties, targets=targets, block=block)

    def get_properties(self, keys=None, targets=None, block=None):
        """Forward to the multiengine's `get_properties`."""
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.get_properties, keys, targets=targets, block=block)

    def has_properties(self, keys, targets=None, block=None):
        """Forward to the multiengine's `has_properties`."""
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.has_properties, keys, targets=targets, block=block)

    def del_properties(self, keys, targets=None, block=None):
        """Forward to the multiengine's `del_properties`."""
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.del_properties, keys, targets=targets, block=block)

    def clear_properties(self, targets=None, block=None):
        """Forward to the multiengine's `clear_properties`."""
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.clear_properties, targets=targets, block=block)

    #---------------------------------------------------------------------------
    # IMultiEngine related methods
    #---------------------------------------------------------------------------

    def get_ids(self):
        """
        Returns the ids of currently registered engines.
        """
        result = blockingCallFromThread(self.smultiengine.get_ids)
        return result

    #---------------------------------------------------------------------------
    # IMultiEngineCoordinator
    #---------------------------------------------------------------------------

    def scatter(self, key, seq, style='basic', flatten=False, targets=None, block=None):
        """
        Partition a Python sequence and send the partitions to a set of engines.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.scatter, key, seq,
            style, flatten, targets=targets, block=block)

    def gather(self, key, style='basic', targets=None, block=None):
        """
        Gather a partitioned sequence on a set of engines as a single local seq.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.gather, key, style,
            targets=targets, block=block)

    def map(self, func, seq, style='basic', targets=None, block=None):
        """
        A parallelized version of Python's builtin map
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.map, func, seq,
            style, targets=targets, block=block)

    #---------------------------------------------------------------------------
    # IMultiEngineExtras
    #---------------------------------------------------------------------------

    def zip_pull(self, keys, targets=None, block=None):
        """Forward to the multiengine's `zip_pull`."""
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.zip_pull, keys,
            targets=targets, block=block)

    def run(self, filename, targets=None, block=None):
        """
        Run a Python code in a file on the engines.

        :Parameters:
            filename : str
                The name of the local file to run
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result.  If
                False, a `PendingResult` is returned which can be used to
                get the result at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.run, filename,
            targets=targets, block=block)


components.registerAdapter(FullBlockingMultiEngineClient,
            IFullSynchronousMultiEngine, IFullBlockingMultiEngineClient)