##// END OF EJS Templates
Update md5 calls.
Update md5 calls.

File last commit:

r2526:ec6b47e5
r3116:be0324d2
Show More
multiengineclient.py
905 lines | 34.8 KiB | text/x-python | PythonLexer
# encoding: utf-8
# -*- test-case-name: IPython.kernel.test.test_multiengineclient -*-
"""General Classes for IMultiEngine clients."""
__docformat__ = "restructuredtext en"
#-------------------------------------------------------------------------------
# Copyright (C) 2008 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
# Imports
#-------------------------------------------------------------------------------
import sys
import warnings
from twisted.python import components
from twisted.python.failure import Failure
from zope.interface import Interface, implements, Attribute
try:
from foolscap.api import DeadReferenceError
except ImportError:
from foolscap import DeadReferenceError
from IPython.utils.coloransi import TermColors
from IPython.kernel.twistedutil import blockingCallFromThread
from IPython.kernel import error
from IPython.kernel.parallelfunction import ParallelFunction
from IPython.kernel.mapper import (
MultiEngineMapper,
IMultiEngineMapperFactory,
IMapper
)
from IPython.kernel.multiengine import IFullSynchronousMultiEngine
#-------------------------------------------------------------------------------
# Pending Result things
#-------------------------------------------------------------------------------
class IPendingResult(Interface):
    """Interface for a result that may not have arrived yet.

    It plays a role similar to Twisted's `Deferred`, but is meant to be
    consumed from synchronous code.
    """

    result_id = Attribute("ID of the deferred on the other side")
    client = Attribute("A client that I came from")
    r = Attribute("An attribute that is a property that calls and returns get_result")

    def get_result(default=None, block=True):
        """
        Fetch the pending result.

        :Parameters:
            default
                Value handed back when the result is not ready yet.
            block : boolean
                Whether to wait for the result.

        :Returns: The actual result or the default value.
        """

    def add_callback(f, *args, **kwargs):
        """
        Register a callback to be applied to the result.

        With an original result ``foo``, registering ``f`` means that
        ``f(foo, *args, **kwargs)`` is returned in its place. Several
        callbacks form a chain: each one receives the value returned by
        the previous one.

        There is no errback chain as in Twisted's Deferred, so exceptions
        are neither caught nor handled here. Callers have to catch them
        themselves around `get_result`.
        """
class PendingResult(object):
"""A representation of a result that is not yet ready.
A user should not create a `PendingResult` instance by hand.
Methods:
* `get_result`
* `add_callback`
Properties:
* `r`
"""
def __init__(self, client, result_id):
"""Create a PendingResult with a result_id and a client instance.
The client should implement `_getPendingResult(result_id, block)`.
"""
self.client = client
self.result_id = result_id
self.called = False
self.raised = False
self.callbacks = []
def get_result(self, default=None, block=True):
"""Get a result that is pending.
This method will connect to an IMultiEngine adapted controller
and see if the result is ready. If the action triggers an exception
raise it and record it. This method records the result/exception once it is
retrieved. Calling `get_result` again will get this cached result or will
re-raise the exception. The .r attribute is a property that calls
`get_result` with block=True.
:Parameters:
default
The value to return if the result is not ready.
block : boolean
Should I block for the result.
:Returns: The actual result or the default value.
"""
if self.called:
if self.raised:
raise self.result[0], self.result[1], self.result[2]
else:
return self.result
try:
result = self.client.get_pending_deferred(self.result_id, block)
except error.ResultNotCompleted:
return default
except:
# Reraise other error, but first record them so they can be reraised
# later if .r or get_result is called again.
self.result = sys.exc_info()
self.called = True
self.raised = True
raise
else:
for cb in self.callbacks:
result = cb[0](result, *cb[1], **cb[2])
self.result = result
self.called = True
return result
def add_callback(self, f, *args, **kwargs):
"""Add a callback that is called with the result.
If the original result is result, adding a callback will cause
f(result, *args, **kwargs) to be returned instead. If multiple
callbacks are registered, they are chained together: the result of
one is passed to the next and so on.
Unlike Twisted's Deferred object, there is no errback chain. Thus
any exception raised will not be caught and handled. User must
catch these by hand when calling `get_result`.
"""
assert callable(f)
self.callbacks.append((f, args, kwargs))
def __cmp__(self, other):
if self.result_id < other.result_id:
return -1
else:
return 1
def _get_r(self):
return self.get_result(block=True)
r = property(_get_r)
"""This property is a shortcut to a `get_result(block=True)`."""
#-------------------------------------------------------------------------------
# Pretty printing wrappers for certain lists
#-------------------------------------------------------------------------------
class ResultList(list):
    """A subclass of list that pretty prints the output of `execute`/`get_result`.

    Each item is either a `Failure` or a dict-like command record that is
    read with the keys 'id', 'number', 'input', 'stdout' and 'stderr'.
    """

    def __repr__(self):
        """Return one text block with an In/Out/Err section per item."""
        output = []
        # These colored prompts were not working on Windows
        if sys.platform == 'win32':
            blue = normal = red = green = ''
        else:
            blue = TermColors.Blue
            normal = TermColors.Normal
            red = TermColors.Red
            green = TermColors.Green
        output.append("<Results List>\n")
        for cmd in self:
            if isinstance(cmd, Failure):
                # ''.join() only accepts strings; appending the Failure
                # object itself made __repr__ raise TypeError.
                output.append("%r\n" % (cmd,))
            else:
                target = cmd.get('id', None)
                cmd_num = cmd.get('number', None)
                cmd_stdin = cmd.get('input', {}).get('translated', 'No Input')
                cmd_stdout = cmd.get('stdout', None)
                cmd_stderr = cmd.get('stderr', None)
                output.append("%s[%i]%s In [%i]:%s %s\n" %
                              (green, target,
                               blue, cmd_num, normal, cmd_stdin))
                if cmd_stdout:
                    output.append("%s[%i]%s Out[%i]:%s %s\n" %
                                  (green, target,
                                   red, cmd_num, normal, cmd_stdout))
                if cmd_stderr:
                    output.append("%s[%i]%s Err[%i]:\n%s %s" %
                                  (green, target,
                                   red, cmd_num, normal, cmd_stderr))
        return ''.join(output)
def wrapResultList(result):
    """Turn the output of `execute`/`get_result` into a `ResultList`.

    A zero-length `result` is first placed inside a one-element list.
    """
    wrapped = [result] if len(result) == 0 else result
    return ResultList(wrapped)
class QueueStatusList(list):
    """List subclass whose repr pretty prints the output of `queue_status`.

    Every item is indexed as ``item[0]`` (the engine) and ``item[1]`` (a
    mapping providing the 'pending' and 'queue' keys).
    """

    def __repr__(self):
        pieces = ["<Queue Status List>\n"]
        for entry in self:
            pieces.append("Engine: %s\n" % repr(entry[0]))
            status = entry[1]
            pieces.append(" Pending: %s\n" % repr(status['pending']))
            # One line per command still waiting in the queue.
            pieces.extend(
                " Command: %s\n" % repr(queued) for queued in status['queue']
            )
        return ''.join(pieces)
#-------------------------------------------------------------------------------
# InteractiveMultiEngineClient
#-------------------------------------------------------------------------------
class InteractiveMultiEngineClient(object):
    """Mixin adding convenience methods for interactive sessions.

    The host class is expected to supply `_findTargetsAndBlock`, `push`,
    `pull` and `get_ids`, which the methods below call.
    """

    def activate(self):
        """Make this `MultiEngineClient` active for parallel magic commands.

        IPython offers a magic command syntax for `MultiEngineClient`
        objects. Any number of clients may exist in a session, but only one
        of them is the active one, and it is the one used by the %px and
        %autopx magics.

        Calling activate() on a client makes it the active one, after which
        the magic commands can be used.
        """
        try:
            # This is injected into __builtins__.
            shell = get_ipython()
        except NameError:
            print("The IPython parallel magics (%result, %px, %autopx) only work within IPython.")
            return
        pmagic = shell.get_component('parallel_magic')
        if pmagic is None:
            print("You must first load the parallelmagic extension "
                  "by doing '%load_ext parallelmagic'")
            return
        pmagic.active_multiengine_client = self

    def __setitem__(self, key, value):
        """Dictionary-style shorthand for `push`.

        :Parameters:
            key : str
                What to call the remote object.
            value : object
                The local Python object to push.
        """
        targets, block = self._findTargetsAndBlock()
        namespace = {key: value}
        return self.push(namespace, targets=targets, block=block)

    def __getitem__(self, key):
        """Dictionary-style shorthand for `pull`.

        :Parameters:
         - `key`: A string representing the key.

        Raises TypeError when `key` is not a str.
        """
        if not isinstance(key, str):
            raise TypeError("__getitem__ only takes strs")
        targets, block = self._findTargetsAndBlock()
        return self.pull(key, targets=targets, block=block)

    def __len__(self):
        """Return the number of available engines."""
        engine_ids = self.get_ids()
        return len(engine_ids)
#-------------------------------------------------------------------------------
# The top-level MultiEngine client adaptor
#-------------------------------------------------------------------------------
# Warning text passed to warnings.warn() by the engine-property methods
# (set_properties, get_properties, has_properties, del_properties and
# clear_properties) of FullBlockingMultiEngineClient.
_prop_warn = """\
We are currently refactoring the task dependency system. This might
involve the removal of this method and other methods related to engine
properties. Please see the docstrings for IPython.kernel.TaskRejectError
for more information."""
class IFullBlockingMultiEngineClient(Interface):
    """Interface declaring no attributes or methods of its own.

    `FullBlockingMultiEngineClient` declares it through `implements`, and
    it is the target interface of the adapter registered in this module.
    """
class FullBlockingMultiEngineClient(InteractiveMultiEngineClient):
    """
    A blocking client to the `IMultiEngine` controller interface.

    This class allows users to use a set of engines for a parallel
    computation through the `IMultiEngine` interface. In this interface,
    each engine has a specific id (an int) that is used to refer to the
    engine, run code on it, etc.

    Most methods accept `targets` and `block` keyword arguments. When they
    are left as None, the instance attributes `self.targets` (initially
    'all') and `self.block` (initially True) are used instead.
    """

    implements(
        IFullBlockingMultiEngineClient,
        IMultiEngineMapperFactory,
        IMapper
    )

    def __init__(self, smultiengine):
        """Wrap `smultiengine`, the object every call is forwarded to."""
        self.smultiengine = smultiengine
        self.block = True
        self.targets = 'all'

    def _findBlock(self, block=None):
        """Return `block`, or `self.block` when `block` is None.

        Raises ValueError when `block` is neither None nor equal to True
        or False.
        """
        if block is None:
            return self.block
        if block in (True, False):
            return block
        raise ValueError("block must be True or False")

    def _findTargets(self, targets=None):
        """Return `targets`, or `self.targets` when `targets` is None.

        Raises ValueError when `targets` is not a str, list, tuple or int.
        """
        if targets is None:
            return self.targets
        if not isinstance(targets, (str, list, tuple, int)):
            raise ValueError("targets must be a str, list, tuple or int")
        return targets

    def _findTargetsAndBlock(self, targets=None, block=None):
        """Resolve both `targets` and `block` against the instance defaults."""
        return self._findTargets(targets), self._findBlock(block)

    def _bcft(self, *args, **kwargs):
        """Call `blockingCallFromThread` with the given arguments.

        A `DeadReferenceError` is translated into `error.ConnectionError`;
        any other exception propagates unchanged.
        """
        try:
            result = blockingCallFromThread(*args, **kwargs)
        except DeadReferenceError:
            raise error.ConnectionError(
                "A connection error has occurred in trying to connect to the\n"
                "controller. This is usually caused by the controller dying or\n"
                "being restarted. To resolve this issue try recreating the\n"
                "multiengine client."
            )
        else:
            return result

    def _blockFromThread(self, function, *args, **kwargs):
        """Run `function` through `_bcft`, honouring the `block` keyword.

        `block` must be present in `kwargs` and not None, otherwise
        `error.MissingBlockArgument` is raised. When `block` is false the
        value returned by the call is wrapped in a `PendingResult`.
        """
        block = kwargs.get('block', None)
        if block is None:
            raise error.MissingBlockArgument("'block' keyword argument is missing")
        result = self._bcft(function, *args, **kwargs)
        if not block:
            result = PendingResult(self, result)
        return result

    def get_pending_deferred(self, deferredID, block):
        """Forward to `smultiengine.get_pending_deferred(deferredID, block)`."""
        return self._bcft(self.smultiengine.get_pending_deferred, deferredID, block)

    def barrier(self, pendingResults):
        """Synchronize a set of `PendingResults`.

        This method is a synchronization primitive that waits for a set of
        `PendingResult` objects to complete. More specifically, barrier does
        the following.

        * The `PendingResult`s are sorted by result_id.
        * The `get_result` method is called for each `PendingResult` sequentially
          with block=True.
        * If a `PendingResult` gets a result that is an exception, it is
          trapped and can be re-raised later by calling `get_result` again.
        * The `PendingResult`s are flushed from the controller.

        After barrier has been called on a `PendingResult`, its results can
        be retrieved by calling `get_result` again or accessing the `r` attribute
        of the instance.

        Raises `error.NotAPendingResult` if any item is not a `PendingResult`.
        """
        # Convert to list for sorting and check class type
        prList = list(pendingResults)
        for pr in prList:
            if not isinstance(pr, PendingResult):
                raise error.NotAPendingResult("Objects passed to barrier must be PendingResult instances")
        # Sort the PendingResults so they are in order
        prList.sort(key=lambda pr: pr.result_id)
        # Block on each PendingResult object
        for pr in prList:
            try:
                pr.get_result(block=True)
            except Exception:
                # Deliberately trapped: the PendingResult keeps the
                # exception and re-raises it on the next get_result call.
                pass

    def flush(self):
        """
        Clear all pending deferreds/results from the controller.

        For each `PendingResult` that is created by this client, the controller
        holds on to the result for that `PendingResult`. This can be a problem
        if there are a large number of `PendingResult` objects that are created.

        Once the result of the `PendingResult` has been retrieved, the result
        is removed from the controller, but if a user doesn't get a result (
        they just ignore the `PendingResult`) the result is kept forever on the
        controller. This method allows the user to clear out all un-retrieved
        results on the controller.
        """
        return self._bcft(self.smultiengine.clear_pending_deferreds)

    clear_pending_results = flush

    #---------------------------------------------------------------------------
    # IEngineMultiplexer related methods
    #---------------------------------------------------------------------------

    def execute(self, lines, targets=None, block=None):
        """
        Execute code on a set of engines.

        :Parameters:
            lines : str
                The Python code to execute as a string
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result (as a
                `ResultList`). If False, a `PendingResult` is returned
                which can be used to get the result at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        result = self._bcft(self.smultiengine.execute, lines,
                            targets=targets, block=block)
        if block:
            result = ResultList(result)
        else:
            result = PendingResult(self, result)
            # Pretty-print wrapping is applied when the result is fetched.
            result.add_callback(wrapResultList)
        return result

    def push(self, namespace, targets=None, block=None):
        """
        Push a dictionary of keys and values to engines namespace.

        Each engine has a persistent namespace. This method is used to push
        Python objects into that namespace.

        The objects in the namespace must be pickleable.

        :Parameters:
            namespace : dict
                A dict that contains Python objects to be injected into
                the engine persistent namespace.
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result. If False,
                a `PendingResult` is returned which can be used to get the result
                at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.push, namespace,
                                     targets=targets, block=block)

    def pull(self, keys, targets=None, block=None):
        """
        Pull Python objects by key out of engines namespaces.

        :Parameters:
            keys : str or list of str
                The names of the variables to be pulled
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result. If False,
                a `PendingResult` is returned which can be used to get the result
                at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.pull, keys,
                                     targets=targets, block=block)

    def push_function(self, namespace, targets=None, block=None):
        """
        Push a Python function to an engine.

        This method is used to push a Python function to an engine. This
        method can then be used in code on the engines. Closures are not supported.

        :Parameters:
            namespace : dict
                A dict whose values are the functions to be pushed. The keys give
                that names that the function will appear as in the engines
                namespace.
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result. If False,
                a `PendingResult` is returned which can be used to get the result
                at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.push_function, namespace,
                                     targets=targets, block=block)

    def pull_function(self, keys, targets=None, block=None):
        """
        Pull a Python function from an engine.

        This method is used to pull a Python function from an engine.
        Closures are not supported.

        :Parameters:
            keys : str or list of str
                The names of the functions to be pulled
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result. If False,
                a `PendingResult` is returned which can be used to get the result
                at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.pull_function, keys,
                                     targets=targets, block=block)

    def push_serialized(self, namespace, targets=None, block=None):
        """Forward to `smultiengine.push_serialized` (see `push` for the
        meaning of `targets` and `block`)."""
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.push_serialized, namespace,
                                     targets=targets, block=block)

    def pull_serialized(self, keys, targets=None, block=None):
        """Forward to `smultiengine.pull_serialized` (see `pull` for the
        meaning of `targets` and `block`)."""
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.pull_serialized, keys,
                                     targets=targets, block=block)

    def get_result(self, i=None, targets=None, block=None):
        """
        Get a previous result.

        When code is executed in an engine, a dict is created and returned. This
        method retrieves that dict for previous commands.

        :Parameters:
            i : int
                The number of the result to get
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result (as a
                `ResultList`). If False, a `PendingResult` is returned
                which can be used to get the result at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        result = self._bcft(self.smultiengine.get_result, i,
                            targets=targets, block=block)
        if block:
            result = ResultList(result)
        else:
            result = PendingResult(self, result)
            result.add_callback(wrapResultList)
        return result

    def reset(self, targets=None, block=None):
        """
        Reset an engine.

        This method clears out the namespace of an engine.

        :Parameters:
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result. If False,
                a `PendingResult` is returned which can be used to get the result
                at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.reset,
                                     targets=targets, block=block)

    def keys(self, targets=None, block=None):
        """
        Get a list of all the variables in an engine's namespace.

        :Parameters:
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result. If False,
                a `PendingResult` is returned which can be used to get the result
                at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.keys,
                                     targets=targets, block=block)

    def kill(self, controller=False, targets=None, block=None):
        """
        Kill the engines and controller.

        This method is used to stop the engine and controller by calling
        `reactor.stop`.

        :Parameters:
            controller : boolean
                If True, kill the engines and controller. If False, just the
                engines
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result. If False,
                a `PendingResult` is returned which can be used to get the result
                at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.kill, controller,
                                     targets=targets, block=block)

    def clear_queue(self, targets=None, block=None):
        """
        Clear out the controller's queue for an engine.

        The controller maintains a queue for each engine. This clears it out.

        :Parameters:
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result. If False,
                a `PendingResult` is returned which can be used to get the result
                at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.clear_queue,
                                     targets=targets, block=block)

    def queue_status(self, targets=None, block=None):
        """
        Get the status of an engines queue.

        :Parameters:
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result. If False,
                a `PendingResult` is returned which can be used to get the result
                at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.queue_status,
                                     targets=targets, block=block)

    # The engine-property methods below all emit `_prop_warn` first because
    # the property API may be removed (see the text of the warning).

    def set_properties(self, properties, targets=None, block=None):
        """Warn with `_prop_warn`, then forward to `smultiengine.set_properties`."""
        warnings.warn(_prop_warn)
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.set_properties, properties,
                                     targets=targets, block=block)

    def get_properties(self, keys=None, targets=None, block=None):
        """Warn with `_prop_warn`, then forward to `smultiengine.get_properties`."""
        warnings.warn(_prop_warn)
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.get_properties, keys,
                                     targets=targets, block=block)

    def has_properties(self, keys, targets=None, block=None):
        """Warn with `_prop_warn`, then forward to `smultiengine.has_properties`."""
        warnings.warn(_prop_warn)
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.has_properties, keys,
                                     targets=targets, block=block)

    def del_properties(self, keys, targets=None, block=None):
        """Warn with `_prop_warn`, then forward to `smultiengine.del_properties`."""
        warnings.warn(_prop_warn)
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.del_properties, keys,
                                     targets=targets, block=block)

    def clear_properties(self, targets=None, block=None):
        """Warn with `_prop_warn`, then forward to `smultiengine.clear_properties`."""
        warnings.warn(_prop_warn)
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.clear_properties,
                                     targets=targets, block=block)

    #---------------------------------------------------------------------------
    # IMultiEngine related methods
    #---------------------------------------------------------------------------

    def get_ids(self):
        """
        Returns the ids of currently registered engines.
        """
        return self._bcft(self.smultiengine.get_ids)

    #---------------------------------------------------------------------------
    # IMultiEngineCoordinator
    #---------------------------------------------------------------------------

    def scatter(self, key, seq, dist='b', flatten=False, targets=None, block=None):
        """
        Partition a Python sequence and send the partitions to a set of engines.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.scatter, key, seq,
                                     dist, flatten, targets=targets, block=block)

    def gather(self, key, dist='b', targets=None, block=None):
        """
        Gather a partitioned sequence on a set of engines as a single local seq.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.gather, key, dist,
                                     targets=targets, block=block)

    def raw_map(self, func, seq, dist='b', targets=None, block=None):
        """
        A parallelized version of Python's builtin map.

        This has a slightly different syntax than the builtin `map`.
        This is needed because we need to have keyword arguments and thus
        can't use *args to capture all the sequences. Instead, they must
        be passed in a list or tuple.

        raw_map(func, seqs) -> map(func, seqs[0], seqs[1], ...)

        Most users will want to use parallel functions or the `mapper`
        and `map` methods for an API that follows that of the builtin
        `map`.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.raw_map, func, seq,
                                     dist, targets=targets, block=block)

    def map(self, func, *sequences):
        """
        A parallel version of Python's builtin `map` function.

        This method applies a function to sequences of arguments. It
        follows the same syntax as the builtin `map`.

        This method creates a mapper objects by calling `self.mapper` with
        no arguments and then uses that mapper to do the mapping. See
        the documentation of `mapper` for more details.
        """
        return self.mapper().map(func, *sequences)

    def mapper(self, dist='b', targets='all', block=None):
        """
        Create a mapper object that has a `map` method.

        This method returns an object that implements the `IMapper`
        interface. This method is a factory that is used to control how
        the map happens.

        :Parameters:
            dist : str
                What decomposition to use, 'b' is the only one supported
                currently
            targets : str, int, sequence of ints
                Which engines to use for the map
            block : boolean
                Should calls to `map` block or not
        """
        return MultiEngineMapper(self, dist, targets, block)

    def parallel(self, dist='b', targets=None, block=None):
        """
        A decorator that turns a function into a parallel function.

        This can be used as::

            @parallel()
            def f(x, y)
                ...

            f(range(10), range(10))

        This causes f(0,0), f(1,1), ... to be called in parallel.

        :Parameters:
            dist : str
                What decomposition to use, 'b' is the only one supported
                currently
            targets : str, int, sequence of ints
                Which engines to use for the map
            block : boolean
                Should calls to `map` block or not
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        mapper_obj = self.mapper(dist, targets, block)
        return ParallelFunction(mapper_obj)

    #---------------------------------------------------------------------------
    # IMultiEngineExtras
    #---------------------------------------------------------------------------

    def zip_pull(self, keys, targets=None, block=None):
        """Forward to `smultiengine.zip_pull` (see `pull` for the meaning
        of `targets` and `block`)."""
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.zip_pull, keys,
                                     targets=targets, block=block)

    def run(self, filename, targets=None, block=None):
        """
        Run a Python code in a file on the engines.

        :Parameters:
            filename : str
                The name of the local file to run
            targets : id or list of ids
                The engine to use for the execution
            block : boolean
                If True, this method will return the actual result. If False,
                a `PendingResult` is returned which can be used to get the result
                at a later time.
        """
        targets, block = self._findTargetsAndBlock(targets, block)
        return self._blockFromThread(self.smultiengine.run, filename,
                                     targets=targets, block=block)

    def benchmark(self, push_size=10000):
        """
        Run performance benchmarks for the current IPython cluster.

        This method tests both the latency of sending command and data to the
        engines as well as the throughput of sending large objects to the
        engines using push. The latency is measured by having one or more
        engines execute the command 'pass'. The throughput is measured by
        sending a NumPy array of size `push_size` to one or more engines.

        These benchmarks will vary widely on different hardware and networks
        and thus can be used to get an idea of the performance characteristics
        of a particular configuration of an IPython controller and engines.

        This function is not testable within our current testing framework.

        :Returns: A dict mapping a benchmark name to a ``(value, unit)``
            tuple. The two push benchmarks are only present when numpy can
            be imported.
        """
        import timeit
        import __builtin__

        # timeit runs its statements in a private namespace, so the client is
        # made reachable through the builtins for the duration of the run.
        __builtin__._mec_self = self
        try:
            benchmarks = {}
            repeat = 3
            count = 10

            timer = timeit.Timer('_mec_self.execute("pass",0)')
            result = 1000*min(timer.repeat(repeat, count))/count
            benchmarks['single_engine_latency'] = (result, 'msec')

            timer = timeit.Timer('_mec_self.execute("pass")')
            result = 1000*min(timer.repeat(repeat, count))/count
            benchmarks['all_engine_latency'] = (result, 'msec')

            try:
                import numpy
            except ImportError:
                # Throughput benchmarks need numpy; skip them without it.
                pass
            else:
                setup = "import numpy as np; d = dict(a=np.zeros(%r,dtype='float64'))" % push_size

                timer = timeit.Timer("_mec_self.push(d)", setup)
                result = min(timer.repeat(repeat, count))/count
                # 8 bytes per float64 element, reported in 1e6 bytes/sec.
                benchmarks['all_engine_push'] = (1e-6*push_size*8/result, 'MB/sec')

                timer = timeit.Timer("_mec_self.push(d,0)", setup)
                result = min(timer.repeat(repeat, count))/count
                benchmarks['single_engine_push'] = (1e-6*push_size*8/result, 'MB/sec')
        finally:
            # Do not leave a reference to this client in the builtins.
            del __builtin__._mec_self
        return benchmarks
# Register FullBlockingMultiEngineClient as the adapter factory that turns an
# IFullSynchronousMultiEngine provider into an IFullBlockingMultiEngineClient.
components.registerAdapter(FullBlockingMultiEngineClient,
IFullSynchronousMultiEngine, IFullBlockingMultiEngineClient)