Merge PR #413...
MinRK
r3882:50deb546 merge
@@ -0,0 +1,120 b''
1 """test LoadBalancedView objects"""
2 # -*- coding: utf-8 -*-
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
10 #-------------------------------------------------------------------------------
11 # Imports
12 #-------------------------------------------------------------------------------
13
14 import sys
15 import time
16
17 import zmq
18
19 from IPython import parallel as pmod
20 from IPython.parallel import error
21
22 from IPython.parallel.tests import add_engines
23
24 from .clienttest import ClusterTestCase, crash, wait, skip_without
25
26 def setup():
27 add_engines(3)
28
29 class TestLoadBalancedView(ClusterTestCase):
30
31 def setUp(self):
32 ClusterTestCase.setUp(self)
33 self.view = self.client.load_balanced_view()
34
35 def test_z_crash_task(self):
36 """test graceful handling of engine death (balanced)"""
37 # self.add_engines(1)
38 ar = self.view.apply_async(crash)
39 self.assertRaisesRemote(error.EngineError, ar.get, 10)
40 eid = ar.engine_id
41 tic = time.time()
42 while eid in self.client.ids and time.time()-tic < 5:
43 time.sleep(.01)
44 self.client.spin()
45 self.assertFalse(eid in self.client.ids, "Engine should have died")
46
47 def test_map(self):
48 def f(x):
49 return x**2
50 data = range(16)
51 r = self.view.map_sync(f, data)
52 self.assertEquals(r, map(f, data))
53
54 def test_abort(self):
55 view = self.view
56 ar = self.client[:].apply_async(time.sleep, .5)
57 ar2 = view.apply_async(lambda : 2)
58 ar3 = view.apply_async(lambda : 3)
59 view.abort(ar2)
60 view.abort(ar3.msg_ids)
61 self.assertRaises(error.TaskAborted, ar2.get)
62 self.assertRaises(error.TaskAborted, ar3.get)
63
64 def test_retries(self):
65 add_engines(3)
66 view = self.view
67 view.timeout = 1 # prevent hang if this doesn't behave
68 def fail():
69 assert False
70 for r in range(len(self.client)-1):
71 with view.temp_flags(retries=r):
72 self.assertRaisesRemote(AssertionError, view.apply_sync, fail)
73
74 with view.temp_flags(retries=len(self.client), timeout=0.25):
75 self.assertRaisesRemote(error.TaskTimeout, view.apply_sync, fail)
76
77 def test_invalid_dependency(self):
78 view = self.view
79 with view.temp_flags(after='12345'):
80 self.assertRaisesRemote(error.InvalidDependency, view.apply_sync, lambda : 1)
81
82 def test_impossible_dependency(self):
83 if len(self.client) < 2:
84 add_engines(2)
85 view = self.client.load_balanced_view()
86 ar1 = view.apply_async(lambda : 1)
87 ar1.get()
88 e1 = ar1.engine_id
89 e2 = e1
90 while e2 == e1:
91 ar2 = view.apply_async(lambda : 1)
92 ar2.get()
93 e2 = ar2.engine_id
94
95 with view.temp_flags(follow=[ar1, ar2]):
96 self.assertRaisesRemote(error.ImpossibleDependency, view.apply_sync, lambda : 1)
97
98
99 def test_follow(self):
100 ar = self.view.apply_async(lambda : 1)
101 ar.get()
102 ars = []
103 first_id = ar.engine_id
104
105 self.view.follow = ar
106 for i in range(5):
107 ars.append(self.view.apply_async(lambda : 1))
108 self.view.wait(ars)
109 for ar in ars:
110 self.assertEquals(ar.engine_id, first_id)
111
112 def test_after(self):
113 view = self.view
114 ar = view.apply_async(time.sleep, 0.5)
115 with view.temp_flags(after=ar):
116 ar2 = view.apply_async(lambda : 1)
117
118 ar.wait()
119 ar2.wait()
120 self.assertTrue(ar2.started > ar.completed)
@@ -0,0 +1,37 b''
1 """Tests for mongodb backend"""
2
3 #-------------------------------------------------------------------------------
4 # Copyright (C) 2011 The IPython Development Team
5 #
6 # Distributed under the terms of the BSD License. The full license is in
7 # the file COPYING, distributed as part of this software.
8 #-------------------------------------------------------------------------------
9
10 #-------------------------------------------------------------------------------
11 # Imports
12 #-------------------------------------------------------------------------------
13
14 from nose import SkipTest
15
16 from pymongo import Connection
17 from IPython.parallel.controller.mongodb import MongoDB
18
19 from . import test_db
20
21 try:
22 c = Connection()
23 except Exception:
24 c=None
25
26 class TestMongoBackend(test_db.TestDictBackend):
27 """MongoDB backend tests"""
28
29 def create_db(self):
30 try:
31 return MongoDB(database='iptestdb', _connection=c)
32 except Exception:
33 raise SkipTest("Couldn't connect to mongodb")
34
35 def teardown(self):
36 if c is not None:
37 c.drop_database('iptestdb')
@@ -0,0 +1,114 b''
1 .. _parallel_db:
2
3 =======================
4 IPython's Task Database
5 =======================
6
7 The IPython Hub stores all task requests and results in a database. Currently supported backends
8 are: MongoDB, SQLite (the default), and an in-memory DictDB. The most common use of the
9 database is for clients to fetch results of tasks they did not submit, via:
10
11 .. sourcecode:: ipython
12
13 In [1]: rc.get_result(task_id)
14
15 However, since we have this DB backend, we provide a direct query method in the :class:`Client`
16 for users who want deeper introspection into their task history. The :meth:`db_query` method of
17 the Client is modeled after MongoDB queries, so if you have used MongoDB it should look
18 familiar. In fact, when the MongoDB backend is in use, the query is relayed directly. However,
19 when using other backends, the interface is emulated and only a subset of queries is possible.
20
21 .. seealso::
22
23 MongoDB query docs: http://www.mongodb.org/display/DOCS/Querying
24
25 :meth:`Client.db_query` takes a dictionary query object, with keys from the TaskRecord key list,
26 and values of either exact values to test, or MongoDB queries, which are dicts of the form:
27 ``{'operator' : 'argument(s)'}``. There is also an optional `keys` argument that specifies
28 which subset of keys should be retrieved. The default is to retrieve all keys excluding the
29 request and result buffers. :meth:`db_query` returns a list of TaskRecord dicts. Also like
30 MongoDB, the `msg_id` key will always be included, whether requested or not.
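
For instance, a minimal sketch (assuming ``msg_id`` holds the id of a finished task) that
retrieves just the timing fields of that one record:

.. sourcecode:: ipython

    In [2]: rc.db_query({'msg_id' : msg_id}, keys=['started', 'completed'])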
31
32 TaskRecord keys:
33
34 =============== =============== =============
35 Key Type Description
36 =============== =============== =============
37 msg_id uuid(bytes) The msg ID
38 header dict The request header
39 content dict The request content (likely empty)
40 buffers list(bytes) buffers containing serialized request objects
41 submitted datetime timestamp for time of submission (set by client)
42 client_uuid uuid(bytes) IDENT of client's socket
43 engine_uuid uuid(bytes) IDENT of engine's socket
44 started datetime time task began execution on engine
45 completed datetime time task finished execution (success or failure) on engine
46 resubmitted datetime time of resubmission (if applicable)
47 result_header dict header for result
48 result_content dict content for result
49 result_buffers list(bytes) buffers containing serialized result objects
50 queue bytes The name of the queue for the task ('mux' or 'task')
51 pyin <unused> Python input (unused)
52 pyout <unused> Python output (unused)
53 pyerr <unused> Python traceback (unused)
54 stdout str Stream of stdout data
55 stderr str Stream of stderr data
56 =============== =============== =============
58
59 MongoDB operators we emulate on all backends:
60
61 ========== =================
62 Operator Python equivalent
63 ========== =================
64 '$in' in
65 '$nin' not in
66 '$eq' ==
67 '$ne' !=
68 '$gt' >
69 '$gte' >=
70 '$lt' <
71 '$lte' <=
72 ========== =================
73
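For example, a sketch (assuming ``uuids`` is a list of engine idents, as built in the last
example below) selecting every task that ran on neither engine; this works on all backends
because ``'$nin'`` is emulated:

.. sourcecode:: ipython

    In [1]: rc.db_query({'engine_uuid' : {'$nin' : uuids}})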
74
75 The DB Query is useful for two primary cases:
76
77 1. deep polling of task status or metadata
78 2. selecting a subset of tasks, on which to perform a later operation (e.g. wait on result, purge records, resubmit,...)
79
80 Example Queries
81 ===============
82
83
84 To get all msg_ids that are not completed, only retrieving their ID and start time:
85
86 .. sourcecode:: ipython
87
88 In [1]: incomplete = rc.db_query({'completed' : None}, keys=['msg_id', 'started'])
89
90 All jobs started in the last hour by me:
91
92 .. sourcecode:: ipython
93
94 In [1]: from datetime import datetime, timedelta
95
96 In [2]: hourago = datetime.now() - timedelta(1./24)
97
98 In [3]: recent = rc.db_query({'started' : {'$gte' : hourago },
99 'client_uuid' : rc.session.session})
100
101 All jobs started more than an hour ago, by clients *other than me*:
102
103 .. sourcecode:: ipython
104
105 In [3]: recent = rc.db_query({'started' : {'$lt' : hourago },
106 'client_uuid' : {'$ne' : rc.session.session}})
107
108 Result headers for all jobs on engine 3 or 4:
109
110 .. sourcecode:: ipython
111
112 In [1]: uuids = map(rc._engines.get, (3,4))
113
114 In [2]: hist34 = rc.db_query({'engine_uuid' : {'$in' : uuids }}, keys='result_header')
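
Since :meth:`db_query` returns a list of TaskRecord dicts, its output feeds directly into
the bulk operations listed above. A sketch (assuming week-old records are safe to discard)
that purges everything completed more than seven days ago:

.. sourcecode:: ipython

    In [3]: weekago = datetime.now() - timedelta(7)

    In [4]: old = rc.db_query({'completed' : {'$lt' : weekago}}, keys=['msg_id'])

    In [5]: rc.purge_results([ rec['msg_id'] for rec in old ])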
@@ -1041,6 +1041,68 b' class Client(HasTraits):'
1041 ar.wait()
1041 ar.wait()
1042
1042
1043 return ar
1043 return ar
1044
1045 @spin_first
1046 def resubmit(self, indices_or_msg_ids=None, subheader=None, block=None):
1047 """Resubmit one or more tasks.
1048
1049 In-flight tasks may not be resubmitted.
1050
1051 Parameters
1052 ----------
1053
1054 indices_or_msg_ids : integer history index, str msg_id, or list of either
1055 The indices or msg_ids of the tasks to be resubmitted
1056
1057 block : bool
1058 Whether to wait for the result to be done
1059
1060 Returns
1061 -------
1062
1063 AsyncHubResult
1064 A subclass of AsyncResult that retrieves results from the Hub
1065
1066 """
1067 block = self.block if block is None else block
1068 if indices_or_msg_ids is None:
1069 indices_or_msg_ids = -1
1070
1071 if not isinstance(indices_or_msg_ids, (list,tuple)):
1072 indices_or_msg_ids = [indices_or_msg_ids]
1073
1074 theids = []
1075 for id in indices_or_msg_ids:
1076 if isinstance(id, int):
1077 id = self.history[id]
1078 if not isinstance(id, str):
1079 raise TypeError("indices must be str or int, not %r"%id)
1080 theids.append(id)
1081
1082 for msg_id in theids:
1083 self.outstanding.discard(msg_id)
1084 if msg_id in self.history:
1085 self.history.remove(msg_id)
1086 self.results.pop(msg_id, None)
1087 self.metadata.pop(msg_id, None)
1088 content = dict(msg_ids = theids)
1089
1090 self.session.send(self._query_socket, 'resubmit_request', content)
1091
1092 zmq.select([self._query_socket], [], [])
1093 idents,msg = self.session.recv(self._query_socket, zmq.NOBLOCK)
1094 if self.debug:
1095 pprint(msg)
1096 content = msg['content']
1097 if content['status'] != 'ok':
1098 raise self._unwrap_exception(content)
1099
1100 ar = AsyncHubResult(self, msg_ids=theids)
1101
1102 if block:
1103 ar.wait()
1104
1105 return ar
1044
1106
1045 @spin_first
1107 @spin_first
1046 def result_status(self, msg_ids, status_only=True):
1108 def result_status(self, msg_ids, status_only=True):
@@ -1255,9 +1317,11 b' class Client(HasTraits):'
1255 query : mongodb query dict
1317 query : mongodb query dict
1256 The search dict. See mongodb query docs for details.
1318 The search dict. See mongodb query docs for details.
1257 keys : list of strs [optional]
1319 keys : list of strs [optional]
1258 THe subset of keys to be returned. The default is to fetch everything.
1320 The subset of keys to be returned. The default is to fetch everything but buffers.
1259 'msg_id' will *always* be included.
1321 'msg_id' will *always* be included.
1260 """
1322 """
1323 if isinstance(keys, basestring):
1324 keys = [keys]
1261 content = dict(query=query, keys=keys)
1325 content = dict(query=query, keys=keys)
1262 self.session.send(self._query_socket, "db_request", content=content)
1326 self.session.send(self._query_socket, "db_request", content=content)
1263 idents, msg = self.session.recv(self._query_socket, 0)
1327 idents, msg = self.session.recv(self._query_socket, 0)
@@ -19,7 +19,7 b' from types import ModuleType'
19 import zmq
19 import zmq
20
20
21 from IPython.testing import decorators as testdec
21 from IPython.testing import decorators as testdec
22 from IPython.utils.traitlets import HasTraits, Any, Bool, List, Dict, Set, Int, Instance, CFloat
22 from IPython.utils.traitlets import HasTraits, Any, Bool, List, Dict, Set, Int, Instance, CFloat, CInt
23
23
24 from IPython.external.decorator import decorator
24 from IPython.external.decorator import decorator
25
25
@@ -791,9 +791,10 b' class LoadBalancedView(View):'
791 follow=Any()
791 follow=Any()
792 after=Any()
792 after=Any()
793 timeout=CFloat()
793 timeout=CFloat()
794 retries = CInt(0)
794
795
795 _task_scheme = Any()
796 _task_scheme = Any()
796 _flag_names = List(['targets', 'block', 'track', 'follow', 'after', 'timeout'])
797 _flag_names = List(['targets', 'block', 'track', 'follow', 'after', 'timeout', 'retries'])
797
798
798 def __init__(self, client=None, socket=None, **flags):
799 def __init__(self, client=None, socket=None, **flags):
799 super(LoadBalancedView, self).__init__(client=client, socket=socket, **flags)
800 super(LoadBalancedView, self).__init__(client=client, socket=socket, **flags)
@@ -851,7 +852,7 b' class LoadBalancedView(View):'
851 whether to create a MessageTracker to allow the user to
852 whether to create a MessageTracker to allow the user to
852 safely edit after arrays and buffers during non-copying
853 safely edit after arrays and buffers during non-copying
853 sends.
854 sends.
854 #
855
855 after : Dependency or collection of msg_ids
856 after : Dependency or collection of msg_ids
856 Only for load-balanced execution (targets=None)
857 Only for load-balanced execution (targets=None)
857 Specify a list of msg_ids as a time-based dependency.
858 Specify a list of msg_ids as a time-based dependency.
@@ -869,6 +870,9 b' class LoadBalancedView(View):'
869 Specify an amount of time (in seconds) for the scheduler to
870 Specify an amount of time (in seconds) for the scheduler to
870 wait for dependencies to be met before failing with a
871 wait for dependencies to be met before failing with a
871 DependencyTimeout.
872 DependencyTimeout.
873
874 retries : int
875 Number of times a task will be retried on failure.
872 """
876 """
873
877
874 super(LoadBalancedView, self).set_flags(**kwargs)
878 super(LoadBalancedView, self).set_flags(**kwargs)
@@ -892,7 +896,7 b' class LoadBalancedView(View):'
892 @save_ids
896 @save_ids
893 def _really_apply(self, f, args=None, kwargs=None, block=None, track=None,
897 def _really_apply(self, f, args=None, kwargs=None, block=None, track=None,
894 after=None, follow=None, timeout=None,
898 after=None, follow=None, timeout=None,
895 targets=None):
899 targets=None, retries=None):
896 """calls f(*args, **kwargs) on a remote engine, returning the result.
900 """calls f(*args, **kwargs) on a remote engine, returning the result.
897
901
898 This method temporarily sets all of `apply`'s flags for a single call.
902 This method temporarily sets all of `apply`'s flags for a single call.
@@ -933,10 +937,11 b' class LoadBalancedView(View):'
933 raise RuntimeError(msg)
937 raise RuntimeError(msg)
934
938
935 if self._task_scheme == 'pure':
939 if self._task_scheme == 'pure':
936 # pure zmq scheme doesn't support dependencies
940 # pure zmq scheme doesn't support extra features
937 msg = "Pure ZMQ scheduler doesn't support dependencies"
941 msg = "Pure ZMQ scheduler doesn't support the following flags:"
938 if (follow or after):
942 "follow, after, retries, targets, timeout"
939 # hard fail on DAG dependencies
943 if (follow or after or retries or targets or timeout):
944 # hard fail on Scheduler flags
940 raise RuntimeError(msg)
945 raise RuntimeError(msg)
941 if isinstance(f, dependent):
946 if isinstance(f, dependent):
942 # soft warn on functional dependencies
947 # soft warn on functional dependencies
@@ -948,10 +953,14 b' class LoadBalancedView(View):'
948 block = self.block if block is None else block
953 block = self.block if block is None else block
949 track = self.track if track is None else track
954 track = self.track if track is None else track
950 after = self.after if after is None else after
955 after = self.after if after is None else after
956 retries = self.retries if retries is None else retries
951 follow = self.follow if follow is None else follow
957 follow = self.follow if follow is None else follow
952 timeout = self.timeout if timeout is None else timeout
958 timeout = self.timeout if timeout is None else timeout
953 targets = self.targets if targets is None else targets
959 targets = self.targets if targets is None else targets
954
960
961 if not isinstance(retries, int):
962 raise TypeError('retries must be int, not %r'%type(retries))
963
955 if targets is None:
964 if targets is None:
956 idents = []
965 idents = []
957 else:
966 else:
@@ -959,7 +968,7 b' class LoadBalancedView(View):'
959
968
960 after = self._render_dependency(after)
969 after = self._render_dependency(after)
961 follow = self._render_dependency(follow)
970 follow = self._render_dependency(follow)
962 subheader = dict(after=after, follow=follow, timeout=timeout, targets=idents)
971 subheader = dict(after=after, follow=follow, timeout=timeout, targets=idents, retries=retries)
963
972
964 msg = self.client.send_apply_message(self._socket, f, args, kwargs, track=track,
973 msg = self.client.send_apply_message(self._socket, f, args, kwargs, track=track,
965 subheader=subheader)
974 subheader=subheader)
@@ -146,7 +146,7 b' class DictDB(BaseDB):'
146 """Remove a record from the DB."""
146 """Remove a record from the DB."""
147 matches = self._match(check)
147 matches = self._match(check)
148 for m in matches:
148 for m in matches:
149 del self._records[m]
149 del self._records[m['msg_id']]
150
150
151 def drop_record(self, msg_id):
151 def drop_record(self, msg_id):
152 """Remove a record from the DB."""
152 """Remove a record from the DB."""
@@ -268,8 +268,15 b' class HubFactory(RegistrationFactory):'
268 }
268 }
269 self.log.debug("Hub engine addrs: %s"%self.engine_info)
269 self.log.debug("Hub engine addrs: %s"%self.engine_info)
270 self.log.debug("Hub client addrs: %s"%self.client_info)
270 self.log.debug("Hub client addrs: %s"%self.client_info)
271
272 # resubmit stream
273 r = ZMQStream(ctx.socket(zmq.XREQ), loop)
274 url = util.disambiguate_url(self.client_info['task'][-1])
275 r.setsockopt(zmq.IDENTITY, self.session.session)
276 r.connect(url)
277
271 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
278 self.hub = Hub(loop=loop, session=self.session, monitor=sub, heartmonitor=self.heartmonitor,
272 query=q, notifier=n, db=self.db,
279 query=q, notifier=n, resubmit=r, db=self.db,
273 engine_info=self.engine_info, client_info=self.client_info,
280 engine_info=self.engine_info, client_info=self.client_info,
274 logname=self.log.name)
281 logname=self.log.name)
275
282
@@ -315,8 +322,9 b' class Hub(LoggingFactory):'
315 loop=Instance(ioloop.IOLoop)
322 loop=Instance(ioloop.IOLoop)
316 query=Instance(ZMQStream)
323 query=Instance(ZMQStream)
317 monitor=Instance(ZMQStream)
324 monitor=Instance(ZMQStream)
318 heartmonitor=Instance(HeartMonitor)
319 notifier=Instance(ZMQStream)
325 notifier=Instance(ZMQStream)
326 resubmit=Instance(ZMQStream)
327 heartmonitor=Instance(HeartMonitor)
320 db=Instance(object)
328 db=Instance(object)
321 client_info=Dict()
329 client_info=Dict()
322 engine_info=Dict()
330 engine_info=Dict()
@@ -379,6 +387,9 b' class Hub(LoggingFactory):'
379 'connection_request': self.connection_request,
387 'connection_request': self.connection_request,
380 }
388 }
381
389
390 # ignore resubmit replies
391 self.resubmit.on_recv(lambda msg: None, copy=False)
392
382 self.log.info("hub::created hub")
393 self.log.info("hub::created hub")
383
394
384 @property
395 @property
@@ -452,31 +463,31 b' class Hub(LoggingFactory):'
452 def dispatch_monitor_traffic(self, msg):
463 def dispatch_monitor_traffic(self, msg):
453 """all ME and Task queue messages come through here, as well as
464 """all ME and Task queue messages come through here, as well as
454 IOPub traffic."""
465 IOPub traffic."""
455 self.log.debug("monitor traffic: %s"%msg[:2])
466 self.log.debug("monitor traffic: %r"%msg[:2])
456 switch = msg[0]
467 switch = msg[0]
457 idents, msg = self.session.feed_identities(msg[1:])
468 idents, msg = self.session.feed_identities(msg[1:])
458 if not idents:
469 if not idents:
459 self.log.error("Bad Monitor Message: %s"%msg)
470 self.log.error("Bad Monitor Message: %r"%msg)
460 return
471 return
461 handler = self.monitor_handlers.get(switch, None)
472 handler = self.monitor_handlers.get(switch, None)
462 if handler is not None:
473 if handler is not None:
463 handler(idents, msg)
474 handler(idents, msg)
464 else:
475 else:
465 self.log.error("Invalid monitor topic: %s"%switch)
476 self.log.error("Invalid monitor topic: %r"%switch)
466
477
467
478
468 def dispatch_query(self, msg):
479 def dispatch_query(self, msg):
469 """Route registration requests and queries from clients."""
480 """Route registration requests and queries from clients."""
470 idents, msg = self.session.feed_identities(msg)
481 idents, msg = self.session.feed_identities(msg)
471 if not idents:
482 if not idents:
472 self.log.error("Bad Query Message: %s"%msg)
483 self.log.error("Bad Query Message: %r"%msg)
473 return
484 return
474 client_id = idents[0]
485 client_id = idents[0]
475 try:
486 try:
476 msg = self.session.unpack_message(msg, content=True)
487 msg = self.session.unpack_message(msg, content=True)
477 except:
488 except:
478 content = error.wrap_exception()
489 content = error.wrap_exception()
479 self.log.error("Bad Query Message: %s"%msg, exc_info=True)
490 self.log.error("Bad Query Message: %r"%msg, exc_info=True)
480 self.session.send(self.query, "hub_error", ident=client_id,
491 self.session.send(self.query, "hub_error", ident=client_id,
481 content=content)
492 content=content)
482 return
493 return
@@ -484,16 +495,17 b' class Hub(LoggingFactory):'
484 # print client_id, header, parent, content
495 # print client_id, header, parent, content
485 #switch on message type:
496 #switch on message type:
486 msg_type = msg['msg_type']
497 msg_type = msg['msg_type']
487 self.log.info("client::client %s requested %s"%(client_id, msg_type))
498 self.log.info("client::client %r requested %r"%(client_id, msg_type))
488 handler = self.query_handlers.get(msg_type, None)
499 handler = self.query_handlers.get(msg_type, None)
489 try:
500 try:
490 assert handler is not None, "Bad Message Type: %s"%msg_type
501 assert handler is not None, "Bad Message Type: %r"%msg_type
491 except:
502 except:
492 content = error.wrap_exception()
503 content = error.wrap_exception()
493 self.log.error("Bad Message Type: %s"%msg_type, exc_info=True)
504 self.log.error("Bad Message Type: %r"%msg_type, exc_info=True)
494 self.session.send(self.query, "hub_error", ident=client_id,
505 self.session.send(self.query, "hub_error", ident=client_id,
495 content=content)
506 content=content)
496 return
507 return
508
497 else:
509 else:
498 handler(idents, msg)
510 handler(idents, msg)
499
511
@@ -560,9 +572,9 b' class Hub(LoggingFactory):'
560 # it's possible iopub arrived first:
572 # it's possible iopub arrived first:
561 existing = self.db.get_record(msg_id)
573 existing = self.db.get_record(msg_id)
562 for key,evalue in existing.iteritems():
574 for key,evalue in existing.iteritems():
563 rvalue = record[key]
575 rvalue = record.get(key, None)
564 if evalue and rvalue and evalue != rvalue:
576 if evalue and rvalue and evalue != rvalue:
565 self.log.error("conflicting initial state for record: %s:%s <> %s"%(msg_id, rvalue, evalue))
577 self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
566 elif evalue and not rvalue:
578 elif evalue and not rvalue:
567 record[key] = evalue
579 record[key] = evalue
568 self.db.update_record(msg_id, record)
580 self.db.update_record(msg_id, record)
@@ -648,10 +660,22 b' class Hub(LoggingFactory):'
648 try:
660 try:
649 # it's possible iopub arrived first:
661 # it's possible iopub arrived first:
650 existing = self.db.get_record(msg_id)
662 existing = self.db.get_record(msg_id)
663 if existing['resubmitted']:
664 for key in ('submitted', 'client_uuid', 'buffers'):
665 # don't clobber these keys on resubmit
666 # submitted and client_uuid should be different
667 # and buffers might be big, and shouldn't have changed
668 record.pop(key)
669 # still check content,header which should not change
670 # but are not expensive to compare as buffers
671
651 for key,evalue in existing.iteritems():
672 for key,evalue in existing.iteritems():
652 rvalue = record[key]
673 if key.endswith('buffers'):
674 # don't compare buffers
675 continue
676 rvalue = record.get(key, None)
653 if evalue and rvalue and evalue != rvalue:
677 if evalue and rvalue and evalue != rvalue:
654 self.log.error("conflicting initial state for record: %s:%s <> %s"%(msg_id, rvalue, evalue))
678 self.log.warn("conflicting initial state for record: %r:%r <%r> %r"%(msg_id, rvalue, key, evalue))
655 elif evalue and not rvalue:
679 elif evalue and not rvalue:
656 record[key] = evalue
680 record[key] = evalue
657 self.db.update_record(msg_id, record)
681 self.db.update_record(msg_id, record)
@@ -1042,42 +1066,99 b' class Hub(LoggingFactory):'
1042 except Exception:
1066 except Exception:
1043 reply = error.wrap_exception()
1067 reply = error.wrap_exception()
1044 else:
1068 else:
1045 for msg_id in msg_ids:
1069 pending = filter(lambda m: m in self.pending, msg_ids)
1046 if msg_id in self.all_completed:
1070 if pending:
1047 self.db.drop_record(msg_id)
1071 try:
1048 else:
1072 raise IndexError("msg pending: %r"%pending[0])
1049 if msg_id in self.pending:
1073 except:
1050 try:
1074 reply = error.wrap_exception()
1051 raise IndexError("msg pending: %r"%msg_id)
1075 else:
1052 except:
1076 try:
1053 reply = error.wrap_exception()
1077 self.db.drop_matching_records(dict(msg_id={'$in':msg_ids}))
1054 else:
1078 except Exception:
1079 reply = error.wrap_exception()
1080
1081 if reply['status'] == 'ok':
1082 eids = content.get('engine_ids', [])
1083 for eid in eids:
1084 if eid not in self.engines:
1055 try:
1085 try:
1056 raise IndexError("No such msg: %r"%msg_id)
1086 raise IndexError("No such engine: %i"%eid)
1057 except:
1087 except:
1058 reply = error.wrap_exception()
1088 reply = error.wrap_exception()
1059 break
1089 break
1060 eids = content.get('engine_ids', [])
1090 msg_ids = self.completed.pop(eid)
1061 for eid in eids:
1091 uid = self.engines[eid].queue
1062 if eid not in self.engines:
1063 try:
1092 try:
1064 raise IndexError("No such engine: %i"%eid)
1093 self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
1065 except:
1094 except Exception:
1066 reply = error.wrap_exception()
1095 reply = error.wrap_exception()
1067 break
1096 break
1068 msg_ids = self.completed.pop(eid)
1069 uid = self.engines[eid].queue
1070 try:
1071 self.db.drop_matching_records(dict(engine_uuid=uid, completed={'$ne':None}))
1072 except Exception:
1073 reply = error.wrap_exception()
1074 break
1075
1097
1076 self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
1098 self.session.send(self.query, 'purge_reply', content=reply, ident=client_id)
1077
1099
1078 def resubmit_task(self, client_id, msg, buffers):
1100 def resubmit_task(self, client_id, msg):
1079 """Resubmit a task."""
1101 """Resubmit one or more tasks."""
1080 raise NotImplementedError
1102 def finish(reply):
1103 self.session.send(self.query, 'resubmit_reply', content=reply, ident=client_id)
1104
1105 content = msg['content']
1106 msg_ids = content['msg_ids']
1107 reply = dict(status='ok')
1108 try:
1109 records = self.db.find_records({'msg_id' : {'$in' : msg_ids}}, keys=[
1110 'header', 'content', 'buffers'])
1111 except Exception:
1112 self.log.error('db::db error finding tasks to resubmit', exc_info=True)
1113 return finish(error.wrap_exception())
1114
1115 # validate msg_ids
1116 found_ids = [ rec['msg_id'] for rec in records ]
1117 invalid_ids = filter(lambda m: m in self.pending, found_ids)
1118 if len(records) > len(msg_ids):
1119 try:
1120 raise RuntimeError("DB appears to be in an inconsistent state. "
1121 "More matching records were found than should exist")
1122 except Exception:
1123 return finish(error.wrap_exception())
1124 elif len(records) < len(msg_ids):
1125 missing = [ m for m in msg_ids if m not in found_ids ]
1126 try:
1127 raise KeyError("No such msg(s): %s"%missing)
1128 except KeyError:
1129 return finish(error.wrap_exception())
1130 elif invalid_ids:
1131 msg_id = invalid_ids[0]
1132 try:
1133 raise ValueError("Task %r appears to be inflight"%(msg_id))
1134 except Exception:
1135 return finish(error.wrap_exception())
1136
1137 # clear the existing records
1138 rec = empty_record()
1139 map(rec.pop, ['msg_id', 'header', 'content', 'buffers', 'submitted'])
1140 rec['resubmitted'] = datetime.now()
1141 rec['queue'] = 'task'
1142 rec['client_uuid'] = client_id[0]
1143 try:
1144 for msg_id in msg_ids:
1145 self.all_completed.discard(msg_id)
1146 self.db.update_record(msg_id, rec)
1147 except Exception:
1148 self.log.error('db::db error updating record', exc_info=True)
1149 reply = error.wrap_exception()
1150 else:
1151 # send the messages
1152 for rec in records:
1153 header = rec['header']
1154 msg = self.session.msg(header['msg_type'])
1155 msg['content'] = rec['content']
1156 msg['header'] = header
1157 msg['msg_id'] = rec['msg_id']
1158 self.session.send(self.resubmit, msg, buffers=rec['buffers'])
1159
1160 finish(dict(status='ok'))
1161
1081
1162
1082 def _extract_record(self, rec):
1163 def _extract_record(self, rec):
1083 """decompose a TaskRecord dict into subsection of reply for get_result"""
1164 """decompose a TaskRecord dict into subsection of reply for get_result"""
@@ -1124,12 +1205,20 b' class Hub(LoggingFactory):'
1124 for msg_id in msg_ids:
1205 for msg_id in msg_ids:
1125 if msg_id in self.pending:
1206 if msg_id in self.pending:
1126 pending.append(msg_id)
1207 pending.append(msg_id)
1127 elif msg_id in self.all_completed or msg_id in records:
1208 elif msg_id in self.all_completed:
1128 completed.append(msg_id)
1209 completed.append(msg_id)
1129 if not statusonly:
1210 if not statusonly:
1130 c,bufs = self._extract_record(records[msg_id])
1211 c,bufs = self._extract_record(records[msg_id])
1131 content[msg_id] = c
1212 content[msg_id] = c
1132 buffers.extend(bufs)
1213 buffers.extend(bufs)
1214 elif msg_id in records:
1215 if rec['completed']:
1216 completed.append(msg_id)
1217 c,bufs = self._extract_record(records[msg_id])
1218 content[msg_id] = c
1219 buffers.extend(bufs)
1220 else:
1221 pending.append(msg_id)
1133 else:
1222 else:
1134 try:
1223 try:
1135 raise KeyError('No such message: '+msg_id)
1224 raise KeyError('No such message: '+msg_id)
@@ -6,12 +6,10 b''
6 # the file COPYING, distributed as part of this software.
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8
8
9 from datetime import datetime
10
11 from pymongo import Connection
9 from pymongo import Connection
12 from pymongo.binary import Binary
10 from pymongo.binary import Binary
13
11
14 from IPython.utils.traitlets import Dict, List, CUnicode
12 from IPython.utils.traitlets import Dict, List, CUnicode, CStr, Instance
15
13
16 from .dictdb import BaseDB
14 from .dictdb import BaseDB
17
15
@@ -25,15 +23,20 b' class MongoDB(BaseDB):'
25 connection_args = List(config=True) # args passed to pymongo.Connection
23 connection_args = List(config=True) # args passed to pymongo.Connection
26 connection_kwargs = Dict(config=True) # kwargs passed to pymongo.Connection
24 connection_kwargs = Dict(config=True) # kwargs passed to pymongo.Connection
27 database = CUnicode(config=True) # name of the mongodb database
25 database = CUnicode(config=True) # name of the mongodb database
28 _table = Dict()
26
27 _connection = Instance(Connection) # pymongo connection
29
28
30 def __init__(self, **kwargs):
29 def __init__(self, **kwargs):
31 super(MongoDB, self).__init__(**kwargs)
30 super(MongoDB, self).__init__(**kwargs)
32 self._connection = Connection(*self.connection_args, **self.connection_kwargs)
31 if self._connection is None:
32 self._connection = Connection(*self.connection_args, **self.connection_kwargs)
33 if not self.database:
33 if not self.database:
34 self.database = self.session
34 self.database = self.session
35 self._db = self._connection[self.database]
35 self._db = self._connection[self.database]
36 self._records = self._db['task_records']
36 self._records = self._db['task_records']
37 self._records.ensure_index('msg_id', unique=True)
38 self._records.ensure_index('submitted') # for sorting history
39 # for rec in self._records.find
37
40
38 def _binary_buffers(self, rec):
41 def _binary_buffers(self, rec):
39 for key in ('buffers', 'result_buffers'):
42 for key in ('buffers', 'result_buffers'):
@@ -45,18 +48,21 b' class MongoDB(BaseDB):'
45 """Add a new Task Record, by msg_id."""
48 """Add a new Task Record, by msg_id."""
46 # print rec
49 # print rec
47 rec = self._binary_buffers(rec)
50 rec = self._binary_buffers(rec)
48 obj_id = self._records.insert(rec)
51 self._records.insert(rec)
49 self._table[msg_id] = obj_id
50
52
51 def get_record(self, msg_id):
53 def get_record(self, msg_id):
52 """Get a specific Task Record, by msg_id."""
54 """Get a specific Task Record, by msg_id."""
53 return self._records.find_one(self._table[msg_id])
55 r = self._records.find_one({'msg_id': msg_id})
56 if not r:
57 # r will be '' if nothing is found
58 raise KeyError(msg_id)
59 return r
54
60
55 def update_record(self, msg_id, rec):
61 def update_record(self, msg_id, rec):
56 """Update the data in an existing record."""
62 """Update the data in an existing record."""
57 rec = self._binary_buffers(rec)
63 rec = self._binary_buffers(rec)
58 obj_id = self._table[msg_id]
64
59 self._records.update({'_id':obj_id}, {'$set': rec})
65 self._records.update({'msg_id':msg_id}, {'$set': rec})
60
66
61 def drop_matching_records(self, check):
67 def drop_matching_records(self, check):
62 """Remove a record from the DB."""
68 """Remove a record from the DB."""
@@ -64,8 +70,7 b' class MongoDB(BaseDB):'
64
70
65 def drop_record(self, msg_id):
71 def drop_record(self, msg_id):
66 """Remove a record from the DB."""
72 """Remove a record from the DB."""
67 obj_id = self._table.pop(msg_id)
73 self._records.remove({'msg_id':msg_id})
68 self._records.remove(obj_id)
69
74
70 def find_records(self, check, keys=None):
75 def find_records(self, check, keys=None):
71 """Find records matching a query dict, optionally extracting subset of keys.
76 """Find records matching a query dict, optionally extracting subset of keys.
@@ -137,6 +137,7 b' class TaskScheduler(SessionFactory):'
137
137
138 # internals:
138 # internals:
139 graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
139 graph = Dict() # dict by msg_id of [ msg_ids that depend on key ]
140 retries = Dict() # dict by msg_id of retries remaining (non-neg ints)
140 # waiting = List() # list of msg_ids ready to run, but haven't due to HWM
141 # waiting = List() # list of msg_ids ready to run, but haven't due to HWM
141 depending = Dict() # dict by msg_id of (msg_id, raw_msg, after, follow)
142 depending = Dict() # dict by msg_id of (msg_id, raw_msg, after, follow)
142 pending = Dict() # dict by engine_uuid of submitted tasks
143 pending = Dict() # dict by engine_uuid of submitted tasks
@@ -205,6 +206,8 b' class TaskScheduler(SessionFactory):'
205 self.pending[uid] = {}
206 self.pending[uid] = {}
206 if len(self.targets) == 1:
207 if len(self.targets) == 1:
207 self.resume_receiving()
208 self.resume_receiving()
209 # rescan the graph:
210 self.update_graph(None)
208
211
209 def _unregister_engine(self, uid):
212 def _unregister_engine(self, uid):
210 """Existing engine with ident `uid` became unavailable."""
213 """Existing engine with ident `uid` became unavailable."""
@@ -215,11 +218,11 b' class TaskScheduler(SessionFactory):'
215 # handle any potentially finished tasks:
218 # handle any potentially finished tasks:
216 self.engine_stream.flush()
219 self.engine_stream.flush()
217
220
218 self.completed.pop(uid)
221 # don't pop destinations, because they might be used later
219 self.failed.pop(uid)
220 # don't pop destinations, because it might be used later
221 # map(self.destinations.pop, self.completed.pop(uid))
222 # map(self.destinations.pop, self.completed.pop(uid))
222 # map(self.destinations.pop, self.failed.pop(uid))
223 # map(self.destinations.pop, self.failed.pop(uid))
224
225 # prevent this engine from receiving work
223 idx = self.targets.index(uid)
226 idx = self.targets.index(uid)
224 self.targets.pop(idx)
227 self.targets.pop(idx)
225 self.loads.pop(idx)
228 self.loads.pop(idx)
@@ -229,28 +232,40 b' class TaskScheduler(SessionFactory):'
229 if self.pending[uid]:
232 if self.pending[uid]:
230 dc = ioloop.DelayedCallback(lambda : self.handle_stranded_tasks(uid), 5000, self.loop)
233 dc = ioloop.DelayedCallback(lambda : self.handle_stranded_tasks(uid), 5000, self.loop)
231 dc.start()
234 dc.start()
235 else:
236 self.completed.pop(uid)
237 self.failed.pop(uid)
238
232
239
233 @logged
240 @logged
234 def handle_stranded_tasks(self, engine):
241 def handle_stranded_tasks(self, engine):
235 """Deal with jobs resident in an engine that died."""
242 """Deal with jobs resident in an engine that died."""
236 lost = self.pending.pop(engine)
243 lost = self.pending[engine]
237
244 for msg_id in lost.keys():
238 for msg_id, (raw_msg, targets, MET, follow, timeout) in lost.iteritems():
245 if msg_id not in self.pending[engine]:
239 self.all_failed.add(msg_id)
246 # prevent double-handling of messages
240 self.all_done.add(msg_id)
247 continue
248
249 raw_msg = lost[msg_id][0]
250
241 idents,msg = self.session.feed_identities(raw_msg, copy=False)
251 idents,msg = self.session.feed_identities(raw_msg, copy=False)
242 msg = self.session.unpack_message(msg, copy=False, content=False)
252 msg = self.session.unpack_message(msg, copy=False, content=False)
243 parent = msg['header']
253 parent = msg['header']
244 idents = [idents[0],engine]+idents[1:]
254 idents = [engine, idents[0]]
245 # print (idents)
255
256 # build fake error reply
246 try:
257 try:
247 raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
258 raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
248 except:
259 except:
249 content = error.wrap_exception()
260 content = error.wrap_exception()
250 msg = self.session.send(self.client_stream, 'apply_reply', content,
261 msg = self.session.msg('apply_reply', content, parent=parent, subheader={'status':'error'})
251 parent=parent, ident=idents)
262 raw_reply = map(zmq.Message, self.session.serialize(msg, ident=idents))
252 self.session.send(self.mon_stream, msg, ident=['outtask']+idents)
263 # and dispatch it
253 self.update_graph(msg_id)
264 self.dispatch_result(raw_reply)
265
266 # finally scrub completed/failed lists
267 self.completed.pop(engine)
268 self.failed.pop(engine)
254
269
255
270
256 #-----------------------------------------------------------------------
271 #-----------------------------------------------------------------------
@@ -277,6 +292,8 b' class TaskScheduler(SessionFactory):'
277
292
278 # targets
293 # targets
279 targets = set(header.get('targets', []))
294 targets = set(header.get('targets', []))
295 retries = header.get('retries', 0)
296 self.retries[msg_id] = retries
280
297
281 # time dependencies
298 # time dependencies
282 after = Dependency(header.get('after', []))
299 after = Dependency(header.get('after', []))
@@ -315,7 +332,9 b' class TaskScheduler(SessionFactory):'
315 # time deps already met, try to run
332 # time deps already met, try to run
316 if not self.maybe_run(msg_id, *args):
333 if not self.maybe_run(msg_id, *args):
317 # can't run yet
334 # can't run yet
318 self.save_unmet(msg_id, *args)
335 if msg_id not in self.all_failed:
336 # could have failed as unreachable
337 self.save_unmet(msg_id, *args)
319 else:
338 else:
320 self.save_unmet(msg_id, *args)
339 self.save_unmet(msg_id, *args)
321
340
@@ -328,7 +347,7 b' class TaskScheduler(SessionFactory):'
328 if msg_id in self.depending:
347 if msg_id in self.depending:
329 raw,after,targets,follow,timeout = self.depending[msg_id]
348 raw,after,targets,follow,timeout = self.depending[msg_id]
330 if timeout and timeout < now:
349 if timeout and timeout < now:
331 self.fail_unreachable(msg_id, timeout=True)
350 self.fail_unreachable(msg_id, error.TaskTimeout)
332
351
333 @logged
352 @logged
334 def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
353 def fail_unreachable(self, msg_id, why=error.ImpossibleDependency):
@@ -369,7 +388,7 b' class TaskScheduler(SessionFactory):'
369 # we need a can_run filter
388 # we need a can_run filter
370 def can_run(idx):
389 def can_run(idx):
371 # check hwm
390 # check hwm
372 if self.loads[idx] == self.hwm:
391 if self.hwm and self.loads[idx] == self.hwm:
373 return False
392 return False
374 target = self.targets[idx]
393 target = self.targets[idx]
375 # check blacklist
394 # check blacklist
@@ -382,6 +401,7 b' class TaskScheduler(SessionFactory):'
382 return follow.check(self.completed[target], self.failed[target])
401 return follow.check(self.completed[target], self.failed[target])
383
402
384 indices = filter(can_run, range(len(self.targets)))
403 indices = filter(can_run, range(len(self.targets)))
404
385 if not indices:
405 if not indices:
386 # couldn't run
406 # couldn't run
387 if follow.all:
407 if follow.all:
@@ -395,12 +415,14 b' class TaskScheduler(SessionFactory):'
395 for m in follow.intersection(relevant):
415 for m in follow.intersection(relevant):
396 dests.add(self.destinations[m])
416 dests.add(self.destinations[m])
397 if len(dests) > 1:
417 if len(dests) > 1:
418 self.depending[msg_id] = (raw_msg, targets, after, follow, timeout)
398 self.fail_unreachable(msg_id)
419 self.fail_unreachable(msg_id)
399 return False
420 return False
400 if targets:
421 if targets:
401 # check blacklist+targets for impossibility
422 # check blacklist+targets for impossibility
402 targets.difference_update(blacklist)
423 targets.difference_update(blacklist)
403 if not targets or not targets.intersection(self.targets):
424 if not targets or not targets.intersection(self.targets):
425 self.depending[msg_id] = (raw_msg, targets, after, follow, timeout)
404 self.fail_unreachable(msg_id)
426 self.fail_unreachable(msg_id)
405 return False
427 return False
406 return False
428 return False
@@ -454,20 +476,34 b' class TaskScheduler(SessionFactory):'
454 idents,msg = self.session.feed_identities(raw_msg, copy=False)
476 idents,msg = self.session.feed_identities(raw_msg, copy=False)
455 msg = self.session.unpack_message(msg, content=False, copy=False)
477 msg = self.session.unpack_message(msg, content=False, copy=False)
456 engine = idents[0]
478 engine = idents[0]
457 idx = self.targets.index(engine)
479 try:
458 self.finish_job(idx)
480 idx = self.targets.index(engine)
481 except ValueError:
482 pass # skip load-update for dead engines
483 else:
484 self.finish_job(idx)
459 except Exception:
485 except Exception:
460 self.log.error("task::Invaid result: %s"%raw_msg, exc_info=True)
486 self.log.error("task::Invaid result: %s"%raw_msg, exc_info=True)
461 return
487 return
462
488
463 header = msg['header']
489 header = msg['header']
490 parent = msg['parent_header']
464 if header.get('dependencies_met', True):
491 if header.get('dependencies_met', True):
465 success = (header['status'] == 'ok')
492 success = (header['status'] == 'ok')
466 self.handle_result(idents, msg['parent_header'], raw_msg, success)
493 msg_id = parent['msg_id']
467 # send to Hub monitor
494 retries = self.retries[msg_id]
468 self.mon_stream.send_multipart(['outtask']+raw_msg, copy=False)
495 if not success and retries > 0:
496 # failed
497 self.retries[msg_id] = retries - 1
498 self.handle_unmet_dependency(idents, parent)
499 else:
500 del self.retries[msg_id]
501 # relay to client and update graph
502 self.handle_result(idents, parent, raw_msg, success)
503 # send to Hub monitor
504 self.mon_stream.send_multipart(['outtask']+raw_msg, copy=False)
469 else:
505 else:
470 self.handle_unmet_dependency(idents, msg['parent_header'])
506 self.handle_unmet_dependency(idents, parent)
471
507
472 @logged
508 @logged
473 def handle_result(self, idents, parent, raw_msg, success=True):
509 def handle_result(self, idents, parent, raw_msg, success=True):
@@ -511,13 +547,19 b' class TaskScheduler(SessionFactory):'
511 self.depending[msg_id] = args
547 self.depending[msg_id] = args
512 self.fail_unreachable(msg_id)
548 self.fail_unreachable(msg_id)
513 elif not self.maybe_run(msg_id, *args):
549 elif not self.maybe_run(msg_id, *args):
514 # resubmit failed, put it back in our dependency tree
550 # resubmit failed
515 self.save_unmet(msg_id, *args)
551 if msg_id not in self.all_failed:
552 # put it back in our dependency tree
553 self.save_unmet(msg_id, *args)
516
554
517 if self.hwm:
555 if self.hwm:
518 idx = self.targets.index(engine)
556 try:
519 if self.loads[idx] == self.hwm-1:
557 idx = self.targets.index(engine)
520 self.update_graph(None)
558 except ValueError:
559 pass # skip load-update for dead engines
560 else:
561 if self.loads[idx] == self.hwm-1:
562 self.update_graph(None)
521
563
522
564
523
565
@@ -526,7 +568,7 b' class TaskScheduler(SessionFactory):'
526 """dep_id just finished. Update our dependency
568 """dep_id just finished. Update our dependency
527 graph and submit any jobs that just became runable.
569 graph and submit any jobs that just became runable.
528
570
529 Called with dep_id=None to update graph for hwm, but without finishing
571 Called with dep_id=None to update entire graph for hwm, but without finishing
530 a task.
572 a task.
531 """
573 """
532 # print ("\n\n***********")
574 # print ("\n\n***********")
@@ -538,9 +580,11 b' class TaskScheduler(SessionFactory):'
538 # print ("\n\n***********\n\n")
580 # print ("\n\n***********\n\n")
539 # update any jobs that depended on the dependency
581 # update any jobs that depended on the dependency
540 jobs = self.graph.pop(dep_id, [])
582 jobs = self.graph.pop(dep_id, [])
541 # if we have HWM and an engine just become no longer full
583
542 # recheck *all* jobs:
584 # recheck *all* jobs if
543 if self.hwm and any( [ load==self.hwm-1 for load in self.loads]):
585 # a) we have HWM and an engine just become no longer full
586 # or b) dep_id was given as None
587 if dep_id is None or self.hwm and any( [ load==self.hwm-1 for load in self.loads ]):
544 jobs = self.depending.keys()
588 jobs = self.depending.keys()
545
589
546 for msg_id in jobs:
590 for msg_id in jobs:
@@ -27,16 +27,20 b' operators = {'
27 '$lt' : "<",
27 '$lt' : "<",
28 '$gt' : ">",
28 '$gt' : ">",
29 # null is handled weird with ==,!=
29 # null is handled weird with ==,!=
30 '$eq' : "IS",
30 '$eq' : "=",
31 '$ne' : "IS NOT",
31 '$ne' : "!=",
32 '$lte': "<=",
32 '$lte': "<=",
33 '$gte': ">=",
33 '$gte': ">=",
34 '$in' : ('IS', ' OR '),
34 '$in' : ('=', ' OR '),
35 '$nin': ('IS NOT', ' AND '),
35 '$nin': ('!=', ' AND '),
36 # '$all': None,
36 # '$all': None,
37 # '$mod': None,
37 # '$mod': None,
38 # '$exists' : None
38 # '$exists' : None
39 }
39 }
40 null_operators = {
41 '=' : "IS NULL",
42 '!=' : "IS NOT NULL",
43 }
40
44
41 def _adapt_datetime(dt):
45 def _adapt_datetime(dt):
42 return dt.strftime(ISO8601)
46 return dt.strftime(ISO8601)
@@ -205,17 +209,27 b' class SQLiteDB(BaseDB):'
205 raise KeyError("Unsupported operator: %r"%test)
209 raise KeyError("Unsupported operator: %r"%test)
206 if isinstance(op, tuple):
210 if isinstance(op, tuple):
207 op, join = op
211 op, join = op
208 expr = "%s %s ?"%(name, op)
212
209 if isinstance(value, (tuple,list)):
213 if value is None and op in null_operators:
210 expr = '( %s )'%( join.join([expr]*len(value)) )
214 expr = "%s %s"%null_operators[op]
211 args.extend(value)
212 else:
215 else:
213 args.append(value)
216 expr = "%s %s ?"%(name, op)
217 if isinstance(value, (tuple,list)):
218 if op in null_operators and any([v is None for v in value]):
219 # equality tests don't work with NULL
220 raise ValueError("Cannot use %r test with NULL values on SQLite backend"%test)
221 expr = '( %s )'%( join.join([expr]*len(value)) )
222 args.extend(value)
223 else:
224 args.append(value)
214 expressions.append(expr)
225 expressions.append(expr)
215 else:
226 else:
216 # it's an equality check
227 # it's an equality check
217 expressions.append("%s IS ?"%name)
228 if sub_check is None:
218 args.append(sub_check)
229 expressions.append("%s IS NULL"%name)
230 else:
231 expressions.append("%s = ?"%name)
232 args.append(sub_check)
219
233
220 expr = " AND ".join(expressions)
234 expr = " AND ".join(expressions)
221 return expr, args
235 return expr, args
@@ -176,6 +176,37 b' class StreamSession(object):'
176 header = extract_header(msg_or_header)
176 header = extract_header(msg_or_header)
177 return header.get('key', None) == self.key
177 return header.get('key', None) == self.key
178
178
179
180 def serialize(self, msg, ident=None):
181 content = msg.get('content', {})
182 if content is None:
183 content = self.none
184 elif isinstance(content, dict):
185 content = self.pack(content)
186 elif isinstance(content, bytes):
187 # content is already packed, as in a relayed message
188 pass
189 elif isinstance(content, unicode):
190 # should be bytes, but JSON often spits out unicode
191 content = content.encode('utf8')
192 else:
193 raise TypeError("Content incorrect type: %s"%type(content))
194
195 to_send = []
196
197 if isinstance(ident, list):
198 # accept list of idents
199 to_send.extend(ident)
200 elif ident is not None:
201 to_send.append(ident)
202 to_send.append(DELIM)
203 if self.key is not None:
204 to_send.append(self.key)
205 to_send.append(self.pack(msg['header']))
206 to_send.append(self.pack(msg['parent_header']))
207 to_send.append(content)
208
209 return to_send
179
210
180 def send(self, stream, msg_or_type, content=None, buffers=None, parent=None, subheader=None, ident=None, track=False):
211 def send(self, stream, msg_or_type, content=None, buffers=None, parent=None, subheader=None, ident=None, track=False):
181 """Build and send a message via stream or socket.
212 """Build and send a message via stream or socket.
@@ -221,33 +252,11 b' class StreamSession(object):'
221 # we got a Message, not a msg_type
252 # we got a Message, not a msg_type
222 # don't build a new Message
253 # don't build a new Message
223 msg = msg_or_type
254 msg = msg_or_type
224 content = msg['content']
225 else:
255 else:
226 msg = self.msg(msg_or_type, content, parent, subheader)
256 msg = self.msg(msg_or_type, content, parent, subheader)
227
257
228 buffers = [] if buffers is None else buffers
258 buffers = [] if buffers is None else buffers
229 to_send = []
259 to_send = self.serialize(msg, ident)
230 if isinstance(ident, list):
231 # accept list of idents
232 to_send.extend(ident)
233 elif ident is not None:
234 to_send.append(ident)
235 to_send.append(DELIM)
236 if self.key is not None:
237 to_send.append(self.key)
238 to_send.append(self.pack(msg['header']))
239 to_send.append(self.pack(msg['parent_header']))
240
241 if content is None:
242 content = self.none
243 elif isinstance(content, dict):
244 content = self.pack(content)
245 elif isinstance(content, bytes):
246 # content is already packed, as in a relayed message
247 pass
248 else:
249 raise TypeError("Content incorrect type: %s"%type(content))
250 to_send.append(content)
251 flag = 0
260 flag = 0
252 if buffers:
261 if buffers:
253 flag = zmq.SNDMORE
262 flag = zmq.SNDMORE
@@ -48,7 +48,7 b' class TestProcessLauncher(LocalProcessLauncher):'
48 def setup():
48 def setup():
49 cp = TestProcessLauncher()
49 cp = TestProcessLauncher()
50 cp.cmd_and_args = ipcontroller_cmd_argv + \
50 cp.cmd_and_args = ipcontroller_cmd_argv + \
51 ['--profile', 'iptest', '--log-level', '99', '-r', '--usethreads']
51 ['--profile', 'iptest', '--log-level', '99', '-r']
52 cp.start()
52 cp.start()
53 launchers.append(cp)
53 launchers.append(cp)
54 cluster_dir = os.path.join(get_ipython_dir(), 'cluster_iptest')
54 cluster_dir = os.path.join(get_ipython_dir(), 'cluster_iptest')
@@ -212,3 +212,33 b' class TestClient(ClusterTestCase):'
212 time.sleep(0.25)
212 time.sleep(0.25)
213 self.assertEquals(self.client.hub_history()[-1:],ar.msg_ids)
213 self.assertEquals(self.client.hub_history()[-1:],ar.msg_ids)
214
214
215 def test_resubmit(self):
216 def f():
217 import random
218 return random.random()
219 v = self.client.load_balanced_view()
220 ar = v.apply_async(f)
221 r1 = ar.get(1)
222 ahr = self.client.resubmit(ar.msg_ids)
223 r2 = ahr.get(1)
224 self.assertFalse(r1 == r2)
225
226 def test_resubmit_inflight(self):
227 """ensure ValueError on resubmit of inflight task"""
228 v = self.client.load_balanced_view()
229 ar = v.apply_async(time.sleep,1)
230 # give the message a chance to arrive
231 time.sleep(0.2)
232 self.assertRaisesRemote(ValueError, self.client.resubmit, ar.msg_ids)
233 ar.get(2)
234
235 def test_resubmit_badkey(self):
236 """ensure KeyError on resubmit of nonexistant task"""
237 self.assertRaisesRemote(KeyError, self.client.resubmit, ['invalid'])
238
239 def test_purge_results(self):
240 hist = self.client.hub_history()
241 self.client.purge_results(hist)
242 newhist = self.client.hub_history()
243 self.assertTrue(len(newhist) == 0)
244
@@ -15,10 +15,7 b''
15 import tempfile
15 import tempfile
16 import time
16 import time
17
17
18 import uuid
19
20 from datetime import datetime, timedelta
18 from datetime import datetime, timedelta
21 from random import choice, randint
22 from unittest import TestCase
19 from unittest import TestCase
23
20
24 from nose import SkipTest
21 from nose import SkipTest
@@ -157,6 +154,13 b' class TestDictBackend(TestCase):'
157 self.db.update_record(msg_id, dict(completed=datetime.now()))
154 self.db.update_record(msg_id, dict(completed=datetime.now()))
158 rec = self.db.get_record(msg_id)
155 rec = self.db.get_record(msg_id)
159 self.assertTrue(isinstance(rec['completed'], datetime))
156 self.assertTrue(isinstance(rec['completed'], datetime))
157
158 def test_drop_matching(self):
159 msg_ids = self.load_records(10)
160 query = {'msg_id' : {'$in':msg_ids}}
161 self.db.drop_matching_records(query)
162 recs = self.db.find_records(query)
163 self.assertTrue(len(recs)==0)
160
164
161 class TestSQLiteBackend(TestDictBackend):
165 class TestSQLiteBackend(TestDictBackend):
162 def create_db(self):
166 def create_db(self):
@@ -164,19 +168,3 b' class TestSQLiteBackend(TestDictBackend):'
164
168
165 def tearDown(self):
169 def tearDown(self):
166 self.db._db.close()
170 self.db._db.close()
167
168 # optional MongoDB test
169 try:
170 from IPython.parallel.controller.mongodb import MongoDB
171 except ImportError:
172 pass
173 else:
174 class TestMongoBackend(TestDictBackend):
175 def create_db(self):
176 try:
177 return MongoDB(database='iptestdb')
178 except Exception:
179 raise SkipTest("Couldn't connect to mongodb instance")
180
181 def tearDown(self):
182 self.db._connection.drop_database('iptestdb')
@@ -21,7 +21,7 b' import zmq'
21 from IPython import parallel as pmod
21 from IPython import parallel as pmod
22 from IPython.parallel import error
22 from IPython.parallel import error
23 from IPython.parallel import AsyncResult, AsyncHubResult, AsyncMapResult
23 from IPython.parallel import AsyncResult, AsyncHubResult, AsyncMapResult
24 from IPython.parallel import LoadBalancedView, DirectView
24 from IPython.parallel import DirectView
25 from IPython.parallel.util import interactive
25 from IPython.parallel.util import interactive
26
26
27 from IPython.parallel.tests import add_engines
27 from IPython.parallel.tests import add_engines
@@ -33,18 +33,6 b' def setup():'
33
33
34 class TestView(ClusterTestCase):
34 class TestView(ClusterTestCase):
35
35
36 def test_z_crash_task(self):
37 """test graceful handling of engine death (balanced)"""
38 # self.add_engines(1)
39 ar = self.client[-1].apply_async(crash)
40 self.assertRaisesRemote(error.EngineError, ar.get)
41 eid = ar.engine_id
42 tic = time.time()
43 while eid in self.client.ids and time.time()-tic < 5:
44 time.sleep(.01)
45 self.client.spin()
46 self.assertFalse(eid in self.client.ids, "Engine should have died")
47
48 def test_z_crash_mux(self):
36 def test_z_crash_mux(self):
49 """test graceful handling of engine death (direct)"""
37 """test graceful handling of engine death (direct)"""
50 # self.add_engines(1)
38 # self.add_engines(1)
@@ -199,6 +199,7 b' def make_exclude():'
199
199
200 if not have['pymongo']:
200 if not have['pymongo']:
201 exclusions.append(ipjoin('parallel', 'controller', 'mongodb'))
201 exclusions.append(ipjoin('parallel', 'controller', 'mongodb'))
202 exclusions.append(ipjoin('parallel', 'tests', 'test_mongodb'))
202
203
203 if not have['matplotlib']:
204 if not have['matplotlib']:
204 exclusions.extend([ipjoin('lib', 'pylabtools'),
205 exclusions.extend([ipjoin('lib', 'pylabtools'),
@@ -12,6 +12,7 b' Using IPython for parallel computing'
12 parallel_multiengine.txt
12 parallel_multiengine.txt
13 parallel_task.txt
13 parallel_task.txt
14 parallel_mpi.txt
14 parallel_mpi.txt
15 parallel_db.txt
15 parallel_security.txt
16 parallel_security.txt
16 parallel_winhpc.txt
17 parallel_winhpc.txt
17 parallel_demos.txt
18 parallel_demos.txt
@@ -292,6 +292,7 b' you can skip using Dependency objects, and just pass msg_ids or AsyncResult obje'
292
292
293
293
294
294
295
295 Impossible Dependencies
296 Impossible Dependencies
296 ***********************
297 ***********************
297
298
@@ -313,6 +314,27 b' The basic cases that are checked:'
313 This analysis has not been proven to be rigorous, so it is likely possible for tasks
314 This analysis has not been proven to be rigorous, so it is likely possible for tasks
314 to become impossible to run in obscure situations, so a timeout may be a good choice.
315 to become impossible to run in obscure situations, so a timeout may be a good choice.
315
316
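For instance, a sketch (where ``view`` is a :class:`LoadBalancedView` and ``ar`` is any
earlier :class:`AsyncResult`) that gives the scheduler ten seconds to satisfy a dependency
before failing the task:

.. sourcecode:: ipython

    In [1]: with view.temp_flags(after=ar, timeout=10):
       ...:     ar2 = view.apply_async(lambda : 1)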
317
318 Retries and Resubmit
319 ====================
320
321 Retries
322 -------
323
324 Another flag for tasks is `retries`. This is an integer, specifying how many times
325 a task should be resubmitted after failure. This is useful for tasks that should still run
326 if their engine was shut down, or that have some statistical chance of failing. The default
327 is not to retry tasks.
328
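A sketch (``view`` is a :class:`LoadBalancedView`; ``flaky`` stands in for any function
with a chance of raising) showing both the persistent flag and the per-call form:

.. sourcecode:: ipython

    In [1]: view.retries = 5  # all subsequent tasks get up to 5 retries

    In [2]: ar = view.apply_async(flaky)

    In [3]: with view.temp_flags(retries=2):  # or just for a single call
       ...:     ar2 = view.apply_async(flaky)
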
329 Resubmit
330 --------
331
332 Sometimes you may want to re-run a task. This could be because it failed for some reason, and
333 you have fixed the error, or because you want to restore the cluster to an interrupted state.
334 For this, the :class:`Client` has a :meth:`rc.resubmit` method. This simply takes one or more
335 msg_ids, and returns an :class:`AsyncHubResult` for the result(s). You cannot resubmit
336 a task that is pending - only those that have finished, whether successfully or not.
337
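A sketch re-running the task behind an earlier :class:`AsyncResult` ``ar`` and blocking
on the fresh result:

.. sourcecode:: ipython

    In [1]: ahr = rc.resubmit(ar.msg_ids)

    In [2]: ahr.get()
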
316 .. _parallel_schedulers:
338 .. _parallel_schedulers:
317
339
318 Schedulers
340 Schedulers
@@ -391,6 +413,8 b' Disabled features when using the ZMQ Scheduler:'
391 TODO: performance comparisons
413 TODO: performance comparisons
392
414
393
415
416
417
394 More details
418 More details
395 ============
419 ============
396
420