upstream/ipython Commit - r3587:15b75670

add map/scatter/gather/ParallelFunction from kernel

MinRK -

r3587:15b75670

parent child

IPython/zmq/parallel/map.py

0 created 644 +158 0

			@@ -0,0 +1,158 b''
		1	# encoding: utf-8
		2
		3	"""Classes used in scattering and gathering sequences.
		4
		5	Scattering consists of partitioning a sequence and sending the various
		6	pieces to individual nodes in a cluster.
		7	"""
		8
		9	__docformat__ = "restructuredtext en"
		10
		11	#-------------------------------------------------------------------------------
		12	# Copyright (C) 2008 The IPython Development Team
		13	#
		14	# Distributed under the terms of the BSD License. The full license is in
		15	# the file COPYING, distributed as part of this software.
		16	#-------------------------------------------------------------------------------
		17
		18	#-------------------------------------------------------------------------------
		19	# Imports
		20	#-------------------------------------------------------------------------------
		21
		22	import types
		23
		24	from IPython.utils.data import flatten as utils_flatten
		25
		26	#-------------------------------------------------------------------------------
		27	# Figure out which array packages are present and their array types
		28	#-------------------------------------------------------------------------------
		29
		30	arrayModules = []
		31	try:
		32	import Numeric
		33	except ImportError:
		34	pass
		35	else:
		36	arrayModules.append({'module':Numeric, 'type':Numeric.arraytype})
		37	try:
		38	import numpy
		39	except ImportError:
		40	pass
		41	else:
		42	arrayModules.append({'module':numpy, 'type':numpy.ndarray})
		43	try:
		44	import numarray
		45	except ImportError:
		46	pass
		47	else:
		48	arrayModules.append({'module':numarray,
		49	'type':numarray.numarraycore.NumArray})
		50
		51	class Map:
		52	"""A class for partitioning a sequence using a map."""
		53
		54	def getPartition(self, seq, p, q):
		55	"""Returns the pth partition of q partitions of seq."""
		56
		57	# Test for error conditions here
		58	if p<0 or p>=q:
		59	print "No partition exists."
		60	return
		61
		62	remainder = len(seq)%q
		63	basesize = len(seq)/q
		64	hi = []
		65	lo = []
		66	for n in range(q):
		67	if n < remainder:
		68	lo.append(n * (basesize + 1))
		69	hi.append(lo[-1] + basesize + 1)
		70	else:
		71	lo.append(n*basesize + remainder)
		72	hi.append(lo[-1] + basesize)
		73
		74
		75	result = seq[lo[p]:hi[p]]
		76	return result
		77
		78	def joinPartitions(self, listOfPartitions):
		79	return self.concatenate(listOfPartitions)
		80
		81	def concatenate(self, listOfPartitions):
		82	testObject = listOfPartitions[0]
		83	# First see if we have a known array type
		84	for m in arrayModules:
		85	#print m
		86	if isinstance(testObject, m['type']):
		87	return m['module'].concatenate(listOfPartitions)
		88	# Next try for Python sequence types
		89	if isinstance(testObject, (types.ListType, types.TupleType)):
		90	return utils_flatten(listOfPartitions)
		91	# If we have scalars, just return listOfPartitions
		92	return listOfPartitions
		93
		94	class RoundRobinMap(Map):
		95	"""Partitions a sequence in a roun robin fashion.
		96
		97	This currently does not work!
		98	"""
		99
		100	def getPartition(self, seq, p, q):
		101	# if not isinstance(seq,(list,tuple)):
		102	# raise NotImplementedError("cannot RR partition type %s"%type(seq))
		103	return seq[p:len(seq):q]
		104	#result = []
		105	#for i in range(p,len(seq),q):
		106	# result.append(seq[i])
		107	#return result
		108
		109	def joinPartitions(self, listOfPartitions):
		110	testObject = listOfPartitions[0]
		111	# First see if we have a known array type
		112	for m in arrayModules:
		113	#print m
		114	if isinstance(testObject, m['type']):
		115	return self.flatten_array(m['type'], listOfPartitions)
		116	if isinstance(testObject, (types.ListType, types.TupleType)):
		117	return self.flatten_list(listOfPartitions)
		118	return listOfPartitions
		119
		120	def flatten_array(self, klass, listOfPartitions):
		121	test = listOfPartitions[0]
		122	shape = list(test.shape)
		123	shape[0] = sum([ p.shape[0] for p in listOfPartitions])
		124	A = klass(shape)
		125	N = shape[0]
		126	q = len(listOfPartitions)
		127	for p,part in enumerate(listOfPartitions):
		128	A[p:N:q] = part
		129	return A
		130
		131	def flatten_list(self, listOfPartitions):
		132	flat = []
		133	for i in range(len(listOfPartitions[0])):
		134	flat.extend([ part[i] for part in listOfPartitions if len(part) > i ])
		135	return flat
		136	#lengths = [len(x) for x in listOfPartitions]
		137	#maxPartitionLength = len(listOfPartitions[0])
		138	#numberOfPartitions = len(listOfPartitions)
		139	#concat = self.concatenate(listOfPartitions)
		140	#totalLength = len(concat)
		141	#result = []
		142	#for i in range(maxPartitionLength):
		143	# result.append(concat[i:totalLength:maxPartitionLength])
		144	# return self.concatenate(listOfPartitions)
		145
		146	def mappable(obj):
		147	"""return whether an object is mappable or not."""
		148	if isinstance(obj, (tuple,list)):
		149	return True
		150	for m in arrayModules:
		151	if isinstance(obj,m['type']):
		152	return True
		153	return False
		154
# Lookup table from a one-letter distribution code to the partitioner class
# that implements it: 'b' selects Map (contiguous slices) and 'r' selects
# RoundRobinMap (strided slices).
dists = {'b':Map,'r':RoundRobinMap}
		156
		157
		158

IPython/zmq/parallel/client.py

0 +185 -8

              from view import DirectView, LoadBalancedView
              from dependency import Dependency, depend, require
              import error
+             import map as Map
              #--------------------------------------------------------------------------
              # helpers for implementing old MEC API via client.apply
                      return RemoteFunction(client, f, bound, block, targets)
                  return remote_function
def parallel(client, dist='b', bound=False, block=None, targets='all'):
    """Decorator factory producing a ParallelFunction from a plain function.

    The returned decorator wraps its argument in
    ``ParallelFunction(client, f, dist, bound, block, targets)``.

    Typical use for mapping a function over sequences::

        @parallel(client, block=True)
        def func(a):
            ...
    """
    def parallel_function(f):
        # Keywords mirror ParallelFunction.__init__'s parameter names.
        return ParallelFunction(client, f, dist=dist, bound=bound,
                                block=block, targets=targets)
    return parallel_function
              #--------------------------------------------------------------------------
              # Classes
              #--------------------------------------------------------------------------
                              block=self.block, targets=self.targets, bound=self.bound)
class ParallelFunction(RemoteFunction):
    """Class for mapping a function to sequences.

    Calling an instance splits each argument sequence into one partition
    per engine, submits one ``client.apply`` per engine, and gathers the
    results through a PendingMapResult.
    """

    def __init__(self, client, f, dist='b', bound=False, block=None, targets='all'):
        """Set up the remote function and pick the partitioning scheme.

        ``dist`` is a key into ``Map.dists``; the remaining arguments are
        forwarded unchanged to ``RemoteFunction.__init__``.
        """
        super(ParallelFunction, self).__init__(client, f, bound, block, targets)
        mapClass = Map.dists[dist]
        self.mapObject = mapClass()

    def __call__(self, *sequences):
        """Apply the function to partitions of the given sequences.

        Parameters
        ----------
        *sequences : equal-length sequences, one per function argument

        Returns
        -------
        The joined result when ``self.block`` is true, otherwise the
        PendingMapResult tracking the outstanding requests.

        Raises
        ------
        ValueError
            If the sequences do not all have the same length.
        """
        len_0 = len(sequences[0])
        for s in sequences:
            if len(s) != len_0:
                raise ValueError('all sequences must have equal length')

        if self.targets is None:
            # load-balanced: one task per element, engine left unspecified
            engines = [None] * len_0
        else:
            # multiplexed: one task per resolved target engine
            engines = self.client._build_targets(self.targets)[-1]

        nparts = len(engines)
        msg_ids = []
        for index, engineid in enumerate(engines):
            args = [self.mapObject.getPartition(seq, index, nparts)
                    for seq in sequences]
            mid = self.client.apply(self.func, args=args, block=False,
                                    bound=self.bound,
                                    targets=engineid)
            msg_ids.append(mid)

        # Build the pending result before branching: it used to be created
        # only in the blocking branch, so the non-blocking return hit an
        # unbound local.
        pending = PendingMapResult(self.client, msg_ids, self.mapObject)
        if self.block:
            pending.wait()
            return pending.result
        return pending
class PendingResult(object):
    """Handle on the outcome of one or more non-blocking calls.

    Holds the client and the message ids it is waiting on; the combined
    result is fetched lazily through the ``result`` property and cached.
    """

    def __init__(self, client, msg_ids):
        self.client = client
        self.msg_ids = msg_ids
        self._result = None   # cached value produced by reconstruct_result
        self.done = False     # set from client.barrier() in wait()

    def __repr__(self):
        cls_name = self.__class__.__name__
        if self.done:
            return "<%s: finished>"%(cls_name)
        return "<%s: %r>"%(cls_name,self.msg_ids)

    @property
    def result(self):
        """The reconstructed result, computed on first successful access.

        Looks up each message id in ``client.results``, passes the list
        through ``error.collect_exceptions`` and then through
        ``reconstruct_result``.  Raises ``error.ResultNotCompleted`` if the
        calls are still not done after one ``wait(0)``.
        """
        if self._result is not None:
            return self._result
        if not self.done:
            self.wait(0)
        if not self.done:
            raise error.ResultNotCompleted
        lookup = self.client.results.get
        results = [lookup(mid) for mid in self.msg_ids]
        results = error.collect_exceptions(results, 'get_result')
        self._result = self.reconstruct_result(results)
        return self._result

    def reconstruct_result(self, res):
        """Turn the list of per-message results into the returned value.

        Subclasses override this.  Here a one-element list is unwrapped to
        its sole item and any other list is returned unchanged.
        """
        return res[0] if len(res) == 1 else res

    def wait(self, timout=-1):
        """Call ``client.barrier`` on the message ids and record its answer.

        The ``timout`` argument is accepted but not used by this method.
        Returns the new value of ``self.done``.
        """
        self.done = self.client.barrier(self.msg_ids)
        return self.done
class PendingMapResult(PendingResult):
    """PendingResult for non-blocking gathers.

    Remembers the map object that did the partitioning so the individual
    results can be joined back into one sequence.
    """

    def __init__(self, client, msg_ids, mapObject):
        self.mapObject = mapObject
        super(PendingMapResult, self).__init__(client, msg_ids)

    def reconstruct_result(self, res):
        """Join the per-engine results with ``mapObject.joinPartitions``."""
        joined = self.mapObject.joinPartitions(res)
        return joined
              class AbortedTask(object):
                  """A basic wrapper object describing an aborted task."""
                  def __init__(self, msg_id):
                  # Begin public methods
                  #--------------------------------------------------------------------------
+                 @property
+                 def remote(self):
+                     """property for convenient RemoteFunction generation.
+                     >>> @client.remote
+                     ... def f():
+                             import os
+                             print (os.getpid())
+                     """
+                     return remote(self, block=self.block)
                  def spin(self):
                      """Flush any registration notifications and execution results
                      waiting in the ZMQ queue.
                          self.barrier(msg_id)
                          return self._maybe_raise(self.results[msg_id])
                      else:
-                         return msg_id
+                         return PendingResult(self, [msg_id])
                  def _apply_direct(self, f, args, kwargs, bound=True, block=None, targets=None,
                                              after=None, follow=None):
                      if block:
                          self.barrier(msg_ids)
                      else:
-                         if len(msg_ids) == 1:
-                             return msg_ids[0]
-                         else:
-                             return msg_ids
+                         return PendingResult(self, msg_ids)
                      if len(msg_ids) == 1:
                          return self._maybe_raise(self.results[msg_ids[0]])
                      else:
                                  result[target] = self.results[mid]
                          return error.collect_exceptions(result, f.__name__)
+                 @defaultblock
+                 def map(self, f, sequences, targets=None, block=None, bound=False):
+                     pf = ParallelFunction(self,f,block=block,bound=bound,targets=targets)
+                     return pf(*sequences)
                  #--------------------------------------------------------------------------
                  # Data movement
                  #--------------------------------------------------------------------------
                  @defaultblock
-                 def push(self, ns, targets=None, block=None):
+                 def push(self, ns, targets='all', block=None):
                      """Push the contents of `ns` into the namespace on `target`"""
                      if not isinstance(ns, dict):
                          raise TypeError("Must be a dict, not %s"%type(ns))
                      return result
                  @defaultblock
-                 def pull(self, keys, targets=None, block=True):
+                 def pull(self, keys, targets='all', block=True):
                      """Pull objects from `target`'s namespace by `keys`"""
                      if isinstance(keys, str):
                          pass
                      result = self.apply(_pull, (keys,), targets=targets, block=block, bound=True)
                      return result
+                 @defaultblock
+                 def scatter(self, key, seq, dist='b', flatten=False, targets='all', block=None):
+                     """
+                     Partition a Python sequence and send the partitions to a set of engines.
+                     """
+                     targets = self._build_targets(targets)[-1]
+                     mapObject = Map.dists[dist]()
+                     nparts = len(targets)
+                     msg_ids = []
+                     for index, engineid in enumerate(targets):
+                         partition = mapObject.getPartition(seq, index, nparts)
+                         if flatten and len(partition) == 1:
+                             mid = self.push({key: partition[0]}, targets=engineid, block=False)
+                         else:
+                             mid = self.push({key: partition}, targets=engineid, block=False)
+                         msg_ids.append(mid)
+                     r = PendingResult(self, msg_ids)
+                     if block:
+                         r.wait()
+                         return
+                     else:
+                         return r
+                 @defaultblock
+                 def gather(self, key, dist='b', targets='all', block=True):
+                     """
+                     Gather a partitioned sequence on a set of engines as a single local seq.
+                     """
+                     targets = self._build_targets(targets)[-1]
+                     mapObject = Map.dists[dist]()
+                     msg_ids = []
+                     for index, engineid in enumerate(targets):
+                         msg_ids.append(self.pull(key, targets=engineid,block=False))
+                     r = PendingMapResult(self, msg_ids, mapObject)
+                     if block:
+                         r.wait()
+                         return r.result
+                     else:
+                         return r
                  #--------------------------------------------------------------------------
                  # Query methods
                  #--------------------------------------------------------------------------
                      for stream in (self.queue_stream, self.notifier_stream,
                                      self.task_stream, self.control_stream):
                          stream.flush()
# Names exported by a star-import of this module.
__all__ = [ 'Client',
            'depend',
            'require',
            'remote',
            'parallel',
            'RemoteFunction',
            'ParallelFunction',
            'DirectView',
            'LoadBalancedView',
            'PendingResult',
            'PendingMapResult'
            ]

IPython/zmq/parallel/error.py

0 +7 -3

                              et,ev,tb = sys.exc_info()
-             def collect_exceptions(rdict, method):
+             def collect_exceptions(rdict_or_list, method):
                  """check a result dict for errors, and raise CompositeError if any exist.
                  Passthrough otherwise."""
                  elist = []
-                 for r in rdict.values():
+                 if isinstance(rdict_or_list, dict):
+                     rlist = rdict_or_list.values()
+                 else:
+                     rlist = rdict_or_list
+                 for r in rlist:
                      if isinstance(r, RemoteError):
                          en, ev, etb, ei = r.ename, r.evalue, r.traceback, r.engine_info
                          # Sometimes we could have CompositeError in our list.  Just take
                          else:
                              elist.append((en, ev, etb, ei))
                  if len(elist)==0:
-                     return rdict
+                     return rdict_or_list
                  else:
                      msg = "one or more exceptions from call to method: %s" % (method)
                      # This silliness is needed so the debugger has access to the exception

IPython/zmq/parallel/view.py

0 +21 0

                      block = block if block is not None else self.block
                      return self.client.pull(key_s, block=block, targets=self.targets)
+                 def scatter(self, key, seq, dist='b', flatten=False, targets=None, block=None):
+                     """
+                     Partition a Python sequence and send the partitions to a set of engines.
+                     """
+                     block = block if block is not None else self.block
+                     if targets is None:
+                         targets = self.targets
+                     return self.client.scatter(key, seq, dist=dist, flatten=flatten,
+                                 targets=targets, block=block)
+                 def gather(self, key, dist='b', targets=None, block=True):
+                     """
+                     Gather a partitioned sequence on a set of engines as a single local seq.
+                     """
+                     block = block if block is not None else self.block
+                     if targets is None:
+                         targets = self.targets
+                     return self.client.gather(key, dist=dist, targets=targets, block=block)
                  def __getitem__(self, key):
                      return self.get(key)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages