@@ -1,170 +1,167 @@
 # encoding: utf-8

 """Classes used in scattering and gathering sequences.

 Scattering consists of partitioning a sequence and sending the various
 pieces to individual nodes in a cluster.


 Authors:

 * Brian Granger
 * MinRK

 """

 #-------------------------------------------------------------------------------
 # Copyright (C) 2008-2011 The IPython Development Team
 #
 # Distributed under the terms of the BSD License. The full license is in
 # the file COPYING, distributed as part of this software.
 #-------------------------------------------------------------------------------

 #-------------------------------------------------------------------------------
 # Imports
 #-------------------------------------------------------------------------------

 from __future__ import division

 import types
 from itertools import islice

 from IPython.utils.data import flatten as utils_flatten

 #-------------------------------------------------------------------------------
 # Figure out which array packages are present and their array types
 #-------------------------------------------------------------------------------

 arrayModules = []
 try:
     import Numeric
 except ImportError:
     pass
 else:
     arrayModules.append({'module':Numeric, 'type':Numeric.arraytype})
 try:
     import numpy
 except ImportError:
     pass
 else:
     arrayModules.append({'module':numpy, 'type':numpy.ndarray})
 try:
     import numarray
 except ImportError:
     pass
 else:
     arrayModules.append({'module':numarray,
                          'type':numarray.numarraycore.NumArray})

 class Map(object):
     """A class for partitioning a sequence using a map."""

-    def getPartition(self, seq, p, q):
-        """Returns the pth partition of q partitions of seq."""
+    def getPartition(self, seq, p, q, n=None):
+        """Returns the pth partition of q partitions of seq.

+        The length can be specified as `n`,
+        otherwise it is the value of `len(seq)`
+        """
+        n = len(seq) if n is None else n
         # Test for error conditions here
         if p<0 or p>=q:
-            print "No partition exists."
-            return
+            raise ValueError("must have 0 <= p <= q, but have p=%s,q=%s" % (p, q))

-        N = len(seq)
-        remainder = N % q
-        basesize = N // q
+        remainder = n % q
+        basesize = n // q

         if p < remainder:
             low = p * (basesize + 1)
             high = low + basesize + 1
         else:
             low = p * basesize + remainder
             high = low + basesize

         try:
             result = seq[low:high]
         except TypeError:
             # some objects (iterators) can't be sliced,
             # use islice:
             result = list(islice(seq, low, high))

         return result

     def joinPartitions(self, listOfPartitions):
         return self.concatenate(listOfPartitions)

     def concatenate(self, listOfPartitions):
         testObject = listOfPartitions[0]
         # First see if we have a known array type
         for m in arrayModules:
             #print m
             if isinstance(testObject, m['type']):
                 return m['module'].concatenate(listOfPartitions)
         # Next try for Python sequence types
         if isinstance(testObject, (types.ListType, types.TupleType)):
             return utils_flatten(listOfPartitions)
         # If we have scalars, just return listOfPartitions
         return listOfPartitions

 class RoundRobinMap(Map):
-    """Partitions a sequence in a roun robin fashion.
+    """Partitions a sequence in a round robin fashion.

     This currently does not work!
     """

-    def getPartition(self, seq, p, q):
-        # if not isinstance(seq,(list,tuple)):
-        #     raise NotImplementedError("cannot RR partition type %s"%type(seq))
-        return seq[p:len(seq):q]
-        #result = []
-        #for i in range(p,len(seq),q):
-        #    result.append(seq[i])
-        #return result
+    def getPartition(self, seq, p, q, n=None):
+        n = len(seq) if n is None else n
+        return seq[p:n:q]

     def joinPartitions(self, listOfPartitions):
         testObject = listOfPartitions[0]
         # First see if we have a known array type
         for m in arrayModules:
             #print m
             if isinstance(testObject, m['type']):
                 return self.flatten_array(m['type'], listOfPartitions)
         if isinstance(testObject, (types.ListType, types.TupleType)):
             return self.flatten_list(listOfPartitions)
         return listOfPartitions

     def flatten_array(self, klass, listOfPartitions):
         test = listOfPartitions[0]
         shape = list(test.shape)
         shape[0] = sum([ p.shape[0] for p in listOfPartitions])
         A = klass(shape)
         N = shape[0]
         q = len(listOfPartitions)
         for p,part in enumerate(listOfPartitions):
             A[p:N:q] = part
         return A

     def flatten_list(self, listOfPartitions):
         flat = []
         for i in range(len(listOfPartitions[0])):
             flat.extend([ part[i] for part in listOfPartitions if len(part) > i ])
         return flat
         #lengths = [len(x) for x in listOfPartitions]
         #maxPartitionLength = len(listOfPartitions[0])
         #numberOfPartitions = len(listOfPartitions)
         #concat = self.concatenate(listOfPartitions)
         #totalLength = len(concat)
         #result = []
         #for i in range(maxPartitionLength):
         #    result.append(concat[i:totalLength:maxPartitionLength])
         # return self.concatenate(listOfPartitions)

 def mappable(obj):
     """return whether an object is mappable or not."""
     if isinstance(obj, (tuple,list)):
         return True
     for m in arrayModules:
         if isinstance(obj,m['type']):
             return True
     return False

 dists = {'b':Map,'r':RoundRobinMap}


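The hunk above (the `map` module) threads an optional `n` through `getPartition`, so partition boundaries can be computed from the longest sequence in a map call rather than from each sequence's own length. A minimal standalone sketch of the same block-partition arithmetic, runnable without IPython; the helper name `block_partition` is mine, not part of the patch:

```python
from itertools import islice

def block_partition(seq, p, q, n=None):
    """Return the p-th of q contiguous partitions of seq,
    sizing the partitions as if seq had length n."""
    n = len(seq) if n is None else n
    if p < 0 or p >= q:
        raise ValueError("must have 0 <= p < q")
    remainder = n % q
    basesize = n // q
    if p < remainder:
        low = p * (basesize + 1)
        high = low + basesize + 1
    else:
        low = p * basesize + remainder
        high = low + basesize
    try:
        return seq[low:high]
    except TypeError:
        # iterators can't be sliced; islice walks them instead
        return list(islice(seq, low, high))

# When n is taken from a longer companion sequence, a short sequence
# simply yields empty partitions at the tail instead of raising:
print([block_partition(list(range(4)), p, 3, n=10) for p in range(3)])
# -> [[0, 1, 2, 3], [], []]
```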
@@ -1,264 +1,282 @@
 """Remote Functions and decorators for Views.

 Authors:

 * Brian Granger
 * Min RK
 """
 #-----------------------------------------------------------------------------
 # Copyright (C) 2010-2011 The IPython Development Team
 #
 # Distributed under the terms of the BSD License. The full license is in
 # the file COPYING, distributed as part of this software.
 #-----------------------------------------------------------------------------

 #-----------------------------------------------------------------------------
 # Imports
 #-----------------------------------------------------------------------------

 from __future__ import division

 import sys
 import warnings

 from IPython.external.decorator import decorator
 from IPython.testing.skipdoctest import skip_doctest

 from . import map as Map
 from .asyncresult import AsyncMapResult

 #-----------------------------------------------------------------------------
 # Functions and Decorators
 #-----------------------------------------------------------------------------

 @skip_doctest
 def remote(view, block=None, **flags):
     """Turn a function into a remote function.

     This method can be used for map:

     In [1]: @remote(view,block=True)
        ...: def func(a):
        ...:    pass
     """

     def remote_function(f):
         return RemoteFunction(view, f, block=block, **flags)
     return remote_function

 @skip_doctest
 def parallel(view, dist='b', block=None, ordered=True, **flags):
     """Turn a function into a parallel remote function.

     This method can be used for map:

     In [1]: @parallel(view, block=True)
        ...: def func(a):
        ...:    pass
     """

     def parallel_function(f):
         return ParallelFunction(view, f, dist=dist, block=block, ordered=ordered, **flags)
     return parallel_function

 def getname(f):
     """Get the name of an object.

     For use in case of callables that are not functions, and
     thus may not have __name__ defined.

     Order: f.__name__ > f.name > str(f)
     """
     try:
         return f.__name__
     except:
         pass
     try:
         return f.name
     except:
         pass

     return str(f)

 @decorator
 def sync_view_results(f, self, *args, **kwargs):
     """sync relevant results from self.client to our results attribute.

     This is a clone of view.sync_results, but for remote functions
     """
     view = self.view
     if view._in_sync_results:
         return f(self, *args, **kwargs)
     print 'in sync results', f
     view._in_sync_results = True
     try:
         ret = f(self, *args, **kwargs)
     finally:
         view._in_sync_results = False
         view._sync_results()
     return ret

 #--------------------------------------------------------------------------
 # Classes
 #--------------------------------------------------------------------------

 class RemoteFunction(object):
     """Turn an existing function into a remote function.

     Parameters
     ----------

     view : View instance
         The view to be used for execution
     f : callable
         The function to be wrapped into a remote function
     block : bool [default: None]
         Whether to wait for results or not. The default behavior is
         to use the current `block` attribute of `view`

     **flags : remaining kwargs are passed to View.temp_flags
     """

     view = None # the remote connection
     func = None # the wrapped function
     block = None # whether to block
     flags = None # dict of extra kwargs for temp_flags

     def __init__(self, view, f, block=None, **flags):
         self.view = view
         self.func = f
         self.block=block
         self.flags=flags

     def __call__(self, *args, **kwargs):
         block = self.view.block if self.block is None else self.block
         with self.view.temp_flags(block=block, **self.flags):
             return self.view.apply(self.func, *args, **kwargs)


 class ParallelFunction(RemoteFunction):
     """Class for mapping a function to sequences.

     This will distribute the sequences according the a mapper, and call
     the function on each sub-sequence. If called via map, then the function
     will be called once on each element, rather that each sub-sequence.

     Parameters
     ----------

     view : View instance
         The view to be used for execution
     f : callable
         The function to be wrapped into a remote function
     dist : str [default: 'b']
         The key for which mapObject to use to distribute sequences
         options are:
             * 'b' : use contiguous chunks in order
             * 'r' : use round-robin striping
     block : bool [default: None]
         Whether to wait for results or not. The default behavior is
         to use the current `block` attribute of `view`
     chunksize : int or None
         The size of chunk to use when breaking up sequences in a load-balanced manner
     ordered : bool [default: True]
-        Whether
+        Whether the result should be kept in order. If False,
+        results become available as they arrive, regardless of submission order.
     **flags : remaining kwargs are passed to View.temp_flags
     """

-    chunksize=None
-    ordered=None
-    mapObject=None
+    chunksize = None
+    ordered = None
+    mapObject = None
+    _mapping = False

     def __init__(self, view, f, dist='b', block=None, chunksize=None, ordered=True, **flags):
         super(ParallelFunction, self).__init__(view, f, block=block, **flags)
         self.chunksize = chunksize
         self.ordered = ordered

         mapClass = Map.dists[dist]
         self.mapObject = mapClass()

     @sync_view_results
     def __call__(self, *sequences):
         client = self.view.client

+        lens = []
+        maxlen = minlen = -1
+        for i, seq in enumerate(sequences):
+            try:
+                n = len(seq)
+            except Exception:
+                seq = list(seq)
+                if isinstance(sequences, tuple):
+                    # can't alter a tuple
+                    sequences = list(sequences)
+                sequences[i] = seq
+                n = len(seq)
+            if n > maxlen:
+                maxlen = n
+            if minlen == -1 or n < minlen:
+                minlen = n
+            lens.append(n)
+
         # check that the length of sequences match
-        len_0 = len(sequences[0])
-        for s in sequences:
-            if len(s)!=len_0:
-                msg = 'all sequences must have equal length, but %i!=%i'%(len_0,len(s))
-                raise ValueError(msg)
+        if not self._mapping and minlen != maxlen:
+            msg = 'all sequences must have equal length, but have %s' % lens
+            raise ValueError(msg)
+
         balanced = 'Balanced' in self.view.__class__.__name__
         if balanced:
             if self.chunksize:
-                nparts = len_0//self.chunksize + int(len_0%self.chunksize > 0)
+                nparts = maxlen // self.chunksize + int(maxlen % self.chunksize > 0)
             else:
-                nparts = len_0
+                nparts = maxlen
             targets = [None]*nparts
         else:
             if self.chunksize:
                 warnings.warn("`chunksize` is ignored unless load balancing", UserWarning)
             # multiplexed:
             targets = self.view.targets
             # 'all' is lazily evaluated at execution time, which is now:
             if targets == 'all':
                 targets = client._build_targets(targets)[1]
             elif isinstance(targets, int):
                 # single-engine view, targets must be iterable
                 targets = [targets]
             nparts = len(targets)

         msg_ids = []
         for index, t in enumerate(targets):
             args = []
             for seq in sequences:
-                part = self.mapObject.getPartition(seq, index, nparts)
-                if len(part) == 0:
-                    continue
-                else:
-                    args.append(part)
-            if not args:
+                part = self.mapObject.getPartition(seq, index, nparts, maxlen)
+                args.append(part)
+            if not any(args):
                 continue

-
-            if hasattr(self, '_map'):
+            if self._mapping:
                 if sys.version_info[0] >= 3:
                     f = lambda f, *sequences: list(map(f, *sequences))
                 else:
                     f = map
-                args = [self.func]+args
+                args = [self.func] + args
             else:
                 f=self.func

             view = self.view if balanced else client[t]
             with view.temp_flags(block=False, **self.flags):
                 ar = view.apply(f, *args)

-            msg_ids.append(ar.msg_ids[0])
+            msg_ids.extend(ar.msg_ids)

         r = AsyncMapResult(self.view.client, msg_ids, self.mapObject,
                             fname=getname(self.func),
                             ordered=self.ordered
                         )

         if self.block:
             try:
                 return r.get()
             except KeyboardInterrupt:
                 return r
         else:
             return r

     def map(self, *sequences):
-        """call a function on each element of a sequence remotely.
+        """call a function on each element of one or more sequence(s) remotely.
         This should behave very much like the builtin map, but return an AsyncMapResult
         if self.block is False.
+
+        That means it can take generators (will be cast to lists locally),
+        and mismatched sequence lengths will be padded with None.
         """
-        # set _map as a flag for use inside self.__call__
-        self._map = True
+        # set _mapping as a flag for use inside self.__call__
+        self._mapping = True
         try:
-            ret = self.__call__(*sequences)
+            ret = self(*sequences)
         finally:
-            del self._map
+            self._mapping = False
         return ret

 __all__ = ['remote', 'parallel', 'RemoteFunction', 'ParallelFunction']
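The core of the `remotefunction` change is the length-normalization pass added at the top of `ParallelFunction.__call__`: generators are realized into lists, every sequence length is recorded, and the equal-length check is enforced only when the call is not a `map`. A rough standalone sketch of that logic (the function name and return shape here are mine, for illustration only):

```python
def normalize_sequences(sequences, mapping=False):
    """Cast generators to lists, record lengths, and enforce equal
    lengths only when not in map mode (a simplified restatement of
    the check added to ParallelFunction.__call__)."""
    sequences = list(sequences)
    lens = []
    for i, seq in enumerate(sequences):
        try:
            n = len(seq)
        except TypeError:
            # e.g. a generator: realize it so it can be sliced and reused
            sequences[i] = seq = list(seq)
            n = len(seq)
        lens.append(n)
    if not mapping and len(set(lens)) > 1:
        raise ValueError("all sequences must have equal length, but have %s" % lens)
    return sequences, lens, max(lens)

# maxlen is what gets passed through to getPartition as `n`
seqs, lens, maxlen = normalize_sequences([iter(range(16)), range(4)], mapping=True)
# lens == [16, 4], maxlen == 16; the shorter sequence will produce
# short or empty partitions, which map() on the engine pads with None
```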
@@ -1,177 +1,211 @@
 # -*- coding: utf-8 -*-
 """test LoadBalancedView objects

 Authors:

 * Min RK
 """
 #-------------------------------------------------------------------------------
 # Copyright (C) 2011 The IPython Development Team
 #
 # Distributed under the terms of the BSD License. The full license is in
 # the file COPYING, distributed as part of this software.
 #-------------------------------------------------------------------------------

 #-------------------------------------------------------------------------------
 # Imports
 #-------------------------------------------------------------------------------

 import sys
 import time

 import zmq
 from nose import SkipTest
 from nose.plugins.attrib import attr

 from IPython import parallel as pmod
 from IPython.parallel import error

 from IPython.parallel.tests import add_engines

 from .clienttest import ClusterTestCase, crash, wait, skip_without

 def setup():
     add_engines(3, total=True)

 class TestLoadBalancedView(ClusterTestCase):

     def setUp(self):
         ClusterTestCase.setUp(self)
         self.view = self.client.load_balanced_view()

     @attr('crash')
     def test_z_crash_task(self):
         """test graceful handling of engine death (balanced)"""
         # self.add_engines(1)
         ar = self.view.apply_async(crash)
         self.assertRaisesRemote(error.EngineError, ar.get, 10)
         eid = ar.engine_id
         tic = time.time()
         while eid in self.client.ids and time.time()-tic < 5:
             time.sleep(.01)
             self.client.spin()
         self.assertFalse(eid in self.client.ids, "Engine should have died")

     def test_map(self):
         def f(x):
             return x**2
         data = range(16)
         r = self.view.map_sync(f, data)
         self.assertEqual(r, map(f, data))
+
+    def test_map_generator(self):
+        def f(x):
+            return x**2
+
+        data = range(16)
+        r = self.view.map_sync(f, iter(data))
+        self.assertEqual(r, map(f, iter(data)))
+
+    def test_map_short_first(self):
+        def f(x,y):
+            if y is None:
+                return y
+            if x is None:
+                return x
+            return x*y
+        data = range(10)
+        data2 = range(4)
+
+        r = self.view.map_sync(f, data, data2)
+        self.assertEqual(r, map(f, data, data2))
+
+    def test_map_short_last(self):
+        def f(x,y):
+            if y is None:
+                return y
+            if x is None:
+                return x
+            return x*y
+        data = range(4)
+        data2 = range(10)
+
+        r = self.view.map_sync(f, data, data2)
+        self.assertEqual(r, map(f, data, data2))

     def test_map_unordered(self):
         def f(x):
             return x**2
         def slow_f(x):
             import time
             time.sleep(0.05*x)
             return x**2
         data = range(16,0,-1)
         reference = map(f, data)

         amr = self.view.map_async(slow_f, data, ordered=False)
         self.assertTrue(isinstance(amr, pmod.AsyncMapResult))
         # check individual elements, retrieved as they come
         # list comprehension uses __iter__
         astheycame = [ r for r in amr ]
         # Ensure that at least one result came out of order:
         self.assertNotEqual(astheycame, reference, "should not have preserved order")
         self.assertEqual(sorted(astheycame, reverse=True), reference, "result corrupted")

     def test_map_ordered(self):
         def f(x):
             return x**2
         def slow_f(x):
             import time
             time.sleep(0.05*x)
             return x**2
         data = range(16,0,-1)
         reference = map(f, data)

         amr = self.view.map_async(slow_f, data)
         self.assertTrue(isinstance(amr, pmod.AsyncMapResult))
         # check individual elements, retrieved as they come
         # list(amr) uses __iter__
         astheycame = list(amr)
         # Ensure that results came in order
         self.assertEqual(astheycame, reference)
         self.assertEqual(amr.result, reference)

     def test_map_iterable(self):
         """test map on iterables (balanced)"""
         view = self.view
         # 101 is prime, so it won't be evenly distributed
         arr = range(101)
         # so that it will be an iterator, even in Python 3
         it = iter(arr)
         r = view.map_sync(lambda x:x, arr)
         self.assertEqual(r, list(arr))


     def test_abort(self):
         view = self.view
         ar = self.client[:].apply_async(time.sleep, .5)
         ar = self.client[:].apply_async(time.sleep, .5)
         time.sleep(0.2)
         ar2 = view.apply_async(lambda : 2)
         ar3 = view.apply_async(lambda : 3)
         view.abort(ar2)
         view.abort(ar3.msg_ids)
         self.assertRaises(error.TaskAborted, ar2.get)
         self.assertRaises(error.TaskAborted, ar3.get)

     def test_retries(self):
         view = self.view
         view.timeout = 1 # prevent hang if this doesn't behave
         def fail():
             assert False
         for r in range(len(self.client)-1):
             with view.temp_flags(retries=r):
                 self.assertRaisesRemote(AssertionError, view.apply_sync, fail)

         with view.temp_flags(retries=len(self.client), timeout=0.25):
             self.assertRaisesRemote(error.TaskTimeout, view.apply_sync, fail)

     def test_invalid_dependency(self):
         view = self.view
         with view.temp_flags(after='12345'):
             self.assertRaisesRemote(error.InvalidDependency, view.apply_sync, lambda : 1)

     def test_impossible_dependency(self):
         self.minimum_engines(2)
         view = self.client.load_balanced_view()
         ar1 = view.apply_async(lambda : 1)
         ar1.get()
         e1 = ar1.engine_id
         e2 = e1
         while e2 == e1:
             ar2 = view.apply_async(lambda : 1)
             ar2.get()
             e2 = ar2.engine_id

         with view.temp_flags(follow=[ar1, ar2]):
             self.assertRaisesRemote(error.ImpossibleDependency, view.apply_sync, lambda : 1)


     def test_follow(self):
         ar = self.view.apply_async(lambda : 1)
         ar.get()
         ars = []
         first_id = ar.engine_id

         self.view.follow = ar
         for i in range(5):
             ars.append(self.view.apply_async(lambda : 1))
         self.view.wait(ars)
         for ar in ars:
             self.assertEqual(ar.engine_id, first_id)

     def test_after(self):
         view = self.view
         ar = view.apply_async(time.sleep, 0.5)
         with view.temp_flags(after=ar):
             ar2 = view.apply_async(lambda : 1)

         ar.wait()
         ar2.wait()
         self.assertTrue(ar2.started >= ar.completed, "%s not >= %s"%(ar.started, ar.completed))
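The new tests above exercise the behavior end to end against a LoadBalancedView. For reference, a hedged usage sketch of the same features from a client session; it assumes a running IPython.parallel cluster (e.g. started with `ipcluster start -n 3`), which is not part of the patch itself:

```python
from IPython import parallel

rc = parallel.Client()           # connect to the default cluster profile
view = rc.load_balanced_view()

# generators are now accepted: they are cast to lists client-side
squares = view.map_sync(lambda x: x ** 2, iter(range(16)))

# mismatched lengths no longer raise in map; the shorter sequence is
# padded with None, mirroring Python 2's builtin map semantics
def mul(x, y):
    return x * y if (x is not None and y is not None) else None

products = view.map_sync(mul, range(10), range(4))
print(squares)
print(products)
```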