@@ -885,7 +885,62 @@ class FullBlockingMultiEngineClient(InteractiveMultiEngineClient):
         targets, block = self._findTargetsAndBlock(targets, block)
         return self._blockFromThread(self.smultiengine.run, filename,
             targets=targets, block=block)
 
+    def benchmark(self, push_size=10000):
+        """
+        Run performance benchmarks for the current IPython cluster.
+
+        This method tests both the latency of sending commands and data to
+        the engines as well as the throughput of sending large objects to the
+        engines using push. The latency is measured by having one or more
+        engines execute the command 'pass'. The throughput is measured by
+        sending a NumPy array of size `push_size` to one or more engines.
+
+        These benchmarks will vary widely on different hardware and networks
+        and thus can be used to get an idea of the performance characteristics
+        of a particular configuration of an IPython controller and engines.
+
+        This function is not testable within our current testing framework.
+        """
+        import timeit, __builtin__
+        __builtin__._mec_self = self
+        benchmarks = {}
+        repeat = 3
+        count = 10
+
+        timer = timeit.Timer('_mec_self.execute("pass",0)')
+        result = 1000*min(timer.repeat(repeat,count))/count
+        benchmarks['single_engine_latency'] = (result,'msec')
+
+        timer = timeit.Timer('_mec_self.execute("pass")')
+        result = 1000*min(timer.repeat(repeat,count))/count
+        benchmarks['all_engine_latency'] = (result,'msec')
+
+        try:
+            import numpy as np
+        except:
+            pass
+        else:
+            timer = timeit.Timer(
+                "_mec_self.push(d)",
+                "import numpy as np; d = dict(a=np.zeros(%r,dtype='float64'))" % push_size
+            )
+            result = min(timer.repeat(repeat,count))/count
+            benchmarks['all_engine_push'] = (1e-6*push_size*8/result, 'MB/sec')
+
+        try:
+            import numpy as np
+        except:
+            pass
+        else:
+            timer = timeit.Timer(
+                "_mec_self.push(d,0)",
+                "import numpy as np; d = dict(a=np.zeros(%r,dtype='float64'))" % push_size
+            )
+            result = min(timer.repeat(repeat,count))/count
+            benchmarks['single_engine_push'] = (1e-6*push_size*8/result, 'MB/sec')
+
+        return benchmarks
 
 
 components.registerAdapter(FullBlockingMultiEngineClient,
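
For context: `benchmark` is meant to be called on a connected client, and the
throughput entries follow directly from the timing, since a `push_size`-element
float64 array is `push_size * 8` bytes, `1e-6 * push_size * 8 / seconds` gives
MB/sec. A rough usage sketch (not part of the patch), assuming a running
cluster and the `IPython.kernel.client` API of this era::

    from IPython.kernel import client

    mec = client.MultiEngineClient()          # connect to the running cluster
    results = mec.benchmark(push_size=100000)

    # benchmark() returns a dict mapping test names to (value, unit) pairs,
    # e.g. {'single_engine_latency': (1.2, 'msec'), ...}
    for name, (value, unit) in sorted(results.items()):
        print '%s: %.3f %s' % (name, value, unit)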
@@ -478,15 +478,31 @@ Try running ipcluster with the -xy flags: ipcluster local -xy -n 4""")
         cont_args.append('-y')
     return True
 
+def check_reuse(args, cont_args):
+    if args.r:
+        cont_args.append('-r')
+        if args.client_port == 0 or args.engine_port == 0:
+            log.err("""
+To reuse FURL files, you must also set the client and engine ports using
+the --client-port and --engine-port options.""")
+            reactor.stop()
+            return False
+        cont_args.append('--client-port=%i' % args.client_port)
+        cont_args.append('--engine-port=%i' % args.engine_port)
+    return True
 
 def main_local(args):
     cont_args = []
     cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller'))
 
     # Check security settings before proceeding
     if not check_security(args, cont_args):
         return
 
+    # See if we are reusing FURL files
+    if not check_reuse(args, cont_args):
+        return
+
     cl = ControllerLauncher(extra_args=cont_args)
     dstart = cl.start()
     def start_engines(cont_pid):
@@ -513,18 +529,22 @@ def main_local(args):
     dstart.addErrback(lambda f: f.raiseException())
 
 
-def main_mpirun(args):
+def main_mpi(args):
     cont_args = []
     cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller'))
 
     # Check security settings before proceeding
     if not check_security(args, cont_args):
         return
 
+    # See if we are reusing FURL files
+    if not check_reuse(args, cont_args):
+        return
+
     cl = ControllerLauncher(extra_args=cont_args)
     dstart = cl.start()
     def start_engines(cont_pid):
-        raw_args = ['mpirun']
+        raw_args = [args.cmd]
         raw_args.extend(['-n',str(args.n)])
         raw_args.append('ipengine')
         raw_args.append('-l')
@@ -554,11 +574,15 @@ def main_mpirun(args):
 def main_pbs(args):
     cont_args = []
     cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller'))
 
     # Check security settings before proceeding
     if not check_security(args, cont_args):
         return
 
+    # See if we are reusing FURL files
+    if not check_reuse(args, cont_args):
+        return
+
     cl = ControllerLauncher(extra_args=cont_args)
     dstart = cl.start()
     def start_engines(r):
@@ -598,13 +622,16 @@ def main_ssh(args):
     if not check_security(args, cont_args):
         return
 
+    # See if we are reusing FURL files
+    if not check_reuse(args, cont_args):
+        return
+
     cl = ControllerLauncher(extra_args=cont_args)
     dstart = cl.start()
     def start_engines(cont_pid):
         ssh_set = SSHEngineSet(clusterfile['engines'], sshx=args.sshx)
         def shutdown(signum, frame):
             d = ssh_set.kill()
-            # d.addErrback(log.err)
             cl.interrupt_then_kill(1.0)
             reactor.callLater(2.0, reactor.stop)
         signal.signal(signal.SIGINT,shutdown)
@@ -621,6 +648,26 @@ def main_ssh(args):
 def get_args():
     base_parser = argparse.ArgumentParser(add_help=False)
     base_parser.add_argument(
+        '-r',
+        action='store_true',
+        dest='r',
+        help='try to reuse FURL files. Use with --client-port and --engine-port'
+    )
+    base_parser.add_argument(
+        '--client-port',
+        type=int,
+        dest='client_port',
+        help='the port the controller will listen on for client connections',
+        default=0
+    )
+    base_parser.add_argument(
+        '--engine-port',
+        type=int,
+        dest='engine_port',
+        help='the port the controller will listen on for engine connections',
+        default=0
+    )
+    base_parser.add_argument(
         '-x',
         action='store_true',
         dest='x',
@@ -665,7 +712,7 @@ def get_args():
 
     parser_mpirun = subparsers.add_parser(
         'mpirun',
-        help='run a cluster using mpirun',
+        help='run a cluster using mpirun (mpiexec also works)',
         parents=[base_parser]
     )
     parser_mpirun.add_argument(
@@ -674,7 +721,20 @@ def get_args():
         dest="mpi", # Don't put a default here to allow no MPI support
         help="how to call MPI_Init (default=mpi4py)"
     )
-    parser_mpirun.set_defaults(func=main_mpirun)
+    parser_mpirun.set_defaults(func=main_mpi, cmd='mpirun')
+
+    parser_mpiexec = subparsers.add_parser(
+        'mpiexec',
+        help='run a cluster using mpiexec (mpirun also works)',
+        parents=[base_parser]
+    )
+    parser_mpiexec.add_argument(
+        "--mpi",
+        type=str,
+        dest="mpi", # Don't put a default here to allow no MPI support
+        help="how to call MPI_Init (default=mpi4py)"
+    )
+    parser_mpiexec.set_defaults(func=main_mpi, cmd='mpiexec')
 
     parser_pbs = subparsers.add_parser(
         'pbs',
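
Both subcommands funnel into the single `main_mpi` handler: `set_defaults(...,
cmd=...)` stashes the launcher name on the parsed namespace so that
`start_engines` can build `raw_args = [args.cmd]` without branching. A minimal
standalone sketch of that pattern (illustrative names, not from the patch)::

    import argparse

    def launch(args):
        # args.cmd is 'mpirun' or 'mpiexec', set by the chosen subparser
        print 'would run:', [args.cmd, '-n', str(args.n), 'ipengine']

    parser = argparse.ArgumentParser()
    parser.add_argument('-n', type=int, default=4)
    subparsers = parser.add_subparsers()
    for name in ('mpirun', 'mpiexec'):
        sub = subparsers.add_parser(name)
        sub.set_defaults(func=launch, cmd=name)

    args = parser.parse_args(['mpiexec'])
    args.func(args)   # -> would run: ['mpiexec', '-n', '4', 'ipengine']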
@@ -32,34 +32,34 @@ Starting the engines with MPI enabled
 To use code that calls MPI, there are typically two things that MPI requires.
 
 1. The process that wants to call MPI must be started using
-   :command:`mpirun` or a batch system (like PBS) that has MPI support.
+   :command:`mpiexec` or a batch system (like PBS) that has MPI support.
 2. Once the process starts, it must call :func:`MPI_Init`.
 
 There are a couple of ways that you can start the IPython engines and get these things to happen.
 
-Automatic starting using :command:`mpirun` and :command:`ipcluster`
+Automatic starting using :command:`mpiexec` and :command:`ipcluster`
 -------------------------------------------------------------------
 
-The easiest approach is to use the `mpirun` mode of :command:`ipcluster`, which will first start a controller and then a set of engines using :command:`mpirun`::
+The easiest approach is to use the `mpiexec` mode of :command:`ipcluster`, which will first start a controller and then a set of engines using :command:`mpiexec`::
 
-    $ ipcluster mpirun -n 4
+    $ ipcluster mpiexec -n 4
 
 This approach is best as interrupting :command:`ipcluster` will automatically
 stop and clean up the controller and engines.
 
-Manual starting using :command:`mpirun`
+Manual starting using :command:`mpiexec`
 ---------------------------------------
 
-If you want to start the IPython engines using the :command:`mpirun`, just do::
+If you want to start the IPython engines using the :command:`mpiexec`, just do::
 
-    $ mpirun -n 4 ipengine --mpi=mpi4py
+    $ mpiexec -n 4 ipengine --mpi=mpi4py
 
 This requires that you already have a controller running and that the FURL
 files for the engines are in place. We also have built in support for
 PyTrilinos [PyTrilinos]_, which can be used (assuming is installed) by
 starting the engines with::
 
-    mpirun -n 4 ipengine --mpi=pytrilinos
+    mpiexec -n 4 ipengine --mpi=pytrilinos
 
 Automatic starting using PBS and :command:`ipcluster`
 -----------------------------------------------------
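
Once engines are started this way, a quick sanity check is to ask each one for
its MPI rank. An illustrative sketch (not from the patch), assuming the
`MultiEngineClient` API and mpi4py on the engines::

    from IPython.kernel import client

    mec = client.MultiEngineClient()
    mec.execute('from mpi4py import MPI')
    mec.execute('rank = MPI.COMM_WORLD.Get_rank()')
    # One distinct rank per engine, e.g. [1, 0, 2, 3] for four engines;
    # engine order need not match rank order.
    print mec.pull('rank')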
@@ -84,7 +84,7 @@ First, lets define a simply function that uses MPI to calculate the sum of a dis
 
 Now, start an IPython cluster in the same directory as :file:`psum.py`::
 
-    $ ipcluster mpirun -n 4
+    $ ipcluster mpiexec -n 4
 
 Finally, connect to the cluster and use this function interactively. In this case, we create a random array on each engine and sum up all the random arrays using our :func:`psum` function:
 
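
The contents of :file:`psum.py` are not shown in this diff; a sketch
consistent with the description above, assuming mpi4py and numpy are
installed on every engine, would be::

    # psum.py -- sum a distributed array across all MPI processes
    from mpi4py import MPI
    import numpy as np

    def psum(a):
        # Sum the local chunk, then all-reduce the partial sums so that
        # every engine ends up with the global total.
        local_sum = np.sum(a)
        return MPI.COMM_WORLD.allreduce(local_sum, op=MPI.SUM)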
@@ -85,33 +85,40 @@ To see other command line options for the local mode, do::
 
     $ ipcluster local -h
 
-Using :command:`ipcluster` in mpirun mode
------------------------------------------
+Using :command:`ipcluster` in mpiexec/mpirun mode
+-------------------------------------------------
 
-The mpirun mode is useful if you:
+The mpiexec/mpirun mode is useful if you:
 
 1. Have MPI installed.
-2. Your systems are configured to use the :command:`mpirun` command to start MPI
-   processes.
+2. Your systems are configured to use the :command:`mpiexec` or
+   :command:`mpirun` commands to start MPI processes.
+
+.. note::
+
+    The preferred command to use is :command:`mpiexec`. However, we also
+    support :command:`mpirun` for backwards compatibility. The underlying
+    logic used is exactly the same, the only difference being the name of the
+    command line program that is called.
 
 If these are satisfied, you can start an IPython cluster using::
 
-    $ ipcluster mpirun -n 4
+    $ ipcluster mpiexec -n 4
 
 This does the following:
 
 1. Starts the IPython controller on current host.
-2. Uses :command:`mpirun` to start 4 engines.
+2. Uses :command:`mpiexec` to start 4 engines.
 
 On newer MPI implementations (such as OpenMPI), this will work even if you don't make any calls to MPI or call :func:`MPI_Init`. However, older MPI implementations actually require each process to call :func:`MPI_Init` upon starting. The easiest way of having this done is to install the mpi4py [mpi4py]_ package and then call ipcluster with the ``--mpi`` option::
 
-    $ ipcluster mpirun -n 4 --mpi=mpi4py
+    $ ipcluster mpiexec -n 4 --mpi=mpi4py
 
 Unfortunately, even this won't work for some MPI implementations. If you are having problems with this, you will likely have to use a custom Python executable that itself calls :func:`MPI_Init` at the appropriate time. Fortunately, mpi4py comes with such a custom Python executable that is easy to install and use. However, this custom Python executable approach will not work with :command:`ipcluster` currently.
 
 Additional command line options for this mode can be found by doing::
 
-    $ ipcluster mpirun -h
+    $ ipcluster mpiexec -h
 
 More details on using MPI with IPython can be found :ref:`here <parallelmpi>`.
 
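
In essence, ``--mpi=mpi4py`` asks ipengine to import mpi4py early, and that
import is what triggers :func:`MPI_Init` for the process. A rough illustration
of the idea (the actual ipengine hook may differ)::

    # Illustration only: importing mpi4py's MPI module initializes MPI
    # (i.e. calls MPI_Init), which older implementations require before
    # any other MPI activity in the process.
    def init_mpi4py():
        from mpi4py import MPI
        return MPI

    mpi = init_mpi4py()   # after this the process is a proper MPI rank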
@@ -301,6 +308,11 @@ This is possible. The only thing you have to do is decide what ports the contro
 
     $ ipcontroller -r --client-port=10101 --engine-port=10102
 
+These options also work with all of the various modes of
+:command:`ipcluster`::
+
+    $ ipcluster local -n 2 -r --client-port=10101 --engine-port=10102
+
 Then, just copy the furl files over the first time and you are set. You can start and stop the controller and engines any many times as you want in the future, just make sure to tell the controller to use the *same* ports.
 
 .. note::