diff --git a/IPython/external/argparse.py b/IPython/external/argparse.py new file mode 100644 index 0000000..d17290d --- /dev/null +++ b/IPython/external/argparse.py @@ -0,0 +1,1867 @@ +# -*- coding: utf-8 -*- + +# Copyright © 2006 Steven J. Bethard <steven.bethard@gmail.com>. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted under the terms of the 3-clause BSD +# license. No warranty expressed or implied. +# For details, see the accompanying file LICENSE.txt. + +"""Command-line parsing library + +This module is an optparse-inspired command-line parsing library that: + +* handles both optional and positional arguments +* produces highly informative usage messages +* supports parsers that dispatch to sub-parsers + +The following is a simple usage example that sums integers from the +command-line and writes the result to a file: + + parser = argparse.ArgumentParser( + description='sum the integers at the command line') + parser.add_argument( + 'integers', metavar='int', nargs='+', type=int, + help='an integer to be summed') + parser.add_argument( + '--log', default=sys.stdout, type=argparse.FileType('w'), + help='the file where the sum should be written') + args = parser.parse_args() + args.log.write('%s' % sum(args.integers)) + args.log.close() + +The module contains the following public classes: + + ArgumentParser -- The main entry point for command-line parsing. As the + example above shows, the add_argument() method is used to populate + the parser with actions for optional and positional arguments. Then + the parse_args() method is invoked to convert the args at the + command-line into an object with attributes. + + ArgumentError -- The exception raised by ArgumentParser objects when + there are errors with the parser's actions. Errors raised while + parsing the command-line are caught by ArgumentParser and emitted + as command-line messages. + + FileType -- A factory for defining types of files to be created. As the + example above shows, instances of FileType are typically passed as + the type= argument of add_argument() calls. + + Action -- The base class for parser actions. Typically actions are + selected by passing strings like 'store_true' or 'append_const' to + the action= argument of add_argument(). However, for greater + customization of ArgumentParser actions, subclasses of Action may + be defined and passed as the action= argument. + + HelpFormatter, RawDescriptionHelpFormatter -- Formatter classes which + may be passed as the formatter_class= argument to the + ArgumentParser constructor. HelpFormatter is the default, while + RawDescriptionHelpFormatter tells the parser not to perform any + line-wrapping on description text. + +All other classes in this module are considered implementation details. +(Also note that HelpFormatter and RawDescriptionHelpFormatter are only +considered public as object names -- the API of the formatter objects is +still considered an implementation detail.) +""" + +__version__ = '0.8.0' + +import os as _os +import re as _re +import sys as _sys +import textwrap as _textwrap + +from gettext import gettext as _ + +SUPPRESS = '==SUPPRESS==' + +OPTIONAL = '?' +ZERO_OR_MORE = '*' +ONE_OR_MORE = '+' +PARSER = '==PARSER==' + +# ============================= +# Utility functions and classes +# ============================= + +class _AttributeHolder(object): + """Abstract base class that provides __repr__. + + The __repr__ method returns a string in the format: + ClassName(attr=name, attr=name, ...)
+ The attributes are determined either by a class-level attribute, + '_kwarg_names', or by inspecting the instance __dict__. + """ + + def __repr__(self): + type_name = type(self).__name__ + arg_strings = [] + for arg in self._get_args(): + arg_strings.append(repr(arg)) + for name, value in self._get_kwargs(): + arg_strings.append('%s=%r' % (name, value)) + return '%s(%s)' % (type_name, ', '.join(arg_strings)) + + def _get_kwargs(self): + return sorted(self.__dict__.items()) + + def _get_args(self): + return [] + +def _ensure_value(namespace, name, value): + if getattr(namespace, name, None) is None: + setattr(namespace, name, value) + return getattr(namespace, name) + + + +# =============== +# Formatting Help +# =============== + +class HelpFormatter(object): + + def __init__(self, + prog, + indent_increment=2, + max_help_position=24, + width=None): + + # default setting for width + if width is None: + try: + width = int(_os.environ['COLUMNS']) + except (KeyError, ValueError): + width = 80 + width -= 2 + + self._prog = prog + self._indent_increment = indent_increment + self._max_help_position = max_help_position + self._width = width + + self._current_indent = 0 + self._level = 0 + self._action_max_length = 0 + + self._root_section = self._Section(self, None) + self._current_section = self._root_section + + self._whitespace_matcher = _re.compile(r'\s+') + self._long_break_matcher = _re.compile(r'\n\n\n+') + + # =============================== + # Section and indentation methods + # =============================== + + def _indent(self): + self._current_indent += self._indent_increment + self._level += 1 + + def _dedent(self): + self._current_indent -= self._indent_increment + assert self._current_indent >= 0, 'Indent decreased below 0.' + self._level -= 1 + + class _Section(object): + def __init__(self, formatter, parent, heading=None): + self.formatter = formatter + self.parent = parent + self.heading = heading + self.items = [] + + def format_help(self): + # format the indented section + if self.parent is not None: + self.formatter._indent() + join = self.formatter._join_parts + for func, args in self.items: + func(*args) + item_help = join(func(*args) for func, args in self.items) + if self.parent is not None: + self.formatter._dedent() + + # return nothing if the section was empty + if not item_help: + return '' + + # add the heading if the section was non-empty + if self.heading is not SUPPRESS and self.heading is not None: + current_indent = self.formatter._current_indent + heading = '%*s%s:\n' % (current_indent, '', self.heading) + else: + heading = '' + + # join the section-initial newline, the heading and the help + return join(['\n', heading, item_help, '\n']) + + def _add_item(self, func, args): + self._current_section.items.append((func, args)) + + # ======================== + # Message building methods + # ======================== + + def start_section(self, heading): + self._indent() + section = self._Section(self, self._current_section, heading) + self._add_item(section.format_help, []) + self._current_section = section + + def end_section(self): + self._current_section = self._current_section.parent + self._dedent() + + def add_text(self, text): + if text is not SUPPRESS and text is not None: + self._add_item(self._format_text, [text]) + + def add_usage(self, usage, optionals, positionals, prefix=None): + if usage is not SUPPRESS: + args = usage, optionals, positionals, prefix + self._add_item(self._format_usage, args) + + def add_argument(self, action): + if action.help is 
not SUPPRESS: + + # find all invocations + get_invocation = self._format_action_invocation + invocations = [get_invocation(action)] + for subaction in self._iter_indented_subactions(action): + invocations.append(get_invocation(subaction)) + + # update the maximum item length + invocation_length = max(len(s) for s in invocations) + action_length = invocation_length + self._current_indent + self._action_max_length = max(self._action_max_length, + action_length) + + # add the item to the list + self._add_item(self._format_action, [action]) + + def add_arguments(self, actions): + for action in actions: + self.add_argument(action) + + # ======================= + # Help-formatting methods + # ======================= + + def format_help(self): + help = self._root_section.format_help() % dict(prog=self._prog) + if help: + help = self._long_break_matcher.sub('\n\n', help) + help = help.strip('\n') + '\n' + return help + + def _join_parts(self, part_strings): + return ''.join(part + for part in part_strings + if part and part is not SUPPRESS) + + def _format_usage(self, usage, optionals, positionals, prefix): + if prefix is None: + prefix = _('usage: ') + + # if no optionals or positionals are available, usage is just prog + if usage is None and not optionals and not positionals: + usage = '%(prog)s' + + # if optionals and positionals are available, calculate usage + elif usage is None: + usage = '%(prog)s' % dict(prog=self._prog) + + # determine width of "usage: PROG" and width of text + prefix_width = len(prefix) + len(usage) + 1 + prefix_indent = self._current_indent + prefix_width + text_width = self._width - self._current_indent + + # put them on one line if they're short enough + format = self._format_actions_usage + action_usage = format(optionals + positionals) + if prefix_width + len(action_usage) + 1 < text_width: + usage = '%s %s' % (usage, action_usage) + + # if they're long, wrap optionals and positionals individually + else: + optional_usage = format(optionals) + positional_usage = format(positionals) + indent = ' ' * prefix_indent + + # usage is made of PROG, optionals and positionals + parts = [usage, ' '] + + # options always get added right after PROG + if optional_usage: + parts.append(_textwrap.fill( + optional_usage, text_width, + initial_indent=indent, + subsequent_indent=indent).lstrip()) + + # if there were options, put arguments on the next line + # otherwise, start them right after PROG + if positional_usage: + part = _textwrap.fill( + positional_usage, text_width, + initial_indent=indent, + subsequent_indent=indent).lstrip() + if optional_usage: + part = '\n' + indent + part + parts.append(part) + usage = ''.join(parts) + + # prefix with 'usage:' + return '%s%s\n\n' % (prefix, usage) + + def _format_actions_usage(self, actions): + parts = [] + for action in actions: + if action.help is SUPPRESS: + continue + + # produce all arg strings + if not action.option_strings: + parts.append(self._format_args(action, action.dest)) + + # produce the first way to invoke the option in brackets + else: + option_string = action.option_strings[0] + + # if the Optional doesn't take a value, format is: + # -s or --long + if action.nargs == 0: + part = '%s' % option_string + + # if the Optional takes a value, format is: + # -s ARGS or --long ARGS + else: + default = action.dest.upper() + args_string = self._format_args(action, default) + part = '%s %s' % (option_string, args_string) + + # make it look optional if it's not required + if not action.required: + part = '[%s]' % part + 
parts.append(part) + + return ' '.join(parts) + + def _format_text(self, text): + text_width = self._width - self._current_indent + indent = ' ' * self._current_indent + return self._fill_text(text, text_width, indent) + '\n\n' + + def _format_action(self, action): + # determine the required width and the entry label + help_position = min(self._action_max_length + 2, + self._max_help_position) + help_width = self._width - help_position + action_width = help_position - self._current_indent - 2 + action_header = self._format_action_invocation(action) + + # no help; start on same line and add a final newline + if not action.help: + tup = self._current_indent, '', action_header + action_header = '%*s%s\n' % tup + + # short action name; start on the same line and pad two spaces + elif len(action_header) <= action_width: + tup = self._current_indent, '', action_width, action_header + action_header = '%*s%-*s ' % tup + indent_first = 0 + + # long action name; start on the next line + else: + tup = self._current_indent, '', action_header + action_header = '%*s%s\n' % tup + indent_first = help_position + + # collect the pieces of the action help + parts = [action_header] + + # if there was help for the action, add lines of help text + if action.help: + help_text = self._expand_help(action) + help_lines = self._split_lines(help_text, help_width) + parts.append('%*s%s\n' % (indent_first, '', help_lines[0])) + for line in help_lines[1:]: + parts.append('%*s%s\n' % (help_position, '', line)) + + # or add a newline if the description doesn't end with one + elif not action_header.endswith('\n'): + parts.append('\n') + + # if there are any sub-actions, add their help as well + for subaction in self._iter_indented_subactions(action): + parts.append(self._format_action(subaction)) + + # return a single string + return self._join_parts(parts) + + def _format_action_invocation(self, action): + if not action.option_strings: + return self._format_metavar(action, action.dest) + + else: + parts = [] + + # if the Optional doesn't take a value, format is: + # -s, --long + if action.nargs == 0: + parts.extend(action.option_strings) + + # if the Optional takes a value, format is: + # -s ARGS, --long ARGS + else: + default = action.dest.upper() + args_string = self._format_args(action, default) + for option_string in action.option_strings: + parts.append('%s %s' % (option_string, args_string)) + + return ', '.join(parts) + + def _format_metavar(self, action, default_metavar): + if action.metavar is not None: + name = action.metavar + elif action.choices is not None: + choice_strs = (str(choice) for choice in action.choices) + name = '{%s}' % ','.join(choice_strs) + else: + name = default_metavar + return name + + def _format_args(self, action, default_metavar): + name = self._format_metavar(action, default_metavar) + if action.nargs is None: + result = name + elif action.nargs == OPTIONAL: + result = '[%s]' % name + elif action.nargs == ZERO_OR_MORE: + result = '[%s [%s ...]]' % (name, name) + elif action.nargs == ONE_OR_MORE: + result = '%s [%s ...]' % (name, name) + elif action.nargs is PARSER: + result = '%s ...'
% name + else: + result = ' '.join([name] * action.nargs) + return result + + def _expand_help(self, action): + params = dict(vars(action), prog=self._prog) + for name, value in params.items(): + if value is SUPPRESS: + del params[name] + if params.get('choices') is not None: + choices_str = ', '.join(str(c) for c in params['choices']) + params['choices'] = choices_str + return action.help % params + + def _iter_indented_subactions(self, action): + try: + get_subactions = action._get_subactions + except AttributeError: + pass + else: + self._indent() + for subaction in get_subactions(): + yield subaction + self._dedent() + + def _split_lines(self, text, width): + text = self._whitespace_matcher.sub(' ', text).strip() + return _textwrap.wrap(text, width) + + def _fill_text(self, text, width, indent): + text = self._whitespace_matcher.sub(' ', text).strip() + return _textwrap.fill(text, width, initial_indent=indent, + subsequent_indent=indent) + +class RawDescriptionHelpFormatter(HelpFormatter): + + def _fill_text(self, text, width, indent): + return ''.join(indent + line for line in text.splitlines(True)) + +class RawTextHelpFormatter(RawDescriptionHelpFormatter): + + def _split_lines(self, text, width): + return text.splitlines() + +# ===================== +# Options and Arguments +# ===================== + +class ArgumentError(Exception): + """ArgumentError(message, argument) + + Raised whenever there was an error creating or using an argument + (optional or positional). + + The string value of this exception is the message, augmented with + information about the argument that caused it. + """ + + def __init__(self, argument, message): + if argument.option_strings: + self.argument_name = '/'.join(argument.option_strings) + elif argument.metavar not in (None, SUPPRESS): + self.argument_name = argument.metavar + elif argument.dest not in (None, SUPPRESS): + self.argument_name = argument.dest + else: + self.argument_name = None + self.message = message + + def __str__(self): + if self.argument_name is None: + format = '%(message)s' + else: + format = 'argument %(argument_name)s: %(message)s' + return format % dict(message=self.message, + argument_name=self.argument_name) + +# ============== +# Action classes +# ============== + +class Action(_AttributeHolder): + """Action(*strings, **options) + + Action objects hold the information necessary to convert a + set of command-line arguments (possibly including an initial option + string) into the desired Python object(s). + + Keyword Arguments: + + option_strings -- A list of command-line option strings which + should be associated with this action. + + dest -- The name of the attribute to hold the created object(s) + + nargs -- The number of command-line arguments that should be consumed. + By default, one argument will be consumed and a single value will + be produced. Other values include: + * N (an integer) consumes N arguments (and produces a list) + * '?' consumes zero or one arguments + * '*' consumes zero or more arguments (and produces a list) + * '+' consumes one or more arguments (and produces a list) + Note that the difference between the default and nargs=1 is that + with the default, a single value will be produced, while with + nargs=1, a list containing a single value will be produced. + + const -- The value to be produced if the option is specified and the + option uses an action that takes no values. + + default -- The value to be produced if the option is not specified. 
+ + type -- The type which the command-line arguments should be converted + to, should be one of 'string', 'int', 'float', 'complex' or a + callable object that accepts a single string argument. If None, + 'string' is assumed. + + choices -- A container of values that should be allowed. If not None, + after a command-line argument has been converted to the appropriate + type, an exception will be raised if it is not a member of this + collection. + + required -- True if the action must always be specified at the command + line. This is only meaningful for optional command-line arguments. + + help -- The help string describing the argument. + + metavar -- The name to be used for the option's argument with the help + string. If None, the 'dest' value will be used as the name. + """ + + + def __init__(self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + choices=None, + required=False, + help=None, + metavar=None): + self.option_strings = option_strings + self.dest = dest + self.nargs = nargs + self.const = const + self.default = default + self.type = type + self.choices = choices + self.required = required + self.help = help + self.metavar = metavar + + def _get_kwargs(self): + names = [ + 'option_strings', + 'dest', + 'nargs', + 'const', + 'default', + 'type', + 'choices', + 'help', + 'metavar' + ] + return [(name, getattr(self, name)) for name in names] + + def __call__(self, parser, namespace, values, option_string=None): + raise NotImplementedError(_('.__call__() not defined')) + +class _StoreAction(Action): + def __init__(self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + choices=None, + required=False, + help=None, + metavar=None): + if nargs == 0: + raise ValueError('nargs must be > 0') + if const is not None and nargs != OPTIONAL: + raise ValueError('nargs must be %r to supply const' % OPTIONAL) + super(_StoreAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=nargs, + const=const, + default=default, + type=type, + choices=choices, + required=required, + help=help, + metavar=metavar) + + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, values) + +class _StoreConstAction(Action): + def __init__(self, + option_strings, + dest, + const, + default=None, + required=False, + help=None, + metavar=None): + super(_StoreConstAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=0, + const=const, + default=default, + required=required, + help=help) + + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, self.const) + +class _StoreTrueAction(_StoreConstAction): + def __init__(self, + option_strings, + dest, + default=False, + required=False, + help=None): + super(_StoreTrueAction, self).__init__( + option_strings=option_strings, + dest=dest, + const=True, + default=default, + required=required, + help=help) + +class _StoreFalseAction(_StoreConstAction): + def __init__(self, + option_strings, + dest, + default=True, + required=False, + help=None): + super(_StoreFalseAction, self).__init__( + option_strings=option_strings, + dest=dest, + const=False, + default=default, + required=required, + help=help) + +class _AppendAction(Action): + def __init__(self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + choices=None, + required=False, + help=None, + metavar=None): + if nargs == 0: + raise ValueError('nargs must be > 0') + if const is not None 
and nargs != OPTIONAL: + raise ValueError('nargs must be %r to supply const' % OPTIONAL) + super(_AppendAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=nargs, + const=const, + default=default, + type=type, + choices=choices, + required=required, + help=help, + metavar=metavar) + + def __call__(self, parser, namespace, values, option_string=None): + _ensure_value(namespace, self.dest, []).append(values) + +class _AppendConstAction(Action): + def __init__(self, + option_strings, + dest, + const, + default=None, + required=False, + help=None, + metavar=None): + super(_AppendConstAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=0, + const=const, + default=default, + required=required, + help=help, + metavar=metavar) + + def __call__(self, parser, namespace, values, option_string=None): + _ensure_value(namespace, self.dest, []).append(self.const) + +class _CountAction(Action): + def __init__(self, + option_strings, + dest, + default=None, + required=False, + help=None): + super(_CountAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=0, + default=default, + required=required, + help=help) + + def __call__(self, parser, namespace, values, option_string=None): + new_count = _ensure_value(namespace, self.dest, 0) + 1 + setattr(namespace, self.dest, new_count) + +class _HelpAction(Action): + def __init__(self, + option_strings, + dest=SUPPRESS, + default=SUPPRESS, + help=None): + super(_HelpAction, self).__init__( + option_strings=option_strings, + dest=dest, + default=default, + nargs=0, + help=help) + + def __call__(self, parser, namespace, values, option_string=None): + parser.print_help() + parser.exit() + +class _VersionAction(Action): + def __init__(self, + option_strings, + dest=SUPPRESS, + default=SUPPRESS, + help=None): + super(_VersionAction, self).__init__( + option_strings=option_strings, + dest=dest, + default=default, + nargs=0, + help=help) + + def __call__(self, parser, namespace, values, option_string=None): + parser.print_version() + parser.exit() + +class _SubParsersAction(Action): + + class _ChoicesPseudoAction(Action): + def __init__(self, name, help): + sup = super(_SubParsersAction._ChoicesPseudoAction, self) + sup.__init__(option_strings=[], dest=name, help=help) + + + def __init__(self, + option_strings, + prog, + parser_class, + dest=SUPPRESS, + help=None, + metavar=None): + + self._prog_prefix = prog + self._parser_class = parser_class + self._name_parser_map = {} + self._choices_actions = [] + + super(_SubParsersAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=PARSER, + choices=self._name_parser_map, + help=help, + metavar=metavar) + + def add_parser(self, name, **kwargs): + # set prog from the existing prefix + if kwargs.get('prog') is None: + kwargs['prog'] = '%s %s' % (self._prog_prefix, name) + + # create a pseudo-action to hold the choice help + if 'help' in kwargs: + help = kwargs.pop('help') + choice_action = self._ChoicesPseudoAction(name, help) + self._choices_actions.append(choice_action) + + # create the parser and add it to the map + parser = self._parser_class(**kwargs) + self._name_parser_map[name] = parser + return parser + + def _get_subactions(self): + return self._choices_actions + + def __call__(self, parser, namespace, values, option_string=None): + parser_name = values[0] + arg_strings = values[1:] + + # set the parser name if requested + if self.dest is not SUPPRESS: + setattr(namespace, self.dest, parser_name) + + # select the parser + 
try: + parser = self._name_parser_map[parser_name] + except KeyError: + tup = parser_name, ', '.join(self._name_parser_map) + msg = _('unknown parser %r (choices: %s)' % tup) + raise ArgumentError(self, msg) + + # parse all the remaining options into the namespace + parser.parse_args(arg_strings, namespace) + + +# ============== +# Type classes +# ============== + +class FileType(object): + """Factory for creating file object types + + Instances of FileType are typically passed as type= arguments to the + ArgumentParser add_argument() method. + + Keyword Arguments: + mode -- A string indicating how the file is to be opened. Accepts the + same values as the builtin open() function. + bufsize -- The file's desired buffer size. Accepts the same values as + the builtin open() function. + """ + def __init__(self, mode='r', bufsize=None): + self._mode = mode + self._bufsize = bufsize + + def __call__(self, string): + # the special argument "-" means sys.std{in,out} + if string == '-': + if self._mode == 'r': + return _sys.stdin + elif self._mode == 'w': + return _sys.stdout + else: + msg = _('argument "-" with mode %r' % self._mode) + raise ValueError(msg) + + # all other arguments are used as file names + if self._bufsize: + return open(string, self._mode, self._bufsize) + else: + return open(string, self._mode) + + +# =========================== +# Optional and Positional Parsing +# =========================== + +class Namespace(_AttributeHolder): + + def __init__(self, **kwargs): + for name, value in kwargs.iteritems(): + setattr(self, name, value) + + def __eq__(self, other): + return vars(self) == vars(other) + + def __ne__(self, other): + return not (self == other) + + +class _ActionsContainer(object): + def __init__(self, + description, + prefix_chars, + argument_default, + conflict_handler): + super(_ActionsContainer, self).__init__() + + self.description = description + self.argument_default = argument_default + self.prefix_chars = prefix_chars + self.conflict_handler = conflict_handler + + # set up registries + self._registries = {} + + # register actions + self.register('action', None, _StoreAction) + self.register('action', 'store', _StoreAction) + self.register('action', 'store_const', _StoreConstAction) + self.register('action', 'store_true', _StoreTrueAction) + self.register('action', 'store_false', _StoreFalseAction) + self.register('action', 'append', _AppendAction) + self.register('action', 'append_const', _AppendConstAction) + self.register('action', 'count', _CountAction) + self.register('action', 'help', _HelpAction) + self.register('action', 'version', _VersionAction) + self.register('action', 'parsers', _SubParsersAction) + + # raise an exception if the conflict handler is invalid + self._get_handler() + + # action storage + self._optional_actions_list = [] + self._positional_actions_list = [] + self._positional_actions_full_list = [] + self._option_strings = {} + + # defaults storage + self._defaults = {} + + # ==================== + # Registration methods + # ==================== + + def register(self, registry_name, value, object): + registry = self._registries.setdefault(registry_name, {}) + registry[value] = object + + def _registry_get(self, registry_name, value, default=None): + return self._registries[registry_name].get(value, default) + + # ================================== + # Namespace default settings methods + # ================================== + + def set_defaults(self, **kwargs): + self._defaults.update(kwargs) + + # if these defaults match any existing 
arguments, replace + # the previous default on the object with the new one + for action_list in [self._option_strings.values(), + self._positional_actions_full_list]: + for action in action_list: + if action.dest in kwargs: + action.default = kwargs[action.dest] + + # ======================= + # Adding argument actions + # ======================= + + def add_argument(self, *args, **kwargs): + """ + add_argument(dest, ..., name=value, ...) + add_argument(option_string, option_string, ..., name=value, ...) + """ + + # if no positional args are supplied or only one is supplied and + # it doesn't look like an option string, parse a positional + # argument + chars = self.prefix_chars + if not args or len(args) == 1 and args[0][0] not in chars: + kwargs = self._get_positional_kwargs(*args, **kwargs) + + # otherwise, we're adding an optional argument + else: + kwargs = self._get_optional_kwargs(*args, **kwargs) + + # if no default was supplied, use the parser-level default + if 'default' not in kwargs: + dest = kwargs['dest'] + if dest in self._defaults: + kwargs['default'] = self._defaults[dest] + elif self.argument_default is not None: + kwargs['default'] = self.argument_default + + # create the action object, and add it to the parser + action_class = self._pop_action_class(kwargs) + action = action_class(**kwargs) + return self._add_action(action) + + def _add_action(self, action): + # resolve any conflicts + self._check_conflict(action) + + # add to optional or positional list + if action.option_strings: + self._optional_actions_list.append(action) + else: + self._positional_actions_list.append(action) + self._positional_actions_full_list.append(action) + action.container = self + + # index the action by any option strings it has + for option_string in action.option_strings: + self._option_strings[option_string] = action + + # return the created action + return action + + def _add_container_actions(self, container): + for action in container._optional_actions_list: + self._add_action(action) + for action in container._positional_actions_list: + self._add_action(action) + + def _get_positional_kwargs(self, dest, **kwargs): + # make sure required is not specified + if 'required' in kwargs: + msg = _("'required' is an invalid argument for positionals") + raise TypeError(msg) + + # return the keyword arguments with no option strings + return dict(kwargs, dest=dest, option_strings=[]) + + def _get_optional_kwargs(self, *args, **kwargs): + # determine short and long option strings + option_strings = [] + long_option_strings = [] + for option_string in args: + # error on one-or-fewer-character option strings + if len(option_string) < 2: + msg = _('invalid option string %r: ' + 'must be at least two characters long') + raise ValueError(msg % option_string) + + # error on strings that don't start with an appropriate prefix + if not option_string[0] in self.prefix_chars: + msg = _('invalid option string %r: ' + 'must start with a character %r') + tup = option_string, self.prefix_chars + raise ValueError(msg % tup) + + # error on strings that are all prefix characters + if not (set(option_string) - set(self.prefix_chars)): + msg = _('invalid option string %r: ' + 'must contain characters other than %r') + tup = option_string, self.prefix_chars + raise ValueError(msg % tup) + + # strings starting with two prefix characters are long options + option_strings.append(option_string) + if option_string[0] in self.prefix_chars: + if option_string[1] in self.prefix_chars: + 
long_option_strings.append(option_string) + + # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x' + dest = kwargs.pop('dest', None) + if dest is None: + if long_option_strings: + dest_option_string = long_option_strings[0] + else: + dest_option_string = option_strings[0] + dest = dest_option_string.lstrip(self.prefix_chars) + dest = dest.replace('-', '_') + + # return the updated keyword arguments + return dict(kwargs, dest=dest, option_strings=option_strings) + + def _pop_action_class(self, kwargs, default=None): + action = kwargs.pop('action', default) + return self._registry_get('action', action, action) + + def _get_handler(self): + # determine function from conflict handler string + handler_func_name = '_handle_conflict_%s' % self.conflict_handler + try: + return getattr(self, handler_func_name) + except AttributeError: + msg = _('invalid conflict_resolution value: %r') + raise ValueError(msg % self.conflict_handler) + + def _check_conflict(self, action): + + # find all options that conflict with this option + confl_optionals = [] + for option_string in action.option_strings: + if option_string in self._option_strings: + confl_optional = self._option_strings[option_string] + confl_optionals.append((option_string, confl_optional)) + + # resolve any conflicts + if confl_optionals: + conflict_handler = self._get_handler() + conflict_handler(action, confl_optionals) + + def _handle_conflict_error(self, action, conflicting_actions): + message = _('conflicting option string(s): %s') + conflict_string = ', '.join(option_string + for option_string, action + in conflicting_actions) + raise ArgumentError(action, message % conflict_string) + + def _handle_conflict_resolve(self, action, conflicting_actions): + + # remove all conflicting options + for option_string, action in conflicting_actions: + + # remove the conflicting option + action.option_strings.remove(option_string) + self._option_strings.pop(option_string, None) + + # if the option now has no option string, remove it from the + # container holding it + if not action.option_strings: + action.container._optional_actions_list.remove(action) + + +class _ArgumentGroup(_ActionsContainer): + + def __init__(self, container, title=None, description=None, **kwargs): + # add any missing keyword arguments by checking the container + update = kwargs.setdefault + update('conflict_handler', container.conflict_handler) + update('prefix_chars', container.prefix_chars) + update('argument_default', container.argument_default) + super_init = super(_ArgumentGroup, self).__init__ + super_init(description=description, **kwargs) + + self.title = title + self._registries = container._registries + self._positional_actions_full_list = container._positional_actions_full_list + self._option_strings = container._option_strings + self._defaults = container._defaults + + +class ArgumentParser(_AttributeHolder, _ActionsContainer): + + def __init__(self, + prog=None, + usage=None, + description=None, + epilog=None, + version=None, + parents=[], + formatter_class=HelpFormatter, + prefix_chars='-', + argument_default=None, + conflict_handler='error', + add_help=True): + + superinit = super(ArgumentParser, self).__init__ + superinit(description=description, + prefix_chars=prefix_chars, + argument_default=argument_default, + conflict_handler=conflict_handler) + + # default setting for prog + if prog is None: + prog = _os.path.basename(_sys.argv[0]) + + self.prog = prog + self.usage = usage + self.epilog = epilog + self.version = version + self.formatter_class = 
formatter_class + self.add_help = add_help + + self._argument_group_class = _ArgumentGroup + self._has_subparsers = False + self._argument_groups = [] + + # register types + def identity(string): + return string + self.register('type', None, identity) + + # add help and version arguments if necessary + # (using explicit default to override global argument_default) + if self.add_help: + self.add_argument( + '-h', '--help', action='help', default=SUPPRESS, + help=_('show this help message and exit')) + if self.version: + self.add_argument( + '-v', '--version', action='version', default=SUPPRESS, + help=_("show program's version number and exit")) + + # add parent arguments and defaults + for parent in parents: + self._add_container_actions(parent) + try: + defaults = parent._defaults + except AttributeError: + pass + else: + self._defaults.update(defaults) + + # determines whether an "option" looks like a negative number + self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$') + + + # ======================= + # Pretty __repr__ methods + # ======================= + + def _get_kwargs(self): + names = [ + 'prog', + 'usage', + 'description', + 'version', + 'formatter_class', + 'conflict_handler', + 'add_help', + ] + return [(name, getattr(self, name)) for name in names] + + # ================================== + # Optional/Positional adding methods + # ================================== + + def add_argument_group(self, *args, **kwargs): + group = self._argument_group_class(self, *args, **kwargs) + self._argument_groups.append(group) + return group + + def add_subparsers(self, **kwargs): + if self._has_subparsers: + self.error(_('cannot have multiple subparser arguments')) + + # add the parser class to the arguments if it's not present + kwargs.setdefault('parser_class', type(self)) + + # prog defaults to the usage message of this parser, skipping + # optional arguments and with no "usage:" prefix + if kwargs.get('prog') is None: + formatter = self._get_formatter() + formatter.add_usage(self.usage, [], + self._get_positional_actions(), '') + kwargs['prog'] = formatter.format_help().strip() + + # create the parsers action and add it to the positionals list + parsers_class = self._pop_action_class(kwargs, 'parsers') + action = parsers_class(option_strings=[], **kwargs) + self._positional_actions_list.append(action) + self._positional_actions_full_list.append(action) + self._has_subparsers = True + + # return the created parsers action + return action + + def _add_container_actions(self, container): + super(ArgumentParser, self)._add_container_actions(container) + try: + groups = container._argument_groups + except AttributeError: + pass + else: + for group in groups: + new_group = self.add_argument_group( + title=group.title, + description=group.description, + conflict_handler=group.conflict_handler) + new_group._add_container_actions(group) + + def _get_optional_actions(self): + actions = [] + actions.extend(self._optional_actions_list) + for argument_group in self._argument_groups: + actions.extend(argument_group._optional_actions_list) + return actions + + def _get_positional_actions(self): + return list(self._positional_actions_full_list) + + + # ===================================== + # Command line argument parsing methods + # ===================================== + + def parse_args(self, args=None, namespace=None): + # args default to the system args + if args is None: + args = _sys.argv[1:] + + # default Namespace built from parser defaults + if namespace is None: + namespace =
Namespace() + + # add any action defaults that aren't present + optional_actions = self._get_optional_actions() + positional_actions = self._get_positional_actions() + for action in optional_actions + positional_actions: + if action.dest is not SUPPRESS: + if not hasattr(namespace, action.dest): + if action.default is not SUPPRESS: + default = action.default + if isinstance(action.default, basestring): + default = self._get_value(action, default) + setattr(namespace, action.dest, default) + + # add any parser defaults that aren't present + for dest, value in self._defaults.iteritems(): + if not hasattr(namespace, dest): + setattr(namespace, dest, value) + + # parse the arguments and exit if there are any errors + try: + result = self._parse_args(args, namespace) + except ArgumentError, err: + self.error(str(err)) + + # make sure all required optionals are present + for action in self._get_optional_actions(): + if action.required: + if getattr(result, action.dest, None) is None: + opt_strs = '/'.join(action.option_strings) + msg = _('option %s is required' % opt_strs) + self.error(msg) + + # return the parsed arguments + return result + + def _parse_args(self, arg_strings, namespace): + + # find all option indices, and determine the arg_string_pattern + # which has an 'O' if there is an option at an index, + # an 'A' if there is an argument, or a '-' if there is a '--' + option_string_indices = {} + arg_string_pattern_parts = [] + arg_strings_iter = iter(arg_strings) + for i, arg_string in enumerate(arg_strings_iter): + + # all args after -- are non-options + if arg_string == '--': + arg_string_pattern_parts.append('-') + for arg_string in arg_strings_iter: + arg_string_pattern_parts.append('A') + + # otherwise, add the arg to the arg strings + # and note the index if it was an option + else: + option_tuple = self._parse_optional(arg_string) + if option_tuple is None: + pattern = 'A' + else: + option_string_indices[i] = option_tuple + pattern = 'O' + arg_string_pattern_parts.append(pattern) + + # join the pieces together to form the pattern + arg_strings_pattern = ''.join(arg_string_pattern_parts) + + # converts arg strings to the appropriate type and then takes the action + def take_action(action, argument_strings, option_string=None): + argument_values = self._get_values(action, argument_strings) + # take the action if we didn't receive a SUPPRESS value + # (e.g. from a default) + if argument_values is not SUPPRESS: + action(self, namespace, argument_values, option_string) + + # function to convert arg_strings into an optional action + def consume_optional(start_index): + + # determine the optional action and parse any explicit + # argument out of the option string + option_tuple = option_string_indices[start_index] + action, option_string, explicit_arg = option_tuple + + # loop because single-dash options can be chained + # (e.g.
-xyz is the same as -x -y -z if no args are required) + match_argument = self._match_argument + action_tuples = [] + while True: + + # if we found no optional action, raise an error + if action is None: + self.error(_('no such option: %s') % option_string) + + # if there is an explicit argument, try to match the + # optional's string arguments to only this + if explicit_arg is not None: + arg_count = match_argument(action, 'A') + + # if the action is a single-dash option and takes no + # arguments, try to parse more single-dash options out + # of the tail of the option string + chars = self.prefix_chars + if arg_count == 0 and option_string[1] not in chars: + action_tuples.append((action, [], option_string)) + parse_optional = self._parse_optional + for char in self.prefix_chars: + option_string = char + explicit_arg + option_tuple = parse_optional(option_string) + if option_tuple[0] is not None: + break + else: + msg = _('ignored explicit argument %r') + raise ArgumentError(action, msg % explicit_arg) + + # set the action, etc. for the next loop iteration + action, option_string, explicit_arg = option_tuple + + # if the action expects exactly one argument, we've + # successfully matched the option; exit the loop + elif arg_count == 1: + stop = start_index + 1 + args = [explicit_arg] + action_tuples.append((action, args, option_string)) + break + + # error if a double-dash option did not use the + # explicit argument + else: + msg = _('ignored explicit argument %r') + raise ArgumentError(action, msg % explicit_arg) + + # if there is no explicit argument, try to match the + # optional's string arguments with the following strings + # if successful, exit the loop + else: + start = start_index + 1 + selected_patterns = arg_strings_pattern[start:] + arg_count = match_argument(action, selected_patterns) + stop = start + arg_count + args = arg_strings[start:stop] + action_tuples.append((action, args, option_string)) + break + + # add the Optional to the list and return the index at which + # the Optional's string args stopped + assert action_tuples + for action, args, option_string in action_tuples: + take_action(action, args, option_string) + return stop + + # the list of Positionals left to be parsed; this is modified + # by consume_positionals() + positionals = self._get_positional_actions() + + # function to convert arg_strings into positional actions + def consume_positionals(start_index): + # match as many Positionals as possible + match_partial = self._match_arguments_partial + selected_pattern = arg_strings_pattern[start_index:] + arg_counts = match_partial(positionals, selected_pattern) + + # slice off the appropriate arg strings for each Positional + # and add the Positional and its args to the list + for action, arg_count in zip(positionals, arg_counts): + args = arg_strings[start_index: start_index + arg_count] + start_index += arg_count + take_action(action, args) + + # slice off the Positionals that we just parsed and return the + # index at which the Positionals' string args stopped + positionals[:] = positionals[len(arg_counts):] + return start_index + + # consume Positionals and Optionals alternately, until we have + # passed the last option string + start_index = 0 + if option_string_indices: + max_option_string_index = max(option_string_indices) + else: + max_option_string_index = -1 + while start_index <= max_option_string_index: + + # consume any Positionals preceding the next option + next_option_string_index = min( + index + for index in option_string_indices + if index >=
start_index) + if start_index != next_option_string_index: + positionals_end_index = consume_positionals(start_index) + + # only try to parse the next optional if we didn't consume + # the option string during the positionals parsing + if positionals_end_index > start_index: + start_index = positionals_end_index + continue + else: + start_index = positionals_end_index + + # if we consumed all the positionals we could and we're not + # at the index of an option string, there were unparseable + # arguments + if start_index not in option_string_indices: + msg = _('extra arguments found: %s') + extras = arg_strings[start_index:next_option_string_index] + self.error(msg % ' '.join(extras)) + + # consume the next optional and any arguments for it + start_index = consume_optional(start_index) + + # consume any positionals following the last Optional + stop_index = consume_positionals(start_index) + + # if we didn't consume all the argument strings, there were too + # many supplied + if stop_index != len(arg_strings): + extras = arg_strings[stop_index:] + self.error(_('extra arguments found: %s') % ' '.join(extras)) + + # if we didn't use all the Positional objects, there were too few + # arg strings supplied. + if positionals: + self.error(_('too few arguments')) + + # return the updated namespace + return namespace + + def _match_argument(self, action, arg_strings_pattern): + # match the pattern for this action to the arg strings + nargs_pattern = self._get_nargs_pattern(action) + match = _re.match(nargs_pattern, arg_strings_pattern) + + # raise an exception if we weren't able to find a match + if match is None: + nargs_errors = { + None:_('expected one argument'), + OPTIONAL:_('expected at most one argument'), + ONE_OR_MORE:_('expected at least one argument') + } + default = _('expected %s argument(s)') % action.nargs + msg = nargs_errors.get(action.nargs, default) + raise ArgumentError(action, msg) + + # return the number of arguments matched + return len(match.group(1)) + + def _match_arguments_partial(self, actions, arg_strings_pattern): + # progressively shorten the actions list by slicing off the + # final actions until we find a match + result = [] + for i in xrange(len(actions), 0, -1): + actions_slice = actions[:i] + pattern = ''.join(self._get_nargs_pattern(action) + for action in actions_slice) + match = _re.match(pattern, arg_strings_pattern) + if match is not None: + result.extend(len(string) for string in match.groups()) + break + + # return the list of arg string counts + return result + + def _parse_optional(self, arg_string): + # if it doesn't start with a prefix, it was meant to be positional + if not arg_string[0] in self.prefix_chars: + return None + + # if it's just dashes, it was meant to be positional + if not arg_string.strip('-'): + return None + + # if the option string is present in the parser, return the action + if arg_string in self._option_strings: + action = self._option_strings[arg_string] + return action, arg_string, None + + # search through all possible prefixes of the option string + # and all actions in the parser for possible interpretations + option_tuples = [] + prefix_tuples = self._get_option_prefix_tuples(arg_string) + for option_string in self._option_strings: + for option_prefix, explicit_arg in prefix_tuples: + if option_string.startswith(option_prefix): + action = self._option_strings[option_string] + tup = action, option_string, explicit_arg + option_tuples.append(tup) + break + + # if multiple actions match, the option string was ambiguous + if 
len(option_tuples) > 1: + options = ', '.join(opt_str for _, opt_str, _ in option_tuples) + tup = arg_string, options + self.error(_('ambiguous option: %s could match %s') % tup) + + # if exactly one action matched, this segmentation is good, + # so return the parsed action + elif len(option_tuples) == 1: + option_tuple, = option_tuples + return option_tuple + + # if it was not found as an option, but it looks like a negative + # number, it was meant to be positional + if self._negative_number_matcher.match(arg_string): + return None + + # it was meant to be an optional but there is no such option + # in this parser (though it might be a valid option in a subparser) + return None, arg_string, None + + def _get_option_prefix_tuples(self, option_string): + result = [] + + # option strings starting with two prefix characters are only + # split at the '=' + chars = self.prefix_chars + if option_string[0] in chars and option_string[1] in chars: + if '=' in option_string: + option_prefix, explicit_arg = option_string.split('=', 1) + else: + option_prefix = option_string + explicit_arg = None + tup = option_prefix, explicit_arg + result.append(tup) + + # option strings starting with a single prefix character are + # split at all indices + else: + for first_index, char in enumerate(option_string): + if char not in self.prefix_chars: + break + for i in xrange(len(option_string), first_index, -1): + tup = option_string[:i], option_string[i:] or None + result.append(tup) + + # return the collected prefix tuples + return result + + def _get_nargs_pattern(self, action): + # in all examples below, we have to allow for '--' args + # which are represented as '-' in the pattern + nargs = action.nargs + + # the default (None) is assumed to be a single argument + if nargs is None: + nargs_pattern = '(-*A-*)' + + # allow zero or one arguments + elif nargs == OPTIONAL: + nargs_pattern = '(-*A?-*)' + + # allow zero or more arguments + elif nargs == ZERO_OR_MORE: + nargs_pattern = '(-*[A-]*)' + + # allow one or more arguments + elif nargs == ONE_OR_MORE: + nargs_pattern = '(-*A[A-]*)' + + # allow one argument followed by any number of options or arguments + elif nargs is PARSER: + nargs_pattern = '(-*A[-AO]*)' + + # all others should be integers + else: + nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs) + + # if this is an optional action, -- is not allowed + if action.option_strings: + nargs_pattern = nargs_pattern.replace('-*', '') + nargs_pattern = nargs_pattern.replace('-', '') + + # return the pattern + return nargs_pattern + + # ======================== + # Value conversion methods + # ======================== + + def _get_values(self, action, arg_strings): + # for everything but PARSER args, strip out '--' + if action.nargs is not PARSER: + arg_strings = [s for s in arg_strings if s != '--'] + + # optional argument produces a default when not present + if not arg_strings and action.nargs == OPTIONAL: + if action.option_strings: + value = action.const + else: + value = action.default + if isinstance(value, basestring): + value = self._get_value(action, value) + self._check_value(action, value) + + # when nargs='*' on a positional, if there were no command-line + # args, use the default if it is anything other than None + elif (not arg_strings and action.nargs == ZERO_OR_MORE and + not action.option_strings): + if action.default is not None: + value = action.default + else: + value = arg_strings + self._check_value(action, value) + + # single argument or optional argument produces a single value + elif 
len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]: + arg_string, = arg_strings + value = self._get_value(action, arg_string) + self._check_value(action, value) + + # PARSER arguments convert all values, but check only the first + elif action.nargs is PARSER: + value = list(self._get_value(action, v) for v in arg_strings) + self._check_value(action, value[0]) + + # all other types of nargs produce a list + else: + value = list(self._get_value(action, v) for v in arg_strings) + for v in value: + self._check_value(action, v) + + # return the converted value + return value + + def _get_value(self, action, arg_string): + type_func = self._registry_get('type', action.type, action.type) + if not callable(type_func): + msg = _('%r is not callable') + raise ArgumentError(action, msg % type_func) + + # convert the value to the appropriate type + try: + result = type_func(arg_string) + + # TypeErrors or ValueErrors indicate errors + except (TypeError, ValueError): + name = getattr(action.type, '__name__', repr(action.type)) + msg = _('invalid %s value: %r') + raise ArgumentError(action, msg % (name, arg_string)) + + # return the converted value + return result + + def _check_value(self, action, value): + # converted value must be one of the choices (if specified) + if action.choices is not None and value not in action.choices: + tup = value, ', '.join(map(repr, action.choices)) + msg = _('invalid choice: %r (choose from %s)') % tup + raise ArgumentError(action, msg) + + + + # ======================= + # Help-formatting methods + # ======================= + + def format_usage(self): + formatter = self._get_formatter() + formatter.add_usage(self.usage, + self._get_optional_actions(), + self._get_positional_actions()) + return formatter.format_help() + + def format_help(self): + formatter = self._get_formatter() + + # usage + formatter.add_usage(self.usage, + self._get_optional_actions(), + self._get_positional_actions()) + + # description + formatter.add_text(self.description) + + # positionals + formatter.start_section(_('positional arguments')) + formatter.add_arguments(self._positional_actions_list) + formatter.end_section() + + # optionals + formatter.start_section(_('optional arguments')) + formatter.add_arguments(self._optional_actions_list) + formatter.end_section() + + # user-defined groups + for argument_group in self._argument_groups: + formatter.start_section(argument_group.title) + formatter.add_text(argument_group.description) + formatter.add_arguments(argument_group._positional_actions_list) + formatter.add_arguments(argument_group._optional_actions_list) + formatter.end_section() + + # epilog + formatter.add_text(self.epilog) + + # determine help from format above + return formatter.format_help() + + def format_version(self): + formatter = self._get_formatter() + formatter.add_text(self.version) + return formatter.format_help() + + def _get_formatter(self): + return self.formatter_class(prog=self.prog) + + # ===================== + # Help-printing methods + # ===================== + + def print_usage(self, file=None): + self._print_message(self.format_usage(), file) + + def print_help(self, file=None): + self._print_message(self.format_help(), file) + + def print_version(self, file=None): + self._print_message(self.format_version(), file) + + def _print_message(self, message, file=None): + if message: + if file is None: + file = _sys.stderr + file.write(message) + + + # =============== + # Exiting methods + # =============== + + def exit(self, status=0, message=None): + if 
message: + _sys.stderr.write(message) + _sys.exit(status) + + def error(self, message): + """error(message: string) + + Prints a usage message incorporating the message to stderr and + exits. + + If you override this in a subclass, it should not return -- it + should either exit or raise an exception. + """ + self.print_usage(_sys.stderr) + self.exit(2, _('%s: error: %s\n') % (self.prog, message)) diff --git a/IPython/kernel/config/__init__.py b/IPython/kernel/config/__init__.py index e24c5c9..7a200bb 100644 --- a/IPython/kernel/config/__init__.py +++ b/IPython/kernel/config/__init__.py @@ -61,6 +61,7 @@ controller_config = dict( logfile = '', # Empty means log to stdout import_statement = '', + reuse_furls = False, # If False, old furl files are deleted engine_tub = dict( ip = '', # Empty string means all interfaces diff --git a/IPython/kernel/engineconnector.py b/IPython/kernel/engineconnector.py index 93626e8..c7be8a9 100644 --- a/IPython/kernel/engineconnector.py +++ b/IPython/kernel/engineconnector.py @@ -18,7 +18,8 @@ __docformat__ = "restructuredtext en" import os import cPickle as pickle -from twisted.python import log +from twisted.python import log, failure +from twisted.internet import defer from IPython.kernel.fcutil import find_furl from IPython.kernel.enginefc import IFCEngine @@ -62,13 +63,17 @@ class EngineConnector(object): self.tub.startService() self.engine_service = engine_service self.engine_reference = IFCEngine(self.engine_service) - self.furl = find_furl(furl_or_file) + try: + self.furl = find_furl(furl_or_file) + except ValueError: + return defer.fail(failure.Failure()) + # return defer.fail(failure.Failure(ValueError('not a valid furl or furl file: %r' % furl_or_file))) d = self.tub.getReference(self.furl) d.addCallbacks(self._register, self._log_failure) return d def _log_failure(self, reason): - log.err('engine registration failed:') + log.err('EngineConnector: engine registration failed:') log.err(reason) return reason diff --git a/IPython/kernel/scripts/ipcluster.py b/IPython/kernel/scripts/ipcluster.py index 08e8c9a..02f8060 100644 --- a/IPython/kernel/scripts/ipcluster.py +++ b/IPython/kernel/scripts/ipcluster.py @@ -1,347 +1,486 @@ #!/usr/bin/env python # encoding: utf-8 -"""Start an IPython cluster conveniently, either locally or remotely. +"""Start an IPython cluster = (controller + engines).""" -Basic usage ------------ - -For local operation, the simplest mode of usage is: - - %prog -n N - -where N is the number of engines you want started. - -For remote operation, you must call it with a cluster description file: - - %prog -f clusterfile.py - -The cluster file is a normal Python script which gets run via execfile(). You -can have arbitrary logic in it, but all that matters is that at the end of the -execution, it declares the variables 'controller', 'engines', and optionally -'sshx'. See the accompanying examples for details on what these variables must -contain. - - -Notes ------ - -WARNING: this code is still UNFINISHED and EXPERIMENTAL! It is incomplete, -some listed options are not really implemented, and all of its interfaces are -subject to change. - -When operating over SSH for a remote cluster, this program relies on the -existence of a particular script called 'sshx'. This script must live in the -target systems where you'll be running your controller and engines, and is -needed to configure your PATH and PYTHONPATH variables for further execution of -python code at the other end of an SSH connection. 
The script can be as simple -as: - -#!/bin/sh -. $HOME/.bashrc -"$@" - -which is the default one provided by IPython. You can modify this or provide -your own. Since it's quite likely that for different clusters you may need -this script to configure things differently or that it may live in different -locations, its full path can be set in the same file where you define the -cluster setup. IPython's order of evaluation for this variable is the -following: - - a) Internal default: 'sshx'. This only works if it is in the default system - path which SSH sets up in non-interactive mode. - - b) Environment variable: if $IPYTHON_SSHX is defined, this overrides the - internal default. - - c) Variable 'sshx' in the cluster configuration file: finally, this will - override the previous two values. - -This code is Unix-only, with precious little hope of any of this ever working -under Windows, since we need SSH from the ground up, we background processes, -etc. Ports of this functionality to Windows are welcome. - - -Call summary ------------- - - %prog [options] -""" - -__docformat__ = "restructuredtext en" - -#------------------------------------------------------------------------------- +#----------------------------------------------------------------------------- # Copyright (C) 2008 The IPython Development Team # # Distributed under the terms of the BSD License. The full license is in # the file COPYING, distributed as part of this software. -#------------------------------------------------------------------------------- +#----------------------------------------------------------------------------- -#------------------------------------------------------------------------------- -# Stdlib imports -#------------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Imports +#----------------------------------------------------------------------------- import os -import signal +import re import sys -import time +import signal +pjoin = os.path.join -from optparse import OptionParser -from subprocess import Popen,call +from twisted.internet import reactor, defer +from twisted.internet.protocol import ProcessProtocol +from twisted.python import failure, log +from twisted.internet.error import ProcessDone, ProcessTerminated +from twisted.internet.utils import getProcessOutput -#--------------------------------------------------------------------------- -# IPython imports -#--------------------------------------------------------------------------- -from IPython.tools import utils -from IPython.genutils import get_ipython_dir +from IPython.external import argparse +from IPython.external import Itpl +from IPython.kernel.twistedutil import gatherBoth +from IPython.kernel.util import printer +from IPython.genutils import get_ipython_dir, num_cpus -#--------------------------------------------------------------------------- -# Normal code begins -#--------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# General process handling code +#----------------------------------------------------------------------------- -def parse_args(): - """Parse command line and return opts,args.""" +def find_exe(cmd): + try: + import win32api + except ImportError: + raise ImportError('you need to have pywin32 installed for this to work') + else: + (path, offest) = win32api.SearchPath(os.environ['PATH'],cmd) + return path - 
parser = OptionParser(usage=__doc__) - newopt = parser.add_option # shorthand +class ProcessStateError(Exception): + pass - newopt("--controller-port", type="int", dest="controllerport", - help="the TCP port the controller is listening on") +class UnknownStatus(Exception): + pass - newopt("--controller-ip", type="string", dest="controllerip", - help="the TCP ip address of the controller") +class LauncherProcessProtocol(ProcessProtocol): + """ + A ProcessProtocol to go with the ProcessLauncher. + """ + def __init__(self, process_launcher): + self.process_launcher = process_launcher + + def connectionMade(self): + self.process_launcher.fire_start_deferred(self.transport.pid) + + def processEnded(self, status): + value = status.value + if isinstance(value, ProcessDone): + self.process_launcher.fire_stop_deferred(0) + elif isinstance(value, ProcessTerminated): + self.process_launcher.fire_stop_deferred( + {'exit_code':value.exitCode, + 'signal':value.signal, + 'status':value.status + } + ) + else: + raise UnknownStatus("unknown exit status, this is probably a bug in Twisted") - newopt("-n", "--num", type="int", dest="n",default=2, - help="the number of engines to start") + def outReceived(self, data): + log.msg(data) - newopt("--engine-port", type="int", dest="engineport", - help="the TCP port the controller will listen on for engine " - "connections") - - newopt("--engine-ip", type="string", dest="engineip", - help="the TCP ip address the controller will listen on " - "for engine connections") + def errReceived(self, data): + log.err(data) - newopt("--mpi", type="string", dest="mpi", - help="use mpi with package: for instance --mpi=mpi4py") +class ProcessLauncher(object): + """ + Start and stop an external process in an asynchronous manner. + + Currently this uses deferreds to notify other parties of process state + changes. This is an awkward design and should be moved to using + a formal NotificationCenter. + """ + def __init__(self, cmd_and_args): + self.cmd = cmd_and_args[0] + self.args = cmd_and_args + self._reset() + + def _reset(self): + self.process_protocol = None + self.pid = None + self.start_deferred = None + self.stop_deferreds = [] + self.state = 'before' # before, running, or after + + @property + def running(self): + if self.state == 'running': + return True + else: + return False + + def fire_start_deferred(self, pid): + self.pid = pid + self.state = 'running' + log.msg('Process %r has started with pid=%i' % (self.args, pid)) + self.start_deferred.callback(pid) + + def start(self): + if self.state == 'before': + self.process_protocol = LauncherProcessProtocol(self) + self.start_deferred = defer.Deferred() + self.process_transport = reactor.spawnProcess( + self.process_protocol, + self.cmd, + self.args, + env=os.environ + ) + return self.start_deferred + else: + s = 'the process has already been started and has state: %r' % \ + self.state + return defer.fail(ProcessStateError(s)) + + def get_stop_deferred(self): + if self.state == 'running' or self.state == 'before': + d = defer.Deferred() + self.stop_deferreds.append(d) + return d + else: + s = 'this process is already complete' + return defer.fail(ProcessStateError(s)) + + def fire_stop_deferred(self, exit_code): + log.msg('Process %r has stopped with %r' % (self.args, exit_code)) + self.state = 'after' + for d in self.stop_deferreds: + d.callback(exit_code) + + def signal(self, sig): + """ + Send a signal to the process. + + The argument sig can be ('KILL','INT', etc.) or any signal number. 
+ """ + if self.state == 'running': + self.process_transport.signalProcess(sig) - newopt("-l", "--logfile", type="string", dest="logfile", - help="log file name") + # def __del__(self): + # self.signal('KILL') + + def interrupt_then_kill(self, delay=1.0): + self.signal('INT') + reactor.callLater(delay, self.signal, 'KILL') + - newopt('-f','--cluster-file',dest='clusterfile', - help='file describing a remote cluster') +#----------------------------------------------------------------------------- +# Code for launching controller and engines +#----------------------------------------------------------------------------- - return parser.parse_args() -def numAlive(controller,engines): - """Return the number of processes still alive.""" - retcodes = [controller.poll()] + \ - [e.poll() for e in engines] - return retcodes.count(None) +class ControllerLauncher(ProcessLauncher): + + def __init__(self, extra_args=None): + if sys.platform == 'win32': + args = [find_exe('ipcontroller.bat')] + else: + args = ['ipcontroller'] + self.extra_args = extra_args + if extra_args is not None: + args.extend(extra_args) + + ProcessLauncher.__init__(self, args) -stop = lambda pid: os.kill(pid,signal.SIGINT) -kill = lambda pid: os.kill(pid,signal.SIGTERM) -def cleanup(clean,controller,engines): - """Stop the controller and engines with the given cleanup method.""" +class EngineLauncher(ProcessLauncher): - for e in engines: - if e.poll() is None: - print 'Stopping engine, pid',e.pid - clean(e.pid) - if controller.poll() is None: - print 'Stopping controller, pid',controller.pid - clean(controller.pid) - - -def ensureDir(path): - """Ensure a directory exists or raise an exception.""" - if not os.path.isdir(path): - os.makedirs(path) - - -def startMsg(control_host,control_port=10105): - """Print a startup message""" - print - print 'Your cluster is up and running.' 
- print - print 'For interactive use, you can make a MultiEngineClient with:' - print - print 'from IPython.kernel import client' - print "mec = client.MultiEngineClient()" - print - print 'You can then cleanly stop the cluster from IPython using:' - print - print 'mec.kill(controller=True)' - print + def __init__(self, extra_args=None): + if sys.platform == 'win32': + args = [find_exe('ipengine.bat')] + else: + args = ['ipengine'] + self.extra_args = extra_args + if extra_args is not None: + args.extend(extra_args) + + ProcessLauncher.__init__(self, args) + +class LocalEngineSet(object): -def clusterLocal(opt,arg): - """Start a cluster on the local machine.""" + def __init__(self, extra_args=None): + self.extra_args = extra_args + self.launchers = [] - # Store all logs inside the ipython directory - ipdir = get_ipython_dir() - pjoin = os.path.join - - logfile = opt.logfile - if logfile is None: - logdir_base = pjoin(ipdir,'log') - ensureDir(logdir_base) - logfile = pjoin(logdir_base,'ipcluster-') - - print 'Starting controller:', - controller = Popen(['ipcontroller','--logfile',logfile,'-x','-y']) - print 'Controller PID:',controller.pid - - print 'Starting engines: ', - time.sleep(5) - - englogfile = '%s%s-' % (logfile,controller.pid) - mpi = opt.mpi - if mpi: # start with mpi - killing the engines with sigterm will not work if you do this - engines = [Popen(['mpirun', '-np', str(opt.n), 'ipengine', '--mpi', - mpi, '--logfile',englogfile])] - # engines = [Popen(['mpirun', '-np', str(opt.n), 'ipengine', '--mpi', mpi])] - else: # do what we would normally do - engines = [ Popen(['ipengine','--logfile',englogfile]) - for i in range(opt.n) ] - eids = [e.pid for e in engines] - print 'Engines PIDs: ',eids - print 'Log files: %s*' % englogfile + def start(self, n): + dlist = [] + for i in range(n): + el = EngineLauncher(extra_args=self.extra_args) + d = el.start() + self.launchers.append(el) + dlist.append(d) + dfinal = gatherBoth(dlist, consumeErrors=True) + dfinal.addCallback(self._handle_start) + return dfinal - proc_ids = eids + [controller.pid] - procs = engines + [controller] - - grpid = os.getpgrp() - try: - startMsg('127.0.0.1') - print 'You can also hit Ctrl-C to stop it, or use from the cmd line:' - print - print 'kill -INT',grpid - print - try: - while True: - time.sleep(5) - except: - pass - finally: - print 'Stopping cluster. Cleaning up...' - cleanup(stop,controller,engines) - for i in range(4): - time.sleep(i+2) - nZombies = numAlive(controller,engines) - if nZombies== 0: - print 'OK: All processes cleaned up.' - break - print 'Trying again, %d processes did not stop...' % nZombies - cleanup(kill,controller,engines) - if numAlive(controller,engines) == 0: - print 'OK: All processes cleaned up.' - break - else: - print '*'*75 - print 'ERROR: could not kill some processes, try to do it', - print 'manually.' - zombies = [] - if controller.returncode is None: - print 'Controller is alive: pid =',controller.pid - zombies.append(controller.pid) - liveEngines = [ e for e in engines if e.returncode is None ] - for e in liveEngines: - print 'Engine is alive: pid =',e.pid - zombies.append(e.pid) - print - print 'Zombie summary:',' '.join(map(str,zombies)) - -def clusterRemote(opt,arg): - """Start a remote cluster over SSH""" - - # B. Granger, 9/3/08 - # The launching of a remote cluster using SSH and a clusterfile - # is broken. Because it won't be fixed before the 0.9 release, - # we are removing it. For now, we just print a message to the - # user and abort. 
+ def _handle_start(self, r): + log.msg('Engines started with pids: %r' % r) + return r - print """The launching of a remote IPython cluster using SSL -and a clusterfile has been removed in this release. -It has been broken for a while and we are in the process -of building a new process management system that will be -used to provide a more robust way of starting an IPython -cluster. - -For now remote clusters have to be launched using ipcontroller -and ipengine separately. - """ - sys.exit(1) - - # Load the remote cluster configuration - clConfig = {} - execfile(opt.clusterfile,clConfig) - contConfig = clConfig['controller'] - engConfig = clConfig['engines'] - # Determine where to find sshx: - sshx = clConfig.get('sshx',os.environ.get('IPYTHON_SSHX','sshx')) + def _handle_stop(self, r): + log.msg('Engines received signal: %r' % r) + return r + + def signal(self, sig): + dlist = [] + for el in self.launchers: + d = el.get_stop_deferred() + dlist.append(d) + el.signal(sig) + dfinal = gatherBoth(dlist, consumeErrors=True) + dfinal.addCallback(self._handle_stop) + return dfinal + + def interrupt_then_kill(self, delay=1.0): + dlist = [] + for el in self.launchers: + d = el.get_stop_deferred() + dlist.append(d) + el.interrupt_then_kill(delay) + dfinal = gatherBoth(dlist, consumeErrors=True) + dfinal.addCallback(self._handle_stop) + return dfinal + + +class BatchEngineSet(object): - # Store all logs inside the ipython directory - ipdir = get_ipython_dir() - pjoin = os.path.join - - logfile = opt.logfile - if logfile is None: - logdir_base = pjoin(ipdir,'log') - ensureDir(logdir_base) - logfile = pjoin(logdir_base,'ipcluster') - - # Append this script's PID to the logfile name always - logfile = '%s-%s' % (logfile,os.getpid()) + # Subclasses must fill these in. 
See PBSEngineSet + submit_command = '' + delete_command = '' + job_id_regexp = '' - print 'Starting controller:' - # Controller data: - xsys = os.system - - contHost = contConfig['host'] - contLog = '%s-con-%s-' % (logfile,contHost) - cmd = "ssh %s '%s' 'ipcontroller --logfile %s' &" % \ - (contHost,sshx,contLog) - #print 'cmd:<%s>' % cmd # dbg - xsys(cmd) - time.sleep(2) - - print 'Starting engines: ' - for engineHost,engineData in engConfig.iteritems(): - if isinstance(engineData,int): - numEngines = engineData + def __init__(self, template_file, **kwargs): + self.template_file = template_file + self.context = {} + self.context.update(kwargs) + self.batch_file = self.template_file+'-run' + + def parse_job_id(self, output): + m = re.match(self.job_id_regexp, output) + if m is not None: + job_id = m.group() else: - raise NotImplementedError('port configuration not finished for engines') - - print 'Sarting %d engines on %s' % (numEngines,engineHost) - engLog = '%s-eng-%s-' % (logfile,engineHost) - for i in range(numEngines): - cmd = "ssh %s '%s' 'ipengine --controller-ip %s --logfile %s' &" % \ - (engineHost,sshx,contHost,engLog) - #print 'cmd:<%s>' % cmd # dbg - xsys(cmd) - # Wait after each host a little bit - time.sleep(1) - - startMsg(contConfig['host']) + raise Exception("job id couldn't be determined: %s" % output) + self.job_id = job_id + log.msg('Job started with job id: %r' % job_id) + return job_id + + def write_batch_script(self, n): + self.context['n'] = n + template = open(self.template_file, 'r').read() + log.msg('Using template for batch script: %s' % self.template_file) + script_as_string = Itpl.itplns(template, self.context) + log.msg('Writing instantiated batch script: %s' % self.batch_file) + f = open(self.batch_file,'w') + f.write(script_as_string) + f.close() + + def handle_error(self, f): + f.printTraceback() + f.raiseException() + + def start(self, n): + self.write_batch_script(n) + d = getProcessOutput(self.submit_command, + [self.batch_file],env=os.environ) + d.addCallback(self.parse_job_id) + d.addErrback(self.handle_error) + return d -def main(): - """Main driver for the two big options: local or remote cluster.""" + def kill(self): + d = getProcessOutput(self.delete_command, + [self.job_id],env=os.environ) + return d + +class PBSEngineSet(BatchEngineSet): - if sys.platform=='win32': - print """ipcluster does not work on Microsoft Windows. Please start -your IPython cluster using the ipcontroller and ipengine scripts.""" - sys.exit(1) + submit_command = 'qsub' + delete_command = 'qdel' + job_id_regexp = '\d+' - opt,arg = parse_args() + def __init__(self, template_file, **kwargs): + BatchEngineSet.__init__(self, template_file, **kwargs) + + +#----------------------------------------------------------------------------- +# Main functions for the different types of clusters +#----------------------------------------------------------------------------- + +# TODO: +# The logic in these codes should be moved into classes like LocalCluster +# MpirunCluster, PBSCluster, etc. This would remove alot of the duplications. +# The main functions should then just parse the command line arguments, create +# the appropriate class and call a 'start' method. 
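As a usage sketch of the batch classes above (illustrative only; ``pbs.template`` is a placeholder file name that must contain a ``$n`` variable for Itpl to interpolate, and the engine count is arbitrary)::

    # Sketch: submit five engines through PBS with the classes above.
    # start() returns a Deferred that fires with the job id that
    # parse_job_id() extracts from the qsub output.
    from twisted.internet import reactor
    from twisted.python import log

    pbs_set = PBSEngineSet('pbs.template')
    d = pbs_set.start(5)
    d.addCallback(lambda job_id: log.msg('PBS job submitted: %s' % job_id))
    reactor.run()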
+ +def main_local(args): + cont_args = [] + cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller')) + if args.x: + cont_args.append('-x') + if args.y: + cont_args.append('-y') + cl = ControllerLauncher(extra_args=cont_args) + dstart = cl.start() + def start_engines(cont_pid): + engine_args = [] + engine_args.append('--logfile=%s' % \ + pjoin(args.logdir,'ipengine%s-' % cont_pid)) + eset = LocalEngineSet(extra_args=engine_args) + def shutdown(signum, frame): + log.msg('Stopping local cluster') + # We are still playing with the times here, but these seem + # to be reliable in allowing everything to exit cleanly. + eset.interrupt_then_kill(0.5) + cl.interrupt_then_kill(0.5) + reactor.callLater(1.0, reactor.stop) + signal.signal(signal.SIGINT,shutdown) + d = eset.start(args.n) + return d + def delay_start(cont_pid): + # This is needed because the controller doesn't start listening + # right when it starts and the controller needs to write + # furl files for the engine to pick up + reactor.callLater(1.0, start_engines, cont_pid) + dstart.addCallback(delay_start) + dstart.addErrback(lambda f: f.raiseException()) + +def main_mpirun(args): + cont_args = [] + cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller')) + if args.x: + cont_args.append('-x') + if args.y: + cont_args.append('-y') + cl = ControllerLauncher(extra_args=cont_args) + dstart = cl.start() + def start_engines(cont_pid): + raw_args = ['mpirun'] + raw_args.extend(['-n',str(args.n)]) + raw_args.append('ipengine') + raw_args.append('-l') + raw_args.append(pjoin(args.logdir,'ipengine%s-' % cont_pid)) + if args.mpi: + raw_args.append('--mpi=%s' % args.mpi) + eset = ProcessLauncher(raw_args) + def shutdown(signum, frame): + log.msg('Stopping local cluster') + # We are still playing with the times here, but these seem + # to be reliable in allowing everything to exit cleanly. 
+ eset.interrupt_then_kill(1.0) + cl.interrupt_then_kill(1.0) + reactor.callLater(2.0, reactor.stop) + signal.signal(signal.SIGINT,shutdown) + d = eset.start() + return d + def delay_start(cont_pid): + # This is needed because the controller doesn't start listening + # right when it starts and the controller needs to write + # furl files for the engine to pick up + reactor.callLater(1.0, start_engines, cont_pid) + dstart.addCallback(delay_start) + dstart.addErrback(lambda f: f.raiseException()) + +def main_pbs(args): + cont_args = [] + cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller')) + if args.x: + cont_args.append('-x') + if args.y: + cont_args.append('-y') + cl = ControllerLauncher(extra_args=cont_args) + dstart = cl.start() + def start_engines(r): + pbs_set = PBSEngineSet(args.pbsscript) + def shutdown(signum, frame): + log.msg('Stopping pbs cluster') + d = pbs_set.kill() + d.addBoth(lambda _: cl.interrupt_then_kill(1.0)) + d.addBoth(lambda _: reactor.callLater(2.0, reactor.stop)) + signal.signal(signal.SIGINT,shutdown) + d = pbs_set.start(args.n) + return d + dstart.addCallback(start_engines) + dstart.addErrback(lambda f: f.raiseException()) + + +def get_args(): + base_parser = argparse.ArgumentParser(add_help=False) + base_parser.add_argument( + '-x', + action='store_true', + dest='x', + help='turn off client security' + ) + base_parser.add_argument( + '-y', + action='store_true', + dest='y', + help='turn off engine security' + ) + base_parser.add_argument( + "--logdir", + type=str, + dest="logdir", + help="directory to put log files (default=$IPYTHONDIR/log)", + default=pjoin(get_ipython_dir(),'log') + ) + base_parser.add_argument( + "-n", + "--num", + type=int, + dest="n", + default=2, + help="the number of engines to start" + ) + + parser = argparse.ArgumentParser( + description='IPython cluster startup. This starts a controller and\ + engines using various approaches. THIS IS A TECHNOLOGY PREVIEW AND\ + THE API WILL CHANGE SIGNIFICANTLY BEFORE THE FINAL RELEASE.' + ) + subparsers = parser.add_subparsers( + help='available cluster types. 
For help, do "ipcluster TYPE --help"') + + parser_local = subparsers.add_parser( + 'local', + help='run a local cluster', + parents=[base_parser] + ) + parser_local.set_defaults(func=main_local) + + parser_mpirun = subparsers.add_parser( + 'mpirun', + help='run a cluster using mpirun', + parents=[base_parser] + ) + parser_mpirun.add_argument( + "--mpi", + type=str, + dest="mpi", # Don't put a default here to allow no MPI support + help="how to call MPI_Init (default=mpi4py)" + ) + parser_mpirun.set_defaults(func=main_mpirun) + + parser_pbs = subparsers.add_parser( + 'pbs', + help='run a pbs cluster', + parents=[base_parser] + ) + parser_pbs.add_argument( + '--pbs-script', + type=str, + dest='pbsscript', + help='PBS script template', + default='pbs.template' + ) + parser_pbs.set_defaults(func=main_pbs) + args = parser.parse_args() + return args - clusterfile = opt.clusterfile - if clusterfile: - clusterRemote(opt,arg) - else: - clusterLocal(opt,arg) - - -if __name__=='__main__': +def main(): + args = get_args() + reactor.callWhenRunning(args.func, args) + log.startLogging(sys.stdout) + reactor.run() + +if __name__ == '__main__': main() diff --git a/IPython/kernel/scripts/ipcontroller.py b/IPython/kernel/scripts/ipcontroller.py index 26736c4..2606577 100644 --- a/IPython/kernel/scripts/ipcontroller.py +++ b/IPython/kernel/scripts/ipcontroller.py @@ -64,7 +64,10 @@ def make_tub(ip, port, secure, cert_file): if have_crypto: tub = Tub(certFile=cert_file) else: - raise SecurityError("OpenSSL is not available, so we can't run in secure mode, aborting") + raise SecurityError(""" +OpenSSL/pyOpenSSL is not available, so we can't run in secure mode. +Try running without security using 'ipcontroller -xy'. +""") else: tub = UnauthenticatedTub() @@ -202,6 +205,17 @@ def start_controller(): except: log.msg("Error running import_statement: %s" % cis) + # Delete old furl files unless the reuse_furls is set + reuse = config['controller']['reuse_furls'] + if not reuse: + paths = (config['controller']['engine_furl_file'], + config['controller']['controller_interfaces']['task']['furl_file'], + config['controller']['controller_interfaces']['multiengine']['furl_file'] + ) + for p in paths: + if os.path.isfile(p): + os.remove(p) + # Create the service hierarchy main_service = service.MultiService() # The controller service @@ -316,6 +330,12 @@ def init_config(): dest="ipythondir", help="look for config files and profiles in this directory" ) + parser.add_option( + "-r", + action="store_true", + dest="reuse_furls", + help="try to reuse all furl files" + ) (options, args) = parser.parse_args() @@ -349,6 +369,8 @@ def init_config(): config['controller']['engine_tub']['cert_file'] = options.engine_cert_file if options.engine_furl_file is not None: config['controller']['engine_furl_file'] = options.engine_furl_file + if options.reuse_furls is not None: + config['controller']['reuse_furls'] = options.reuse_furls if options.logfile is not None: config['controller']['logfile'] = options.logfile diff --git a/IPython/kernel/scripts/ipengine.py b/IPython/kernel/scripts/ipengine.py index 8d9d1b2..cd671f1 100644 --- a/IPython/kernel/scripts/ipengine.py +++ b/IPython/kernel/scripts/ipengine.py @@ -107,7 +107,11 @@ def start_engine(): furl_file = kernel_config['engine']['furl_file'] log.msg("Using furl file: %s" % furl_file) d = engine_connector.connect_to_controller(engine_service, furl_file) - d.addErrback(lambda _: reactor.stop()) + def handle_error(f): + log.err(f) + if reactor.running: + reactor.stop() + 
d.addErrback(handle_error)
 
     reactor.run()
diff --git a/MANIFEST.in b/MANIFEST.in
index 5ed5052..880dfd5 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,7 +1,6 @@
-include README_Windows.txt
-include win32_manual_post_install.py
 include ipython.py
 include setupbase.py
+include setupegg.py
 
 graft scripts
diff --git a/docs/source/changes.txt b/docs/source/changes.txt
index d44dd59..f1971d2 100644
--- a/docs/source/changes.txt
+++ b/docs/source/changes.txt
@@ -21,31 +21,64 @@ What's new
    6 Older releases
 
 ..
-Release DEV
+Release dev
 ===========
 
+New features
+------------
+
+* The wonderful TextMate editor can now be used with %edit on OS X. Thanks
+  to Matt Foster for this patch.
+
+* Fully refactored :command:`ipcluster` command line program for starting
+  IPython clusters. This new version is a complete rewrite and 1) is fully
+  cross platform (we now use Twisted's process management), 2) has much
+  improved performance, 3) uses subcommands for different types of clusters,
+  4) uses argparse for parsing command line options, 5) has better support
+  for starting clusters using :command:`mpirun`, 6) has experimental support
+  for starting engines using PBS. However, this new version of ipcluster
+  should be considered a technology preview. We plan on changing the API
+  in significant ways before it is final.
+
+* The :mod:`argparse` module has been added to :mod:`IPython.external`.
+
+* Full description of the security model added to the docs.
+
 * cd completer: show bookmarks if no other completions are available.
 
-* Remove ipy_leo.py. "easy_install ipython-extension" to get it.
-  (done to decouple it from ipython release cycle)
-
 * sh profile: easy way to give 'title' to prompt: assign to variable
   '_prompt_title'. It looks like this::
 
     [~]|1> _prompt_title = 'sudo!'
     sudo![~]|2>
-
+
+* %edit: If you do '%edit pasted_block', pasted_block
+  variable gets updated with new data (so repeated
+  editing makes sense)
+
+Bug fixes
+---------
+
+* The ipengine and ipcontroller scripts now handle missing furl files
+  more gracefully by giving better error messages.
+
 * %rehashx: Aliases no longer contain dots. python3.0 binary
   will create alias python30. Fixes:
   #259716 "commands with dots in them don't work"
-
+
 * %cpaste: %cpaste -r repeats the last pasted block.
   The block is assigned to pasted_block even if code
   raises exception.
 
-* %edit: If you do '%edit pasted_block', pasted_block
-  variable gets updated with new data (so repeated
-  editing makes sense)
+Backwards incompatible changes
+------------------------------
+
+* The controller now has a ``-r`` flag that needs to be used if you want to
+  reuse existing furl files. Otherwise they are deleted (the default).
+
+* Remove ipy_leo.py. "easy_install ipython-extension" to get it.
+  (done to decouple it from ipython release cycle)
+
 
 Release 0.9.1
@@ -62,14 +95,14 @@ Release 0.9
 New features
 ------------
 
-* All furl files and security certificates are now put in a read-only directory
-  named ~./ipython/security.
+* All furl files and security certificates are now put in a read-only
+  directory named ~/.ipython/security.
 
 * A single function :func:`get_ipython_dir`, in :mod:`IPython.genutils` that
   determines the user's IPython directory in a robust manner.
 
-* Laurent's WX application has been given a top-level script called ipython-wx,
-  and it has received numerous fixes. We expect this code to be
+* Laurent's WX application has been given a top-level script called
+  ipython-wx, and it has received numerous fixes.
We expect this code to be architecturally better integrated with Gael's WX 'ipython widget' over the next few releases. @@ -80,7 +113,7 @@ New features * A new, still experimental but highly functional, WX shell by Gael Varoquaux. This work was sponsored by Enthought, and while it's still very new, it is based on a more cleanly organized arhictecture of the various IPython - components. We will continue to develop this over the next few releases as a + components. We will continue to develop this over the next few releases as a model for GUI components that use IPython. * Another GUI frontend, Cocoa based (Cocoa is the OSX native GUI framework), @@ -131,17 +164,17 @@ New features friends have been completely refactored. Now we are checking for dependencies using the approach that matplotlib uses. -* The documentation has been completely reorganized to accept the documentation - from `ipython1-dev`. +* The documentation has been completely reorganized to accept the + documentation from `ipython1-dev`. * We have switched to using Foolscap for all of our network protocols in :mod:`IPython.kernel`. This gives us secure connections that are both encrypted and authenticated. * We have a brand new `COPYING.txt` files that describes the IPython license - and copyright. The biggest change is that we are putting "The IPython - Development Team" as the copyright holder. We give more details about - exactly what this means in this file. All developer should read this and use + and copyright. The biggest change is that we are putting "The IPython + Development Team" as the copyright holder. We give more details about + exactly what this means in this file. All developer should read this and use the new banner in all IPython source code files. * sh profile: ./foo runs foo as system command, no need to do !./foo anymore diff --git a/docs/source/config/index.txt b/docs/source/config/index.txt index b8263b1..1214811 100644 --- a/docs/source/config/index.txt +++ b/docs/source/config/index.txt @@ -3,7 +3,7 @@ Configuration and customization =============================== .. toctree:: - :maxdepth: 1 + :maxdepth: 2 initial_config.txt customization.txt diff --git a/docs/source/development/config_blueprint.txt b/docs/source/development/config_blueprint.txt new file mode 100644 index 0000000..caa5d13 --- /dev/null +++ b/docs/source/development/config_blueprint.txt @@ -0,0 +1,33 @@ +========================================= +Notes on the IPython configuration system +========================================= + +This document has some random notes on the configuration system. + +To start, an IPython process needs: + +* Configuration files +* Command line options +* Additional files (FURL files, extra scripts, etc.) + +It feeds these things into the core logic of the process, and as output, +produces: + +* Log files +* Security files + +There are a number of things that complicate this: + +* A process may need to be started on a different host that doesn't have + any of the config files or additional files. Those files need to be + moved over and put in a staging area. The process then needs to be told + about them. +* The location of the output files should somehow be set by config files or + command line options. +* Our config files are very hierarchical, but command line options are flat, + making it difficult to relate command line options to config files. 
+* Some processes (like ipcluster and the daemons) have to manage the input and + output files for multiple different subprocesses, each possibly on a + different host. Ahhhh! +* Our configurations are not singletons. A given user will likely have + many different configurations for different clusters. diff --git a/docs/source/development/development.txt b/docs/source/development/development.txt index 5bd03d5..0e61d88 100644 --- a/docs/source/development/development.txt +++ b/docs/source/development/development.txt @@ -8,143 +8,72 @@ IPython development guidelines Overview ======== -IPython is the next generation of IPython. It is named such for two reasons: - -- Eventually, IPython will become IPython version 1.0. -- This new code base needs to be able to co-exist with the existing IPython until - it is a full replacement for it. Thus we needed a different name. We couldn't - use ``ipython`` (lowercase) as some files systems are case insensitive. - -There are two, no three, main goals of the IPython effort: - -1. Clean up the existing codebase and write lots of tests. -2. Separate the core functionality of IPython from the terminal to enable IPython - to be used from within a variety of GUI applications. -3. Implement a system for interactive parallel computing. - -While the third goal may seem a bit unrelated to the main focus of IPython, it -turns out that the technologies required for this goal are nearly identical -with those required for goal two. This is the main reason the interactive -parallel computing capabilities are being put into IPython proper. Currently -the third of these goals is furthest along. - -This document describes IPython from the perspective of developers. - - -Project organization -==================== - -Subpackages ------------ - -IPython is organized into semi self-contained subpackages. Each of the -subpackages will have its own: - -- **Dependencies**. One of the most important things to keep in mind in - partitioning code amongst subpackages, is that they should be used to cleanly - encapsulate dependencies. - -- **Tests**. Each subpackage shoud have its own ``tests`` subdirectory that - contains all of the tests for that package. For information about writing - tests for IPython, see the `Testing System`_ section of this document. - -- **Configuration**. Each subpackage should have its own ``config`` - subdirectory that contains the configuration information for the components - of the subpackage. For information about how the IPython configuration - system works, see the `Configuration System`_ section of this document. - -- **Scripts**. Each subpackage should have its own ``scripts`` subdirectory - that contains all of the command line scripts associated with the subpackage. - -Installation and dependencies ------------------------------ - -IPython will not use `setuptools`_ for installation. Instead, we will use -standard ``setup.py`` scripts that use `distutils`_. While there are a number a -extremely nice features that `setuptools`_ has (like namespace packages), the -current implementation of `setuptools`_ has performance problems, particularly -on shared file systems. In particular, when Python packages are installed on -NSF file systems, import times become much too long (up towards 10 seconds). - -Because IPython is being used extensively in the context of high performance -computing, where performance is critical but shared file systems are common, we -feel these performance hits are not acceptable. 
Thus, until the performance -problems associated with `setuptools`_ are addressed, we will stick with plain -`distutils`_. We are hopeful that these problems will be addressed and that we -will eventually begin using `setuptools`_. Because of this, we are trying to -organize IPython in a way that will make the eventual transition to -`setuptools`_ as painless as possible. - -Because we will be using `distutils`_, there will be no method for -automatically installing dependencies. Instead, we are following the approach -of `Matplotlib`_ which can be summarized as follows: - -- Distinguish between required and optional dependencies. However, the required - dependencies for IPython should be only the Python standard library. - -- Upon installation check to see which optional dependencies are present and - tell the user which parts of IPython need which optional dependencies. - -It is absolutely critical that each subpackage of IPython has a clearly -specified set of dependencies and that dependencies are not carelessly -inherited from other IPython subpackages. Furthermore, tests that have certain -dependencies should not fail if those dependencies are not present. Instead -they should be skipped and print a message. - -.. _setuptools: http://peak.telecommunity.com/DevCenter/setuptools -.. _distutils: http://docs.python.org/lib/module-distutils.html -.. _Matplotlib: http://matplotlib.sourceforge.net/ - -Specific subpackages --------------------- - -``core`` - This is the core functionality of IPython that is independent of the - terminal, network and GUIs. Most of the code that is in the current - IPython trunk will be refactored, cleaned up and moved here. - -``kernel`` - The enables the IPython core to be expose to a the network. This is - also where all of the parallel computing capabilities are to be found. - -``config`` - The configuration package used by IPython. +This document describes IPython from the perspective of developers. Most +importantly, it gives information for people who want to contribute to the +development of IPython. So if you want to help out, read on! + +How to contribute to IPython +============================ + +IPython development is done using Bazaar [Bazaar]_ and Launchpad [Launchpad]_. +This makes it easy for people to contribute to the development of IPython. +Here is a sketch of how to get going. -``frontends`` - The various frontends for IPython. A frontend is the end-user application - that exposes the capabilities of IPython to the user. The most basic - frontend will simply be a terminal based application that looks just like - today 's IPython. Other frontends will likely be more powerful and based - on GUI toolkits. +Install Bazaar and create a Launchpad account +--------------------------------------------- -``notebook`` - An application that allows users to work with IPython notebooks. +First make sure you have installed Bazaar (see their `website +`_). To see that Bazaar is installed and knows about +you, try the following:: -``tools`` - This is where general utilities go. + $ bzr whoami + Joe Coder +This should display your name and email. Next, you will want to create an +account on the `Launchpad website `_ and setup your +ssh keys. For more information of setting up your ssh keys, see `this link +`_. -Version control -=============== +Get the main IPython branch from Launchpad +------------------------------------------ -In the past, IPython development has been done using `Subversion`__. 
Recently, -we made the transition to using `Bazaar`__ and `Launchpad`__. This makes it -much easier for people to contribute code to IPython. Here is a sketch of how -to use Bazaar for IPython development. First, you should install Bazaar. -After you have done that, make sure that it is working by getting the latest -main branch of IPython:: +Now, you can get a copy of the main IPython development branch (we call this +the "trunk"):: $ bzr branch lp:ipython -Now you can create a new branch for you to do your work in:: +Create a working branch +----------------------- + +When working on IPython, you won't actually make edits directly to the +:file:`lp:ipython` branch. Instead, you will create a separate branch for your +changes. For now, let's assume you want to do your work in a branch named +"ipython-mybranch". Create this branch by doing:: $ bzr branch ipython ipython-mybranch -The typical work cycle in this branch will be to make changes in -``ipython-mybranch`` and then commit those changes using the commit command:: +When you actually create a branch, you will want to give it a name that +reflects the nature of the work that you will be doing in it, like +"install-docs-update". + +Make edits in your working branch +--------------------------------- + +Now you are ready to actually make edits in your :file:`ipython-mybranch` +branch. Before doing this, it is helpful to install this branch so you can +test your changes as you work. This is easiest if you have setuptools +installed. Then, just do:: + + $ cd ipython-mybranch + $ python setupegg.py develop + +Now, make some changes. After a while, you will want to commit your changes. +This let's Bazaar know that you like the changes you have made and gives you +an opportunity to keep a nice record of what you have done. This looks like +this:: $ ...do work in ipython-mybranch... - $ bzr ci -m "the commit message goes here" + $ bzr commit -m "the commit message goes here" Please note that since we now don't use an old-style linear ChangeLog (that tends to cause problems with distributed version control systems), you should @@ -154,48 +83,60 @@ approach in the commit messages (including the second line being left Single line summary of changes being committed. - - more details when warranted ... - - including crediting outside contributors if they sent the + * more details when warranted ... + * including crediting outside contributors if they sent the code/bug/idea! -If we couple this with a policy of making single commits for each reasonably -atomic change, the bzr log should give an excellent view of the project, and -the `--short` log option becomes a nice summary. +As you work, you will repeat this edit/commit cycle many times. If you work on +your branch for a long time, you will also want to get the latest changes from +the :file:`lp:ipython` branch. This can be done with the following sequence of +commands:: -While working with this branch, it is a good idea to merge in changes that have -been made upstream in the parent branch. This can be done by doing:: - - $ bzr pull + $ ls + ipython + ipython-mybranch -If this command shows that the branches have diverged, then you should do a -merge instead:: + $ cd ipython + $ bzr pull + $ cd ../ipython-mybranch + $ bzr merge ../ipython + $ bzr commit -m "Merging changes from trunk" - $ bzr merge lp:ipython +Along the way, you should also run the IPython test suite. 
You can do this using the :command:`iptest` command:: -If you want others to be able to see your branch, you can create an account -with launchpad and push the branch to your own workspace:: + $ cd + $ iptest - $ bzr push bzr+ssh://@bazaar.launchpad.net/~/+junk/ipython-mybranch +The :command:`iptest` command will also pick up and run any tests you have written. -Finally, once the work in your branch is done, you can merge your changes back -into the `ipython` branch by using merge:: +Post your branch and request a code review +------------------------------------------ - $ cd ipython - $ merge ../ipython-mybranch - [resolve any conflicts] - $ bzr ci -m "Fixing that bug" - $ bzr push +Once you are done with your edits, you should post your branch on Launchpad so +that other IPython developers can review the changes and help you merge your +changes into the main development branch. To post your branch on Launchpad, +do:: + + $ cd ipython-mybranch + $ bzr push lp:~yourusername/ipython/ipython-mybranch + +Then, go to the `IPython Launchpad site `_, and you +should see your branch under the "Code" tab. If you click on your branch, you +can provide a short description of the branch as well as mark its status. Most +importantly, you should click the link that reads "Propose for merging into +another branch". What does this do? -But this will require you to have write permissions to the `ipython` branch. -It you don't you can tell one of the IPython devs about your branch and they -can do the merge for you. +This let's the other IPython developers know that your branch is ready to be +reviewed and merged into the main development branch. During this review +process, other developers will give you feedback and help you get your code +ready to be merged. What types of things will we be looking for: -More information about Bazaar workflows can be found `here`__. +* All code is documented. +* All code has tests. +* The entire IPython test suite passes. -.. __: http://subversion.tigris.org/ -.. __: http://bazaar-vcs.org/ -.. __: http://www.launchpad.net/ipython -.. __: http://doc.bazaar-vcs.org/bzr.dev/en/user-guide/index.html +Once your changes have been reviewed and approved, someone will merge them +into the main development branch. Documentation ============= @@ -204,39 +145,31 @@ Standalone documentation ------------------------ All standalone documentation should be written in plain text (``.txt``) files -using `reStructuredText`_ for markup and formatting. All such documentation -should be placed in the top level directory ``docs`` of the IPython source -tree. Or, when appropriate, a suitably named subdirectory should be used. The -documentation in this location will serve as the main source for IPython -documentation and all existing documentation should be converted to this -format. +using reStructuredText [reStructuredText]_ for markup and formatting. All such +documentation should be placed in directory :file:`docs/source` of the IPython +source tree. The documentation in this location will serve as the main source +for IPython documentation and all existing documentation should be converted +to this format. -In the future, the text files in the ``docs`` directory will be used to -generate all forms of documentation for IPython. This include documentation on -the IPython website as well as *pdf* documentation. +To build the final documentation, we use Sphinx [Sphinx]_. Once you have Sphinx installed, you can build the html docs yourself by doing:: -.. 
_reStructuredText: http://docutils.sourceforge.net/rst.html + $ cd ipython-mybranch/docs + $ make html Docstring format ---------------- -Good docstrings are very important. All new code will use `Epydoc`_ for -generating API docs, so we will follow the `Epydoc`_ conventions. More -specifically, we will use `reStructuredText`_ for markup and formatting, since -it is understood by a wide variety of tools. This means that if in the future -we have any reason to change from `Epydoc`_ to something else, we'll have fewer -transition pains. - -Details about using `reStructuredText`_ for docstrings can be found `here +Good docstrings are very important. All new code should have docstrings that +are formatted using reStructuredText for markup and formatting, since it is +understood by a wide variety of tools. Details about using reStructuredText +for docstrings can be found `here `_. -.. _Epydoc: http://epydoc.sourceforge.net/ - Additional PEPs of interest regarding documentation of code: -- `Docstring Conventions `_ -- `Docstring Processing System Framework `_ -- `Docutils Design Specification `_ +* `Docstring Conventions `_ +* `Docstring Processing System Framework `_ +* `Docutils Design Specification `_ Coding conventions @@ -248,17 +181,16 @@ General In general, we'll try to follow the standard Python style conventions as described here: -- `Style Guide for Python Code `_ +* `Style Guide for Python Code `_ Other comments: -- In a large file, top level classes and functions should be +* In a large file, top level classes and functions should be separated by 2-3 lines to make it easier to separate them visually. -- Use 4 spaces for indentation. -- Keep the ordering of methods the same in classes that have the same - methods. This is particularly true for classes that implement - similar interfaces and for interfaces that are similar. +* Use 4 spaces for indentation. +* Keep the ordering of methods the same in classes that have the same + methods. This is particularly true for classes that implement an interface. Naming conventions ------------------ @@ -268,38 +200,33 @@ Guide for Python Code`_. For all new IPython code (and much existing code is being refactored), we'll use: -- All ``lowercase`` module names. +* All ``lowercase`` module names. -- ``CamelCase`` for class names. +* ``CamelCase`` for class names. -- ``lowercase_with_underscores`` for methods, functions, variables and +* ``lowercase_with_underscores`` for methods, functions, variables and attributes. -This may be confusing as most of the existing IPython codebase uses a different -convention (``lowerCamelCase`` for methods and attributes). Slowly, we will -move IPython over to the new convention, providing shadow names for backward -compatibility in public interfaces. - -There are, however, some important exceptions to these rules. In some cases, +There are, however, some important exceptions to these rules. In some cases, IPython code will interface with packages (Twisted, Wx, Qt) that use other -conventions. At some level this makes it impossible to adhere to our own -standards at all times. In particular, when subclassing classes that use other -naming conventions, you must follow their naming conventions. To deal with +conventions. At some level this makes it impossible to adhere to our own +standards at all times. In particular, when subclassing classes that use other +naming conventions, you must follow their naming conventions. 
To deal with cases like this, we propose the following policy: -- If you are subclassing a class that uses different conventions, use its +* If you are subclassing a class that uses different conventions, use its naming conventions throughout your subclass. Thus, if you are creating a Twisted Protocol class, used Twisted's ``namingSchemeForMethodsAndAttributes.`` -- All IPython's official interfaces should use our conventions. In some cases +* All IPython's official interfaces should use our conventions. In some cases this will mean that you need to provide shadow names (first implement ``fooBar`` and then ``foo_bar = fooBar``). We want to avoid this at all costs, but it will probably be necessary at times. But, please use this sparingly! Implementation-specific *private* methods will use -``_single_underscore_prefix``. Names with a leading double underscore will +``_single_underscore_prefix``. Names with a leading double underscore will *only* be used in special cases, as they makes subclassing difficult (such names are not easily seen by child classes). @@ -323,107 +250,38 @@ specific ``IPY`` or ``ipy`` are preferred. Testing system ============== -It is extremely important that all code contributed to IPython has tests. Tests -should be written as unittests, doctests or as entities that the `Nose`_ -testing package will find. Regardless of how the tests are written, we will use -`Nose`_ for discovering and running the tests. `Nose`_ will be required to run -the IPython test suite, but will not be required to simply use IPython. +It is extremely important that all code contributed to IPython has tests. +Tests should be written as unittests, doctests or as entities that the Nose +[Nose]_ testing package will find. Regardless of how the tests are written, we +will use Nose for discovering and running the tests. Nose will be required to +run the IPython test suite, but will not be required to simply use IPython. -.. _Nose: http://code.google.com/p/python-nose/ +Tests of Twisted using code need to follow two additional guidelines: -Tests of `Twisted`__ using code should be written by subclassing the -``TestCase`` class that comes with ``twisted.trial.unittest``. When this is -done, `Nose`_ will be able to run the tests and the twisted reactor will be -handled correctly. +1. Twisted using tests should be written by subclassing the :class:`TestCase` + class that comes with :mod:`twisted.trial.unittest`. -.. __: http://www.twistedmatrix.com +2. All :class:`Deferred` instances that are created in the test must be + properly chained and the final one *must* be the return value of the test + method. -Each subpackage in IPython should have its own ``tests`` directory that +When these two things are done, Nose will be able to run the tests and the +twisted reactor will be handled correctly. + +Each subpackage in IPython should have its own :file:`tests` directory that contains all of the tests for that subpackage. This allows each subpackage to be self-contained. If a subpackage has any dependencies beyond the Python standard library, the tests for that subpackage should be skipped if the dependencies are not found. This is very important so users don't get tests failing simply because they don't have dependencies. -We also need to look into use Noses ability to tag tests to allow a more -modular approach of running tests. +To run the IPython test suite, use the :command:`iptest` command that is installed with IPython:: -.. 
_devel_config: + $ iptest -Configuration system -==================== - -IPython uses `.ini`_ files for configuration purposes. This represents a huge -improvement over the configuration system used in IPython. IPython works with -these files using the `ConfigObj`_ package, which IPython includes as -``ipython1/external/configobj.py``. - -Currently, we are using raw `ConfigObj`_ objects themselves. Each subpackage of -IPython should contain a ``config`` subdirectory that contains all of the -configuration information for the subpackage. To see how configuration -information is defined (along with defaults) see at the examples in -``ipython1/kernel/config`` and ``ipython1/core/config``. Likewise, to see how -the configuration information is used, see examples in -``ipython1/kernel/scripts/ipengine.py``. - -Eventually, we will add a new layer on top of the raw `ConfigObj`_ objects. We -are calling this new layer, ``tconfig``, as it will use a `Traits`_-like -validation model. We won't actually use `Traits`_, but will implement -something similar in pure Python. But, even in this new system, we will still -use `ConfigObj`_ and `.ini`_ files underneath the hood. Talk to Fernando if you -are interested in working on this part of IPython. The current prototype of -``tconfig`` is located in the IPython sandbox. - -.. _.ini: http://docs.python.org/lib/module-ConfigParser.html -.. _ConfigObj: http://www.voidspace.org.uk/python/configobj.html -.. _Traits: http://code.enthought.com/traits/ - - -Installation and testing scenarios -================================== - -This section outlines the various scenarios that we need to test before we -release an IPython version. These scenarios represent different ways of -installing IPython and its dependencies. - -Installation scenarios under Linux and OS X -------------------------------------------- - - 1. Install from tarball using ``python setup.py install``. - a. With only readline+nose dependencies installed. - b. With all dependencies installed (readline, zope.interface, Twisted, - foolscap, Sphinx, nose, pyOpenSSL). - - 2. Install using easy_install. - - a. With only readline+nose dependencies installed. - i. Default dependencies: ``easy_install ipython-0.9.beta3-py2.5.egg`` - ii. Optional dependency sets: ``easy_install -f ipython-0.9.beta3-py2.5.egg IPython[kernel,doc,test,security]`` - - b. With all dependencies already installed. - - -Installation scenarios under Win32 ----------------------------------- - - 1. Install everything from .exe installers - 2. easy_install? - - -Tests to run for these scenarios --------------------------------- - - 1. Run the full test suite. - 2. Start a controller and engines and try a few things by hand. - a. Using ipcluster. - b. Using ipcontroller/ipengine by hand. - - 3. Run a few of the parallel examples. - 4. Try the kernel with and without security with and without PyOpenSSL - installed. - 5. Beat on the IPython terminal a bunch. - 6. Make sure that furl files are being put in proper locations. +This command runs Nose with the proper options and extensions. +.. _devel_config: Release checklist ================= @@ -434,9 +292,9 @@ Most of the release process is automated by the :file:`release` script in the #. Run the release script, which makes the tar.gz, eggs and Win32 .exe installer. It posts them to the site and registers the release with PyPI. -#. Updating the website with announcements and links to the updated changes.txt - in html form. 
Remember to put a short note both on the news
-   and on launcphad.
+#. Updating the website with announcements and links to the updated
+   changes.txt in html form. Remember to put a short note both on the news
+   page of the site and on Launchpad.
 
 #. Drafting a short release announcement with i) highlights and ii) a link to
    the html changes.txt.
 
@@ -444,3 +302,9 @@ Most of the release process is automated by the :file:`release` script in the
 #. Make sure that the released version of the docs is live on the site.
 
 #. Celebrate!
+
+.. [Bazaar] Bazaar. http://bazaar-vcs.org/
+.. [Launchpad] Launchpad. http://www.launchpad.net/ipython
+.. [reStructuredText] reStructuredText. http://docutils.sourceforge.net/rst.html
+.. [Sphinx] Sphinx. http://sphinx.pocoo.org/
+.. [Nose] Nose: a discovery based unittest extension. http://code.google.com/p/python-nose/
diff --git a/docs/source/development/index.txt b/docs/source/development/index.txt
index c3796e0..ca18e64 100644
--- a/docs/source/development/index.txt
+++ b/docs/source/development/index.txt
@@ -8,3 +8,4 @@ Development
    development.txt
    roadmap.txt
    notification_blueprint.txt
+   config_blueprint.txt
diff --git a/docs/source/development/notification_blueprint.txt b/docs/source/development/notification_blueprint.txt
index ffb8c73..8b0dd75 100644
--- a/docs/source/development/notification_blueprint.txt
+++ b/docs/source/development/notification_blueprint.txt
@@ -6,27 +6,50 @@ IPython.kernel.core.notification blueprint
 
 Overview
 ========
-The :mod:`IPython.kernel.core.notification` module will provide a simple implementation of a notification center and support for the observer pattern within the :mod:`IPython.kernel.core`. The main intended use case is to provide notification of Interpreter events to an observing frontend during the execution of a single block of code.
+
+The :mod:`IPython.kernel.core.notification` module will provide a simple
+implementation of a notification center and support for the observer pattern
+within the :mod:`IPython.kernel.core`. The main intended use case is to
+provide notification of Interpreter events to an observing frontend during the
+execution of a single block of code.
 
 Functional Requirements
 =======================
+
 The notification center must:
- * Provide synchronous notification of events to all registered observers.
- * Provide typed or labeled notification types
- * Allow observers to register callbacks for individual or all notification types
- * Allow observers to register callbacks for events from individual or all notifying objects
- * Notification to the observer consists of the notification type, notifying object and user-supplied extra information [implementation: as keyword parameters to the registered callback]
- * Perform as O(1) in the case of no registered observers.
- * Permit out-of-process or cross-network extension.
+
+* Provide synchronous notification of events to all registered observers.
+
+* Provide typed or labeled notification types.
+
+* Allow observers to register callbacks for individual or all notification
+  types.
+
+* Allow observers to register callbacks for events from individual or all
+  notifying objects.
+
+* Notification to the observer consists of the notification type, notifying
+  object and user-supplied extra information [implementation: as keyword
+  parameters to the registered callback].
+
+* Perform as O(1) in the case of no registered observers.
+
+* Permit out-of-process or cross-network extension.
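To make these requirements concrete, a minimal in-process center might look like the following sketch (illustrative only; every name here is hypothetical, not the planned implementation)::

    class NotificationCenter(object):
        """Sketch of a synchronous, in-process notification center."""

        def __init__(self):
            # (notification_type, sender) -> callbacks; None is a wildcard.
            self._observers = {}

        def add_observer(self, callback, ntype=None, sender=None):
            self._observers.setdefault((ntype, sender), []).append(callback)

        def post_notification(self, ntype, sender, **extra):
            if not self._observers:
                return  # O(1) when nothing is registered
            # Extra information travels as keyword arguments to the callback.
            for key in ((ntype, sender), (ntype, None),
                        (None, sender), (None, None)):
                for callback in self._observers.get(key, ()):
                    callback(ntype, sender, **extra)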
What's not included -============================================================== +=================== + As written, the :mod:`IPython.kernel.core.notification` module does not: - * Provide out-of-process or network notifications [these should be handled by a separate, Twisted aware module in :mod:`IPython.kernel`]. - * Provide zope.interface-style interfaces for the notification system [these should also be provided by the :mod:`IPython.kernel` module] + +* Provide out-of-process or network notifications (these should be handled by + a separate, Twisted-aware module in :mod:`IPython.kernel`). + +* Provide zope.interface-style interfaces for the notification system (these + should also be provided by the :mod:`IPython.kernel` module). Use Cases ========= + The following use cases describe the main intended uses of the notification module and illustrate the main success scenario for each use case: 1. Dwight Schroot is writing a frontend for the IPython project. His frontend is stuck in the stone age and must communicate synchronously with an IPython.kernel.core.Interpreter instance. Because code is executed in blocks by the Interpreter, Dwight's UI freezes every time he executes a long block of code. To keep track of the progress of his long running block, Dwight adds the following code to his frontend's set-up code:: @@ -40,9 +63,20 @@ The following use cases describe the main intended uses of the notificaiton modu def stdout_notification(self, type, notifying_object, out_string=None): self.writeStdOut(out_string) - If everything works, the Interpreter will (according to its published API) fire a notification via the :data:`IPython.kernel.core.notification.sharedCenter` of type :const:`STD_OUT_NOTIFICATION_TYPE` before writing anything to stdout [it's up to the Intereter implementation to figure out when to do this]. The notificaiton center will then call the registered callbacks for that event type (in this case, Dwight's frontend's stdout_notification method). Again, according to its API, the Interpreter provides an additional keyword argument when firing the notificaiton of out_string, a copy of the string it will write to stdout. +If everything works, the Interpreter will (according to its published API) +fire a notification via the +:data:`IPython.kernel.core.notification.sharedCenter` of type +:const:`STD_OUT_NOTIFICATION_TYPE` before writing anything to stdout [it's up +to the Interpreter implementation to figure out when to do this]. The +notification center will then call the registered callbacks for that event +type (in this case, Dwight's frontend's stdout_notification method). Again, +according to its API, the Interpreter provides an additional keyword argument +when firing the notification, out_string, a copy of the string it will write +to stdout. - Like magic, Dwight's frontend is able to provide output, even during long-running calculations. Now if Jim could just convince Dwight to use Twisted... +Like magic, Dwight's frontend is able to provide output, even during +long-running calculations. Now if Jim could just convince Dwight to use +Twisted...
As they say in Hazzard County, there's no such thing as a free lunch. If he wanted zero overhead, he should have kept using IPython 0.8. Instead, those tricky Duke boys slide in a souped-up bridge-out jumpin' awkwardly confederate-lovin' notification module that imparts only a constant (and small) performance penalty when the Interpreter (or any other object) fires an event for which there are no registered observers. Of course, the same notification-enabled Interpreter can then be used in frontends that require notifications, thus saving the IPython project from a nasty civil war. diff --git a/docs/source/development/roadmap.txt b/docs/source/development/roadmap.txt index f74372e..2a097a2 100644 --- a/docs/source/development/roadmap.txt +++ b/docs/source/development/roadmap.txt @@ -4,104 +4,78 @@ Development roadmap =================== -.. contents:: - IPython is an ambitious project that is still under heavy development. However, we want IPython to become useful to as many people as possible, as quickly as possible. To help us accomplish this, we are laying out a roadmap of where we are headed and what needs to happen to get there. Hopefully, this will help the IPython developers figure out the best things to work on for each upcoming release. -Speaking of releases, we are going to begin releasing a new version of IPython every four weeks. We are hoping that a regular release schedule, along with a clear roadmap of where we are headed will propel the project forward. - -Where are we headed =================== - -Our goal with IPython is simple: to provide a *powerful*, *robust* and *easy to use* framework for parallel computing. While there are other secondary goals you will hear us talking about at various times, this is the primary goal of IPython that frames the roadmap. - -Steps along the way =================== - -Here we describe the various things that we need to work on to accomplish this goal. - -Setting up for regular release schedule ---------------------------------------- - -We would like to begin to release IPython regularly (probably a 4 week release cycle). To get ready for this, we need to revisit the development guidelines and put in information about releasing IPython. +Work targeted to particular releases +==================================== -Process startup and management ------------------------------- +Release 0.10 +------------ -IPython is implemented using a distributed set of processes that communicate using TCP/IP network channels. Currently, users have to start each of the various processes separately using command line scripts. This is both difficult and error prone. Furthermore, there are a number of things that often need to be managed once the processes have been started, such as the sending of signals and the shutting down and cleaning up of processes. +* Initial refactor of :command:`ipcluster`. -We need to build a system that makes it trivial for users to start and manage IPython processes. This system should have the following properties: +* Better TextMate integration. -* It should possible to do everything through an extremely simple API that users - can call from their own Python script. No shell commands should be needed. - -* This simple API should be configured using standard .ini files. +* Merge in the daemon branch. -* The system should make it possible to start processes using a number of different - approaches: SSH, PBS/Torque, Xgrid, Windows Server, mpirun, etc.
- -* The controller and engine processes should each have a daemon for monitoring, - signaling and clean up. - -* The system should be secure. +Release 0.11 +------------ -* The system should work under all the major operating systems, including - Windows. +* Refactor the configuration system and command line options for + :command:`ipengine` and :command:`ipcontroller`. This will include the + creation of cluster directories that encapsulate all the configuration + files, log files and security-related files for a particular cluster. -Initial work has begun on the daemon infrastructure, and some of the needed logic is contained in the ipcluster script. +* Refactor :command:`ipcluster` to support the new configuration system. -Ease of use/high-level approaches to parallelism ------------------------------------------------- +* Refactor the daemon stuff to support the new configuration system. -While our current API for clients is well designed, we can still do a lot better in designing a user-facing API that is super simple. The main goal here is that it should take *almost no extra code* for users to get their code running in parallel. For this to be possible, we need to tie into Python's standard idioms that enable efficient coding. The biggest ones we are looking at are using context managers (i.e., Python 2.5's ``with`` statement) and decorators. Initial work on this front has begun, but more work is needed. +* Merge back in the core of the notebook. -We also need to think about new models for expressing parallelism. This is fun work as most of the foundation has already been established. +Release 0.12 +------------ -Security -------- -Currently, IPython has no built in security or security model. Because we would like IPython to be usable on public computer systems and over wide area networks, we need to come up with a robust solution for security. Here are some of the specific things that need to be included: +* Fully integrate process startup with the daemons for full process + management. -* User authentication between all processes (engines, controller and clients). +* Make the capabilities of :command:`ipcluster` available from simple Python + classes. -* Optional TSL/SSL based encryption of all communication channels. +Major areas of work =================== -* A good way of picking network ports so multiple users on the same system can - run their own controller and engines without interfering with those of others. - -* A clear model for security that enables users to evaluate the security risks - associated with using IPython in various manners. +Refactoring the main IPython core --------------------------------- -For the implementation of this, we plan on using Twisted's support for SSL and authentication. One things that we really should look at is the `Foolscap`_ network protocol, which provides many of these things out of the box. +Process management for :mod:`IPython.kernel` -------------------------------------------- -.. _Foolscap: http://foolscap.lothar.com/trac +Configuration system -------------------- -The security work needs to be done in conjunction with other network protocol stuff. +Performance problems -------------------- -As of the 0.9 release of IPython, we are using Foolscap and we have implemented -a full security model. Currently, we have a number of performance issues that are waiting to bite users: +* The controller stores a large amount of state in Python dictionaries.
Under + heavy usage, these dicts will get very large, causing memory usage problems. + We need to develop more scalable solutions to this problem, such as using a + sqlite database to store this state. This will also help the controller to + be more fault tolerant. -Latent performance issues ------------------------- +* We currently don't have a good way of handling large objects in the + controller. The biggest problem is that because we don't have any way of + streaming objects, we get lots of temporary copies in the low-level buffers. + We need to implement a better serialization approach and true streaming + support. -Currently, we have a number of performance issues that are waiting to bite users: +* The controller currently unpickles and repickles objects. We need to use the + [push|pull]_serialized methods instead. - * The controller store a large amount of state in Python dictionaries. Under heavy - usage, these dicts with get very large, causing memory usage problems. We need to - develop more scalable solutions to this problem, such as using a sqlite database - to store this state. This will also help the controller to be more fault tolerant. - * Currently, the client to controller connections are done through XML-RPC using - HTTP 1.0. This is very inefficient as XML-RPC is a very verbose protocol and - each request must be handled with a new connection. We need to move these network - connections over to PB or Foolscap. Done! - * We currently don't have a good way of handling large objects in the controller. - The biggest problem is that because we don't have any way of streaming objects, - we get lots of temporary copies in the low-level buffers. We need to implement - a better serialization approach and true streaming support. - * The controller currently unpickles and repickles objects. We need to use the - [push|pull]_serialized methods instead. - * Currently the controller is a bottleneck. We need the ability to scale the - controller by aggregating multiple controllers into one effective controller. +* Currently the controller is a bottleneck. The best approach for this is to + separate the controller itself into multiple processes, one for the core + controller and one each for the controller interfaces. diff --git a/docs/source/index.txt b/docs/source/index.txt index bde7c08..688e167 100644 --- a/docs/source/index.txt +++ b/docs/source/index.txt @@ -24,9 +24,7 @@ IPython Documentation license_and_copyright.txt credits.txt - .. htmlonly:: - - * :ref:`genindex` - * :ref:`modindex` - * :ref:`search` + * :ref:`genindex` + * :ref:`modindex` + * :ref:`search` diff --git a/docs/source/install/install.txt b/docs/source/install/install.txt index 4388cc2..41c5050 100644 --- a/docs/source/install/install.txt +++ b/docs/source/install/install.txt @@ -16,6 +16,19 @@ Some of the installation approaches use the :mod:`setuptools` package and its :c More general information about installing Python packages can be found in Python's documentation at http://www.python.org/doc/. +Quickstart +========== + +If you have :mod:`setuptools` installed and you are on OS X or Linux (not Windows), the following will download and install IPython *and* the main optional dependencies:: + + $ easy_install ipython[kernel,security,test] + +This will get Twisted, zope.interface and Foolscap, which are needed for IPython's parallel computing features, as well as the nose package, which will enable you to run IPython's test suite.
To run IPython's test suite, use the :command:`iptest` command:: + + $ iptest + +Read on for more specific details and instructions for Windows. + Installing IPython itself ========================= @@ -26,7 +39,7 @@ Installation using easy_install If you have :mod:`setuptools` installed, the easiest way of getting IPython is to simply use :command:`easy_install`:: - $ easy_install IPython + $ easy_install ipython That's it. @@ -106,7 +119,13 @@ Most users on OS X will want to get the full :mod:`readline` module. To get a w If needed, the readline egg can be built and installed from source (see the wiki page at http://ipython.scipy.org/moin/InstallationOSXLeopard). -On Windows, you will need the PyReadline module. PyReadline is a separate, Windows only implementation of readline that uses native Windows calls through :mod:`ctypes`. The easiest way of installing PyReadline is you use the binary installer available `here `_. The :mod:`ctypes` module, which comes with Python 2.5 and greater, is required by PyReadline. It is available for Python 2.4 at http://python.net/crew/theller/ctypes. +On Windows, you will need the PyReadline module. PyReadline is a separate, +Windows-only implementation of readline that uses native Windows calls through +:mod:`ctypes`. The easiest way of installing PyReadline is to use the binary +installer available `here `_. The +:mod:`ctypes` module, which comes with Python 2.5 and greater, is required by +PyReadline. It is available for Python 2.4 at +http://python.net/crew/theller/ctypes. nose ---- @@ -117,7 +136,7 @@ To run the IPython test suite you will need the :mod:`nose` package. Nose provi Another way of getting this is to do:: - $ easy_install IPython[test] + $ easy_install ipython[test] For more installation options, see the `nose website `_. Once you have nose installed, you can run IPython's test suite using the iptest command:: @@ -145,13 +164,15 @@ The IPython kernel provides a nice architecture for parallel computing. The mai On a Unix style platform (including OS X), if you want to use :mod:`setuptools`, you can just do:: - $ easy_install IPython[kernel] # the first three - $ easy_install IPython[security] # pyOpenSSL + $ easy_install ipython[kernel] # the first three + $ easy_install ipython[security] # pyOpenSSL zope.interface and Twisted -------------------------- -On Unix style platforms (including OS X), the simplest way of getting the these is to use :command:`easy_install`:: +Twisted [Twisted]_ and zope.interface [ZopeInterface]_ are used for networking-related things. On Unix +style platforms (including OS X), the simplest way of getting these is to +use :command:`easy_install`:: $ easy_install zope.interface $ easy_install Twisted @@ -163,7 +184,7 @@ Windows is a bit different. For zope.interface and Twisted, simply get the late Foolscap -------- -Foolscap uses Twisted to provide a very nice secure RPC protocol that we use to implement our parallel computing features. +Foolscap [Foolscap]_ uses Twisted to provide a very nice secure RPC protocol that we use to implement our parallel computing features. On all platforms a simple:: @@ -174,7 +195,7 @@ should work. You can also download the source tarballs from the `Foolscap websi pyOpenSSL --------- -IPython requires an older version of pyOpenSSL (0.6 rather than the current 0.7). There are a couple of options for getting this: +IPython requires an older version of pyOpenSSL [pyOpenSSL]_ (0.6 rather than the current 0.7). There are a couple of options for getting this: 1.
Most Linux distributions have packages for pyOpenSSL. 2. The built-in Python 2.5 on OS X 10.5 already has it installed. @@ -188,4 +209,9 @@ Dependencies for IPython.frontend (the IPython GUI) wxPython -------- -Starting with IPython 0.9, IPython has a new IPython.frontend package that has a nice wxPython based IPython GUI. As you would expect, this GUI requires wxPython. Most Linux distributions have wxPython packages available and the built-in Python on OS X comes with wxPython preinstalled. For Windows, a binary installer is available on the `wxPython website `_. \ No newline at end of file +Starting with IPython 0.9, IPython has a new IPython.frontend package that has a nice wxPython-based IPython GUI. As you would expect, this GUI requires wxPython. Most Linux distributions have wxPython packages available and the built-in Python on OS X comes with wxPython preinstalled. For Windows, a binary installer is available on the `wxPython website `_. + +.. [Twisted] Twisted Matrix. http://twistedmatrix.org +.. [ZopeInterface] zope.interface. http://pypi.python.org/pypi/zope.interface +.. [Foolscap] Foolscap network protocol. http://foolscap.lothar.com/trac +.. [pyOpenSSL] pyOpenSSL. http://pyopenssl.sourceforge.net \ No newline at end of file diff --git a/docs/source/interactive/index.txt b/docs/source/interactive/index.txt index 01111b0..ae45bc5 100644 --- a/docs/source/interactive/index.txt +++ b/docs/source/interactive/index.txt @@ -3,7 +3,7 @@ Using IPython for interactive work ================================== .. toctree:: - :maxdepth: 1 + :maxdepth: 2 tutorial.txt reference.txt diff --git a/docs/source/interactive/reference.txt b/docs/source/interactive/reference.txt index ef60af4..f81587f 100644 --- a/docs/source/interactive/reference.txt +++ b/docs/source/interactive/reference.txt @@ -1,13 +1,7 @@ -.. IPython documentation master file, created by sphinx-quickstart.py on Mon Mar 24 17:01:34 2008. - You can adapt this file completely to your liking, but it should at least - contain the root 'toctree' directive. - ================= IPython reference ================= -.. contents:: - .. _command_line_options: Command-line usage diff --git a/docs/source/interactive/tutorial.txt b/docs/source/interactive/tutorial.txt index e7271d5..23a2854 100644 --- a/docs/source/interactive/tutorial.txt +++ b/docs/source/interactive/tutorial.txt @@ -4,8 +4,6 @@ Quick IPython tutorial ====================== -.. contents:: - IPython can be used as an improved replacement for the Python prompt, and for that you don't really need to read any more of this manual. But in this section we'll try to summarize a few tips on how to make the diff --git a/docs/source/license_and_copyright.txt b/docs/source/license_and_copyright.txt index 1c9840e..9c61f1e 100644 --- a/docs/source/license_and_copyright.txt +++ b/docs/source/license_and_copyright.txt @@ -13,39 +13,43 @@ IPython is licensed under the terms of the new or revised BSD license, as follow All rights reserved. - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - Redistributions of source code must retain the above copyright notice, this list of - conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright notice, this list - of conditions and the following disclaimer in the documentation and/or other - materials provided with the distribution.
- - Neither the name of the IPython Development Team nor the names of its contributors - may be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY - EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of the IPython Development Team nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. About the IPython Development Team ================================== -Fernando Perez began IPython in 2001 based on code from Janko Hauser -and Nathaniel Gray . Fernando is still the project lead. +Fernando Perez began IPython in 2001 based on code from Janko Hauser + and Nathaniel Gray . Fernando is still +the project lead. -The IPython Development Team is the set of all contributors to the IPython project. -This includes all of the IPython subprojects. Here is a list of the currently active contributors: +The IPython Development Team is the set of all contributors to the IPython +project. This includes all of the IPython subprojects. Here is a list of the +currently active contributors: * Matthieu Brucher * Ondrej Certik @@ -65,23 +69,23 @@ If your name is missing, please add it. Our Copyright Policy ==================== -IPython uses a shared copyright model. Each contributor maintains copyright over -their contributions to IPython. But, it is important to note that these -contributions are typically only changes to the repositories. 
Thus, the IPython -source code, in its entirety is not the copyright of any single person or -institution. Instead, it is the collective copyright of the entire IPython +IPython source code, in its entirety, is not the copyright of any single person +or institution. Instead, it is the collective copyright of the entire IPython Development Team. If individual contributors want to maintain a record of what -changes/contributions they have specific copyright on, they should indicate their -copyright in the commit message of the change, when they commit the change to -one of the IPython repositories. +changes/contributions they have specific copyright on, they should indicate +their copyright in the commit message of the change, when they commit the +change to one of the IPython repositories. Miscellaneous ============= Some files (DPyGetOpt.py, for example) may be licensed under different -conditions. Ultimately each file indicates clearly the conditions under -which its author/authors have decided to publish the code. +conditions. Ultimately each file indicates clearly the conditions under which +its author/authors have decided to publish the code. -Versions of IPython up to and including 0.6.3 were released under the -GNU Lesser General Public License (LGPL), available at +Versions of IPython up to and including 0.6.3 were released under the GNU +Lesser General Public License (LGPL), available at http://www.gnu.org/copyleft/lesser.html. \ No newline at end of file diff --git a/docs/source/parallel/index.txt b/docs/source/parallel/index.txt index 15c8436..0b4e378 100644 --- a/docs/source/parallel/index.txt +++ b/docs/source/parallel/index.txt @@ -8,7 +8,9 @@ Using IPython for parallel computing :maxdepth: 2 parallel_intro.txt + parallel_process.txt parallel_multiengine.txt parallel_task.txt parallel_mpi.txt + parallel_security.txt diff --git a/docs/source/parallel/parallel_intro.txt b/docs/source/parallel/parallel_intro.txt index 23900cd..92db52b 100644 --- a/docs/source/parallel/parallel_intro.txt +++ b/docs/source/parallel/parallel_intro.txt @@ -4,20 +4,17 @@ Overview and getting started ============================ -.. contents:: - Introduction ============ -This file gives an overview of IPython's sophisticated and -powerful architecture for parallel and distributed computing. This -architecture abstracts out parallelism in a very general way, which -enables IPython to support many different styles of parallelism -including: +This section gives an overview of IPython's sophisticated and powerful +architecture for parallel and distributed computing. This architecture +abstracts out parallelism in a very general way, which enables IPython to +support many different styles of parallelism, including: * Single program, multiple data (SPMD) parallelism. * Multiple program, multiple data (MPMD) parallelism. -* Message passing using ``MPI``. +* Message passing using MPI. * Task farming. * Data parallel. * Combinations of these approaches. @@ -97,8 +94,8 @@ The controller also provides a single point of contact for users who wish to utilize the engines connected to the controller. There are different ways of working with a controller. In IPython these ways correspond to different interfaces that the controller is adapted to.
Currently we have two default interfaces to the controller: -* The MultiEngine interface, which provides the simplest possible way of working - with engines interactively. +* The MultiEngine interface, which provides the simplest possible way of + working with engines interactively. * The Task interface, which presents the engines as a load-balanced task farming system. @@ -124,169 +121,46 @@ interface. Here are the two default clients: Security -------- -By default (as long as `pyOpenSSL` is installed) all network connections between the controller and engines and the controller and clients are secure. What does this mean? First of all, all of the connections will be encrypted using SSL. Second, the connections are authenticated. We handle authentication in a `capabilities`__ based security model. In this model, a "capability (known in some systems as a key) is a communicable, unforgeable token of authority". Put simply, a capability is like a key to your house. If you have the key to your house, you can get in. If not, you can't. - -.. __: http://en.wikipedia.org/wiki/Capability-based_security +By default (as long as `pyOpenSSL` is installed) all network connections between the controller and engines and the controller and clients are secure. What does this mean? First of all, all of the connections will be encrypted using SSL. Second, the connections are authenticated. We handle authentication in a capability-based security model [Capability]_. In this model, a "capability (known in some systems as a key) is a communicable, unforgeable token of authority". Put simply, a capability is like a key to your house. If you have the key to your house, you can get in. If not, you can't. In our architecture, the controller is the only process that listens on network ports, and is thus responsible for creating these keys. In IPython, these keys are known as Foolscap URLs, or FURLs, because of the underlying network protocol we are using. As a user, you don't need to know anything about the details of these FURLs, other than that when the controller starts, it saves a set of FURLs to files named :file:`something.furl`. The default location of these files is the :file:`~/.ipython/security` directory. -To connect and authenticate to the controller an engine or client simply needs to present an appropriate furl (that was originally created by the controller) to the controller. Thus, the .furl files need to be copied to a location where the clients and engines can find them. Typically, this is the :file:`~./ipython/security` directory on the host where the client/engine is running (which could be a different host than the controller). Once the .furl files are copied over, everything should work fine. +To connect and authenticate to the controller an engine or client simply needs to present an appropriate FURL (that was originally created by the controller) to the controller. Thus, the FURL files need to be copied to a location where the clients and engines can find them. Typically, this is the :file:`~/.ipython/security` directory on the host where the client/engine is running (which could be a different host than the controller). Once the FURL files are copied over, everything should work fine.
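+
+From the client's side, presenting a key is just a matter of pointing the
+client class at the right file. A short sketch (the explicit path here is
+purely illustrative)::
+
+    from IPython.kernel import client
+
+    # The key is found automatically in ~/.ipython/security:
+    mec = client.MultiEngineClient()
+
+    # A key that was copied somewhere else is presented explicitly:
+    tc = client.TaskClient('/path/to/my/ipcontroller-tc.furl')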
-Currently, there are three .furl files that the controller creates: +Currently, there are three FURL files that the controller creates: ipcontroller-engine.furl - This ``.furl`` file is the key that gives an engine the ability to connect + This FURL file is the key that gives an engine the ability to connect to a controller. ipcontroller-tc.furl - This ``.furl`` file is the key that a :class:`TaskClient` must use to + This FURL file is the key that a :class:`TaskClient` must use to connect to the task interface of a controller. ipcontroller-mec.furl - This ``.furl`` file is the key that a :class:`MultiEngineClient` must use to - connect to the multiengine interface of a controller. + This FURL file is the key that a :class:`MultiEngineClient` must use + to connect to the multiengine interface of a controller. -More details of how these ``.furl`` files are used are given below. +More details of how these FURL files are used are given below. + +A detailed description of the security model and its implementation in IPython +can be found :ref:`here `. Getting Started =============== To use IPython for parallel computing, you need to start one instance of -the controller and one or more instances of the engine. The controller -and each engine can run on different machines or on the same machine. -Because of this, there are many different possibilities for setting up -the IP addresses and ports used by the various processes. - -Starting the controller and engine on your local machine --------------------------------------------------------- - -This is the simplest configuration that can be used and is useful for -testing the system and on machines that have multiple cores and/or -multple CPUs. The easiest way of getting started is to use the :command:`ipcluster` -command:: - - $ ipcluster -n 4 - -This will start an IPython controller and then 4 engines that connect to -the controller. Lastly, the script will print out the Python commands -that you can use to connect to the controller. It is that easy. - -.. warning:: - - The :command:`ipcluster` does not currently work on Windows. We are - working on it though. - -Underneath the hood, the controller creates ``.furl`` files in the -:file:`~./ipython/security` directory. Because the engines are on the -same host, they automatically find the needed :file:`ipcontroller-engine.furl` -there and use it to connect to the controller. - -The :command:`ipcluster` script uses two other top-level -scripts that you can also use yourself. These scripts are -:command:`ipcontroller`, which starts the controller and :command:`ipengine` which -starts one engine. To use these scripts to start things on your local -machine, do the following. - -First start the controller:: - - $ ipcontroller - -Next, start however many instances of the engine you want using (repeatedly) the command:: - - $ ipengine - -The engines should start and automatically connect to the controller using the ``.furl`` files in :file:`~./ipython/security`. You are now ready to use the controller and engines from IPython. - -.. warning:: - - The order of the above operations is very important. You *must* - start the controller before the engines, since the engines connect - to the controller as they get started. - -.. note:: - - On some platforms (OS X), to put the controller and engine into the background - you may need to give these commands in the form ``(ipcontroller &)`` - and ``(ipengine &)`` (with the parentheses) for them to work properly. 
- - -Starting the controller and engines on different hosts ------------------------------------------------------- - -When the controller and engines are running on different hosts, things are -slightly more complicated, but the underlying ideas are the same: - -1. Start the controller on a host using :command:`ipcontroler`. -2. Copy :file:`ipcontroller-engine.furl` from :file:`~./ipython/security` on the controller's host to the host where the engines will run. -3. Use :command:`ipengine` on the engine's hosts to start the engines. +the controller and one or more instances of the engine. Initially, it is best to simply start a controller and engines on a single host using the :command:`ipcluster` command. To start a controller and 4 engines on your localhost, just do:: -The only thing you have to be careful of is to tell :command:`ipengine` where the :file:`ipcontroller-engine.furl` file is located. There are two ways you can do this: + $ ipcluster local -n 4 -* Put :file:`ipcontroller-engine.furl` in the :file:`~./ipython/security` directory - on the engine's host, where it will be found automatically. -* Call :command:`ipengine` with the ``--furl-file=full_path_to_the_file`` flag. - -The ``--furl-file`` flag works like this:: - - $ ipengine --furl-file=/path/to/my/ipcontroller-engine.furl - -.. note:: - - If the controller's and engine's hosts all have a shared file system - (:file:`~./ipython/security` is the same on all of them), then things - will just work! - -Make .furl files persistent ---------------------------- - -At fist glance it may seem that that managing the ``.furl`` files is a bit annoying. Going back to the house and key analogy, copying the ``.furl`` around each time you start the controller is like having to make a new key everytime you want to unlock the door and enter your house. As with your house, you want to be able to create the key (or ``.furl`` file) once, and then simply use it at any point in the future. - -This is possible. The only thing you have to do is decide what ports the controller will listen on for the engines and clients. This is done as follows:: - - $ ipcontroller --client-port=10101 --engine-port=10102 - -Then, just copy the furl files over the first time and you are set. You can start and stop the controller and engines any many times as you want in the future, just make sure to tell the controller to use the *same* ports. - -.. note:: - - You may ask the question: what ports does the controller listen on if you - don't tell is to use specific ones? The default is to use high random port - numbers. We do this for two reasons: i) to increase security through obcurity - and ii) to multiple controllers on a given host to start and automatically - use different ports. - -Starting engines using ``mpirun`` ---------------------------------- - -The IPython engines can be started using ``mpirun``/``mpiexec``, even if -the engines don't call ``MPI_Init()`` or use the MPI API in any way. This is -supported on modern MPI implementations like `Open MPI`_.. This provides -an really nice way of starting a bunch of engine. On a system with MPI -installed you can do:: - - mpirun -n 4 ipengine - -to start 4 engine on a cluster. This works even if you don't have any -Python-MPI bindings installed. - -.. _Open MPI: http://www.open-mpi.org/ - -More details on using MPI with IPython can be found :ref:`here `. - -Log files --------- - -All of the components of IPython have log files associated with them.
-These log files can be extremely useful in debugging problems with -IPython and can be found in the directory ``~/.ipython/log``. Sending -the log files to us will often help us to debug any problems. - -Next Steps -========== +More details about starting the IPython controller and engines can be found :ref:`here `. Once you have started the IPython controller and one or more engines, you are ready to use the engines to do something useful. To make sure -everything is working correctly, try the following commands:: +everything is working correctly, try the following commands: + +.. sourcecode:: ipython In [1]: from IPython.kernel import client @@ -310,7 +184,7 @@ everything is working correctly, try the following commands:: [3] In [1]: print "Hello World" [3] Out[1]: Hello World -Remember, a client also needs to present a ``.furl`` file to the controller. How does this happen? When a multiengine client is created with no arguments, the client tries to find the corresponding ``.furl`` file in the local :file:`~./ipython/security` directory. If it finds it, you are set. If you have put the ``.furl`` file in a different location or it has a different name, create the client like this:: +Remember, a client also needs to present a FURL file to the controller. How does this happen? When a multiengine client is created with no arguments, the client tries to find the corresponding FURL file in the local :file:`~/.ipython/security` directory. If it finds it, you are set. If you have put the FURL file in a different location or it has a different name, create the client like this:: mec = client.MultiEngineClient('/path/to/my/ipcontroller-mec.furl') @@ -323,5 +197,9 @@ You are now ready to learn more about the :ref:`MultiEngine `. + +Actually using MPI +================== + +Once the engines are running with MPI enabled, you are ready to go. You can now call any code that uses MPI in the IPython engines. And, all of this can be done interactively. Here we show a simple example that uses mpi4py [mpi4py]_. + +First, let's define a simple function that uses MPI to calculate the sum of a distributed array. Save the following text in a file called :file:`psum.py`: + +.. sourcecode:: python + + from mpi4py import MPI + import numpy as np + + def psum(a): + s = np.sum(a) + # allreduce works on arbitrary Python objects (Allreduce needs buffers) + return MPI.COMM_WORLD.allreduce(s, op=MPI.SUM) + +Now, start an IPython cluster in the same directory as :file:`psum.py`:: + + $ ipcluster mpirun -n 4 + +Finally, connect to the cluster and use this function interactively. In this case, we create a random array on each engine and sum up all the random arrays using our :func:`psum` function: + +..
sourcecode:: ipython + + In [1]: from IPython.kernel import client + + In [2]: mec = client.MultiEngineClient() + + In [3]: mec.activate() + + In [4]: px import numpy as np + Parallel execution on engines: all + Out[4]: + + [0] In [13]: import numpy as np + [1] In [13]: import numpy as np + [2] In [13]: import numpy as np + [3] In [13]: import numpy as np + + In [6]: px a = np.random.rand(100) + Parallel execution on engines: all + Out[6]: + + [0] In [15]: a = np.random.rand(100) + [1] In [15]: a = np.random.rand(100) + [2] In [15]: a = np.random.rand(100) + [3] In [15]: a = np.random.rand(100) + + In [7]: px from psum import psum + Parallel execution on engines: all + Out[7]: + + [0] In [16]: from psum import psum + [1] In [16]: from psum import psum + [2] In [16]: from psum import psum + [3] In [16]: from psum import psum + + In [8]: px s = psum(a) + Parallel execution on engines: all + Out[8]: + + [0] In [17]: s = psum(a) + [1] In [17]: s = psum(a) + [2] In [17]: s = psum(a) + [3] In [17]: s = psum(a) + + In [9]: px print s + Parallel execution on engines: all + Out[9]: + + [0] In [18]: print s + [0] Out[18]: 187.451545803 + + [1] In [18]: print s + [1] Out[18]: 187.451545803 + + [2] In [18]: print s + [2] Out[18]: 187.451545803 + + [3] In [18]: print s + [3] Out[18]: 187.451545803 + +Any Python code that makes calls to MPI can be used in this manner, including +compiled C, C++ and Fortran libraries that have been exposed to Python. + +.. [MPI] Message Passing Interface. http://www-unix.mcs.anl.gov/mpi/ +.. [mpi4py] MPI for Python. mpi4py: http://mpi4py.scipy.org/ +.. [OpenMPI] Open MPI. http://www.open-mpi.org/ +.. [PyTrilinos] PyTrilinos. http://trilinos.sandia.gov/packages/pytrilinos/ \ No newline at end of file diff --git a/docs/source/parallel/parallel_multiengine.txt b/docs/source/parallel/parallel_multiengine.txt index becc6f8..8c91e38 100644 --- a/docs/source/parallel/parallel_multiengine.txt +++ b/docs/source/parallel/parallel_multiengine.txt @@ -4,8 +4,6 @@ IPython's multiengine interface =============================== -.. contents:: - The multiengine interface represents one possible way of working with a set of IPython engines. The basic idea behind the multiengine interface is that the capabilities of each engine are directly and explicitly exposed to the user. @@ -21,7 +19,7 @@ To follow along with this tutorial, you will need to start the IPython controller and four IPython engines. The simplest way of doing this is to use the :command:`ipcluster` command:: - $ ipcluster -n 4 + $ ipcluster local -n 4 For more detailed information about starting the controller and engines, see our :ref:`introduction ` to using IPython for parallel computing. @@ -30,7 +28,9 @@ Creating a ``MultiEngineClient`` instance ========================================= The first step is to import the IPython :mod:`IPython.kernel.client` module -and then create a :class:`MultiEngineClient` instance:: +and then create a :class:`MultiEngineClient` instance: + +.. sourcecode:: ipython In [1]: from IPython.kernel import client @@ -38,13 +38,17 @@ and then create a :class:`MultiEngineClient` instance:: This form assumes that the :file:`ipcontroller-mec.furl` is in the :file:`~./ipython/security` directory on the client's host. If not, the -location of the ``.furl`` file must be given as an argument to the -constructor:: +location of the FURL file must be given as an argument to the +constructor: + +.. 
sourcecode:: ipython - In[2]: mec = client.MultiEngineClient('/path/to/my/ipcontroller-mec.furl') + In [2]: mec = client.MultiEngineClient('/path/to/my/ipcontroller-mec.furl') To make sure there are engines connected to the controller, you can get a list -of engine ids:: +of engine ids: + +.. sourcecode:: ipython In [3]: mec.get_ids() Out[3]: [0, 1, 2, 3] @@ -62,7 +66,9 @@ Parallel map Python's builtin :func:`map` functions allows a function to be applied to a sequence element-by-element. This type of code is typically trivial to parallelize. In fact, the multiengine interface in IPython already has a -parallel version of :meth:`map` that works just like its serial counterpart:: +parallel version of :meth:`map` that works just like its serial counterpart: + +.. sourcecode:: ipython In [63]: serial_result = map(lambda x:x**10, range(32)) @@ -84,7 +90,9 @@ parallel version of :meth:`map` that works just like its serial counterpart:: Parallel function decorator --------------------------- -Parallel functions are just like normal function, but they can be called on sequences and *in parallel*. The multiengine interface provides a decorator that turns any Python function into a parallel function:: +Parallel functions are just like normal functions, but they can be called on sequences and *in parallel*. The multiengine interface provides a decorator that turns any Python function into a parallel function: + +.. sourcecode:: ipython In [10]: @mec.parallel() ....: def f(x): @@ -110,7 +118,9 @@ Blocking execution In blocking mode, the :class:`MultiEngineClient` object (called ``mec`` in these examples) submits the command to the controller, which places the command in the engines' queues for execution. The :meth:`execute` call then -blocks until the engines are done executing the command:: +blocks until the engines are done executing the command: + +.. sourcecode:: ipython # The default is to run on all engines In [4]: mec.execute('a=5') Out[4]: In [5]: mec.execute('b=10') Out[5]: [0] In [2]: b=10 [1] In [2]: b=10 [2] In [2]: b=10 [3] In [2]: b=10 Python commands can be executed on specific engines by calling execute using -the ``targets`` keyword argument:: +the ``targets`` keyword argument: + +.. sourcecode:: ipython In [6]: mec.execute('c=a+b',targets=[0,2]) Out[6]: @@ -163,7 +175,9 @@ the ``targets`` keyword argument:: This example also shows one of the most important things about the IPython engines: they have persistent user namespaces. The :meth:`execute` method -returns a Python ``dict`` that contains useful information:: +returns a Python ``dict`` that contains useful information: + +.. sourcecode:: ipython In [9]: result_dict = mec.execute('d=10; print d') @@ -184,7 +198,9 @@ then returns a :class:`PendingResult` object immediately. The :class:`PendingResult` object gives you a way of getting a result at a later time through its :meth:`get_result` method or :attr:`r` attribute. This allows you to quickly submit long running commands without blocking your local -Python/IPython session:: +Python/IPython session: + +.. sourcecode:: ipython # In blocking mode In [6]: mec.execute('import time') @@ -225,7 +241,9 @@ Python/IPython session:: Often, it is desirable to wait until a set of :class:`PendingResult` objects are done. For this, there is the method :meth:`barrier`. This method takes a tuple of :class:`PendingResult` objects and blocks until all of the associated -results are ready:: +results are ready: + +..
sourcecode:: ipython In [72]: mec.block=False @@ -259,7 +277,9 @@ and :attr:`targets`: * If no keyword argument is provided, the instance attributes are used. * Keyword arguments, if provided, override the instance attributes. -The following examples demonstrate how to use the instance attributes:: +The following examples demonstrate how to use the instance attributes: + +.. sourcecode:: ipython In [16]: mec.targets = [0,2] @@ -305,7 +325,9 @@ that make it more pleasant to execute Python commands on the engines interactively. These are simply shortcuts to :meth:`execute` and :meth:`get_result`. The ``%px`` magic executes a single Python command on the engines specified by the :attr:`targets` attribute of the -:class:`MultiEngineClient` instance (by default this is ``'all'``):: +:class:`MultiEngineClient` instance (by default this is ``'all'``): + +.. sourcecode:: ipython # Make this MultiEngineClient active for parallel magic commands In [23]: mec.activate() @@ -352,7 +374,9 @@ engines specified by the :attr:`targets` attribute of the The ``%result`` magic gets and prints the stdin/stdout/stderr of the last command executed on each engine. It is simply a shortcut to the -:meth:`get_result` method:: +:meth:`get_result` method: + +.. sourcecode:: ipython In [29]: %result Out[29]: [3] In [9]: print numpy.linalg.eigvals(a) [3] Out[9]: [ 0.83664764 -0.25602658] The ``%autopx`` magic switches to a mode where everything you type is executed -on the engines given by the :attr:`targets` attribute:: +on the engines given by the :attr:`targets` attribute: + +.. sourcecode:: ipython In [30]: mec.block=False @@ -423,7 +449,9 @@ are called :meth:`push` (sending an object to the engines) and :meth:`pull` Basic push and pull ------------------- -Here are some examples of how you use :meth:`push` and :meth:`pull`:: +Here are some examples of how you use :meth:`push` and :meth:`pull`: + +.. sourcecode:: ipython In [38]: mec.push(dict(a=1.03234,b=3453)) Out[38]: [None, None, None, None] @@ -460,7 +488,9 @@ Here are some examples of how you use :meth:`push` and :meth:`pull`:: [3] Out[13]: speed In non-blocking mode :meth:`push` and :meth:`pull` also return -:class:`PendingResult` objects:: +:class:`PendingResult` objects: + +.. sourcecode:: ipython In [47]: mec.block=False @@ -474,8 +504,9 @@ Push and pull for functions --------------------------- Functions can also be pushed and pulled using :meth:`push_function` and -:meth:`pull_function`:: +:meth:`pull_function`: +.. sourcecode:: ipython In [52]: mec.block=True @@ -518,7 +549,9 @@ Dictionary interface As a shorthand to :meth:`push` and :meth:`pull`, the :class:`MultiEngineClient` class implements some of the Python dictionary interface. This makes the remote namespaces of the engines appear as a local -dictionary. Underneath, this uses :meth:`push` and :meth:`pull`:: +dictionary. Underneath, this uses :meth:`push` and :meth:`pull`: + +.. sourcecode:: ipython In [50]: mec.block=True @@ -536,7 +569,9 @@ follow that terminology. However, it is important to remember that in IPython's :class:`MultiEngineClient` class, :meth:`scatter` is from the interactive IPython session to the engines and :meth:`gather` is from the engines back to the interactive IPython session. For scatter/gather operations -between engines, MPI should be used:: +between engines, MPI should be used: + +..
sourcecode:: ipython In [58]: mec.scatter('a',range(16)) Out[58]: [None, None, None, None] @@ -569,7 +604,9 @@ How to do parallel list comprehensions In many cases list comprehensions are nicer than using the map function. While we don't have fully parallel list comprehensions, it is simple to get the -basic effect using :meth:`scatter` and :meth:`gather`:: +basic effect using :meth:`scatter` and :meth:`gather`: + +.. sourcecode:: ipython In [66]: mec.scatter('x',range(64)) Out[66]: [None, None, None, None] @@ -598,7 +635,9 @@ parallel command can actually raise multiple exceptions (one for each engine the command was run on). To express this idea, the MultiEngine interface has a :exc:`CompositeError` exception class that will be raised in most cases. The :exc:`CompositeError` class is a special type of exception that wraps one or -more other types of exceptions. Here is how it works:: +more other types of exceptions. Here is how it works: + +.. sourcecode:: ipython In [76]: mec.block=True @@ -628,7 +667,9 @@ more other types of exceptions. Here is how it works:: [2:execute]: ZeroDivisionError: integer division or modulo by zero [3:execute]: ZeroDivisionError: integer division or modulo by zero -Notice how the error message printed when :exc:`CompositeError` is raised has information about the individual exceptions that were raised on each engine. If you want, you can even raise one of these original exceptions:: +Notice how the error message printed when :exc:`CompositeError` is raised has information about the individual exceptions that were raised on each engine. If you want, you can even raise one of these original exceptions: + +.. sourcecode:: ipython In [80]: try: ....: mec.execute('1/0') @@ -652,7 +693,9 @@ Notice how the error message printed when :exc:`CompositeError` is raised has in If you are working in IPython, you can simply type ``%debug`` after one of these :exc:`CompositeError` exceptions is raised, and inspect the exception -instance:: +instance: + +.. sourcecode:: ipython In [81]: mec.execute('1/0') --------------------------------------------------------------------------- @@ -734,7 +777,9 @@ instance:: The above example appears to be broken right now because of a change in how we are using Twisted. -All of this same error handling magic even works in non-blocking mode:: +All of this same error handling magic even works in non-blocking mode: + +.. sourcecode:: ipython In [83]: mec.block=False diff --git a/docs/source/parallel/parallel_process.txt b/docs/source/parallel/parallel_process.txt new file mode 100644 index 0000000..660d06d --- /dev/null +++ b/docs/source/parallel/parallel_process.txt @@ -0,0 +1,251 @@ +.. _parallel_process: + +=========================================== +Starting the IPython controller and engines +=========================================== + +To use IPython for parallel computing, you need to start one instance of +the controller and one or more instances of the engine. The controller +and each engine can run on different machines or on the same machine. +Because of this, there are many different possibilities. + +Broadly speaking, there are two ways of going about starting a controller and engines: + +* In an automated manner using the :command:`ipcluster` command. +* In a more manual way using the :command:`ipcontroller` and + :command:`ipengine` commands. + +This document describes both of these methods. We recommend that new users start with the :command:`ipcluster` command as it simplifies many common usage cases.
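+
+For example, once a cluster has been started with either method, a quick
+check that everything is connected might look like the following sketch
+(it assumes the client FURL is in its default location)::
+
+    from IPython.kernel import client
+
+    mec = client.MultiEngineClient()
+    # One id per connected engine, e.g. [0, 1, 2, 3]:
+    print mec.get_ids()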
+ +General considerations +====================== + +Before delving into the details about how you can start a controller and engines using the various methods, we outline some of the general issues that come up when starting the controller and engines. These things come up no matter which method you use to start your IPython cluster. + +Let's say that you want to start the controller on ``host0`` and engines on hosts ``host1``-``hostn``. The following steps are then required: + +1. Start the controller on ``host0`` by running :command:`ipcontroller` on + ``host0``. +2. Move the FURL file (:file:`ipcontroller-engine.furl`) created by the + controller from ``host0`` to hosts ``host1``-``hostn``. +3. Start the engines on hosts ``host1``-``hostn`` by running + :command:`ipengine`. This command has to be told where the FURL file + (:file:`ipcontroller-engine.furl`) is located. + +At this point, the controller and engines will be connected. By default, the +FURL files created by the controller are put into the +:file:`~/.ipython/security` directory. If the engines share a filesystem with +the controller, step 2 can be skipped as the engines will automatically look +at that location. + +The final step required to actually use the running controller from a +client is to move the FURL files :file:`ipcontroller-mec.furl` and +:file:`ipcontroller-tc.furl` from ``host0`` to the host where the clients will +be run. If these files are put into the :file:`~/.ipython/security` directory of the client's host, they will be found automatically. Otherwise, the full path to them has to be passed to the client's constructor. + +Using :command:`ipcluster` +========================== + +The :command:`ipcluster` command provides a simple way of starting a controller and engines in the following situations: + +1. When the controller and engines are all run on localhost. This is useful + for testing or running on a multicore computer. +2. When engines are started using the :command:`mpirun` command that comes + with most MPI [MPI]_ implementations. +3. When engines are started using the PBS [PBS]_ batch system. + +.. note:: + + It is also possible for advanced users to add support to + :command:`ipcluster` for starting controllers and engines using other + methods (like Sun's Grid Engine for example). + +.. note:: + + Currently :command:`ipcluster` requires that the + :file:`~/.ipython/security` directory live on a shared filesystem that is + seen by both the controller and engines. If you don't have a shared file + system you will need to use :command:`ipcontroller` and + :command:`ipengine` directly. + +Underneath the hood, :command:`ipcluster` just uses :command:`ipcontroller` +and :command:`ipengine` to perform the steps described above. + +Using :command:`ipcluster` in local mode +---------------------------------------- + +To start one controller and 4 engines on localhost, just do:: + + $ ipcluster local -n 4 + +To see other command line options for the local mode, do:: + + $ ipcluster local -h + +Using :command:`ipcluster` in mpirun mode +----------------------------------------- + +The mpirun mode is useful if you: + +1. Have MPI installed. +2. Have systems that are configured to use the :command:`mpirun` command to + start processes. + +If these are satisfied, you can start an IPython cluster using:: + + $ ipcluster mpirun -n 4 + +This does the following: + +1. Starts the IPython controller on current host. +2. Uses :command:`mpirun` to start 4 engines.
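+
+Once the cluster is up, one way to confirm that the engines really form a
+single MPI world is to compare ranks across engines. A sketch (it assumes
+mpi4py is installed on the engines and a multiengine FURL is in the default
+location):
+
+.. sourcecode:: python
+
+    from IPython.kernel import client
+
+    mec = client.MultiEngineClient()
+    mec.execute('from mpi4py import MPI')
+    mec.execute('rank = MPI.COMM_WORLD.Get_rank()')
+    # Each engine should report a distinct rank, e.g. [0, 1, 2, 3]:
+    print mec.pull('rank')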
+
+On newer MPI implementations (such as OpenMPI), this will work even if you don't make any calls to MPI or call :func:`MPI_Init`. However, older MPI implementations actually require each process to call :func:`MPI_Init` upon starting. The easiest way of having this done is to install the mpi4py [mpi4py]_ package and then call ipcluster with the ``--mpi`` option::
+
+    $ ipcluster mpirun -n 4 --mpi=mpi4py
+
+Unfortunately, even this won't work for some MPI implementations. If you are having problems with this, you will likely have to use a custom Python executable that itself calls :func:`MPI_Init` at the appropriate time. Fortunately, mpi4py comes with such a custom Python executable that is easy to install and use. However, this custom Python executable approach will not work with :command:`ipcluster` currently.
+
+Additional command line options for this mode can be found by doing::
+
+    $ ipcluster mpirun -h
+
+More details on using MPI with IPython can be found :ref:`here `.
+
+
+Using :command:`ipcluster` in PBS mode
+--------------------------------------
+
+The PBS mode uses the Portable Batch System [PBS]_ to start the engines. To use this mode, you first need to create a PBS script template that will be used to start the engines. Here is a sample PBS script template:
+
+.. sourcecode:: bash
+
+    #PBS -N ipython
+    #PBS -j oe
+    #PBS -l walltime=00:10:00
+    #PBS -l nodes=${n/4}:ppn=4
+    #PBS -q parallel
+
+    cd $$PBS_O_WORKDIR
+    export PATH=$$HOME/usr/local/bin
+    export PYTHONPATH=$$HOME/usr/local/lib/python2.4/site-packages
+    /usr/local/bin/mpiexec -n ${n} ipengine --logfile=$$PBS_O_WORKDIR/ipengine
+
+There are a few important points about this template:
+
+1. This template will be rendered at runtime using IPython's :mod:`Itpl`
+   template engine.
+
+2. Instead of putting in the actual number of engines, use the notation
+   ``${n}`` to indicate the number of engines to be started. You can also use
+   expressions like ``${n/4}`` in the template to indicate the number of
+   nodes. For example, with ``-n 128`` the template above renders ``${n}``
+   as ``128`` and ``${n/4}`` as ``32``.
+
+3. Because ``$`` is a special character used by the template engine, you must
+   escape any ``$`` by using ``$$``. This is important when referring to
+   environment variables in the template, so ``$$PBS_O_WORKDIR`` renders as
+   the literal ``$PBS_O_WORKDIR``.
+
+4. Any options to :command:`ipengine` should be given in the batch script
+   template.
+
+5. Depending on the configuration of your system, you may have to set
+   environment variables in the script template.
+
+Once you have created such a script, save it with a name like :file:`pbs.template`. Now you are ready to start your job::
+
+    $ ipcluster pbs -n 128 --pbs-script=pbs.template
+
+Additional command line options for this mode can be found by doing::
+
+    $ ipcluster pbs -h
+
+Using the :command:`ipcontroller` and :command:`ipengine` commands
+==================================================================
+
+It is also possible to use the :command:`ipcontroller` and :command:`ipengine` commands to start your controller and engines. This approach gives you full control over all aspects of the startup process.
+
+Starting the controller and engine on your local machine
+--------------------------------------------------------
+
+To use :command:`ipcontroller` and :command:`ipengine` to start things on your
+local machine, do the following.
+
+First start the controller::
+
+    $ ipcontroller
+
+Next, start however many instances of the engine you want using (repeatedly) the command::
+
+    $ ipengine
+
+The engines should start and automatically connect to the controller using the FURL files in :file:`~/.ipython/security`. You are now ready to use the controller and engines from IPython.
+
+.. warning::
+
+    The order of the above operations is very important. You *must*
+    start the controller before the engines, since the engines connect
+    to the controller as they get started.
+
+.. note::
+
+    On some platforms (OS X), to put the controller and engine into the
+    background you may need to give these commands in the form ``(ipcontroller
+    &)`` and ``(ipengine &)`` (with the parentheses) for them to work
+    properly.
+
+Starting the controller and engines on different hosts
+------------------------------------------------------
+
+When the controller and engines are running on different hosts, things are
+slightly more complicated, but the underlying ideas are the same:
+
+1. Start the controller on a host using :command:`ipcontroller`.
+2. Copy :file:`ipcontroller-engine.furl` from :file:`~/.ipython/security` on the controller's host to the host where the engines will run.
+3. Use :command:`ipengine` on the engine's hosts to start the engines.
+
+The only thing you have to be careful of is to tell :command:`ipengine` where the :file:`ipcontroller-engine.furl` file is located. There are two ways you can do this:
+
+* Put :file:`ipcontroller-engine.furl` in the :file:`~/.ipython/security`
+  directory on the engine's host, where it will be found automatically.
+* Call :command:`ipengine` with the ``--furl-file=full_path_to_the_file``
+  flag.
+
+The ``--furl-file`` flag works like this::
+
+    $ ipengine --furl-file=/path/to/my/ipcontroller-engine.furl
+
+.. note::
+
+    If the controller's and engine's hosts all have a shared file system
+    (:file:`~/.ipython/security` is the same on all of them), then things
+    will just work!
+
+Make FURL files persistent
+--------------------------
+
+At first glance it may seem that managing the FURL files is a bit annoying. Going back to the house and key analogy, copying the FURL file around each time you start the controller is like having to make a new key every time you want to unlock the door and enter your house. As with your house, you want to be able to create the key (or FURL file) once, and then simply use it at any point in the future.
+
+This is possible. The only thing you have to do is decide what ports the controller will listen on for the engines and clients. This is done as follows::
+
+    $ ipcontroller -r --client-port=10101 --engine-port=10102
+
+Then, just copy the FURL files over the first time and you are set. You can start and stop the controller and engines as many times as you want in the future, just make sure to tell the controller to use the *same* ports.
+
+.. note::
+
+    You may ask the question: what ports does the controller listen on if you
+    don't tell it to use specific ones? The default is to use high random port
+    numbers. We do this for two reasons: i) to increase security through
+    obscurity and ii) to allow multiple controllers on a given host to start
+    and automatically use different ports.
+
+Log files
+---------
+
+All of the components of IPython have log files associated with them.
+These log files can be extremely useful in debugging problems with
+IPython and can be found in the directory :file:`~/.ipython/log`. Sending
+the log files to us will often help us to debug any problems.
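+
+For example, to find the most recent log files when reporting a problem, a
+minimal sketch (the exact file names depend on which components you ran):
+
+.. sourcecode:: python
+
+    import glob, os
+
+    log_dir = os.path.expanduser('~/.ipython/log')
+    logs = glob.glob(os.path.join(log_dir, '*'))
+    # sort newest first and show the three most recent files
+    logs.sort(key=os.path.getmtime, reverse=True)
+    print logs[:3]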
+
+
+.. [PBS] Portable Batch System.  http://www.openpbs.org/
diff --git a/docs/source/parallel/parallel_security.txt b/docs/source/parallel/parallel_security.txt
new file mode 100644
index 0000000..abd876c
--- /dev/null
+++ b/docs/source/parallel/parallel_security.txt
@@ -0,0 +1,363 @@
+.. _parallelsecurity:
+
+===========================
+Security details of IPython
+===========================
+
+IPython's :mod:`IPython.kernel` package exposes the full power of the Python
+interpreter over a TCP/IP network for the purposes of parallel computing. This
+feature brings up the important question of IPython's security model. This
+document gives details about this model and how it is implemented in IPython's
+architecture.
+
+Processes and network topology
+==============================
+
+To enable parallel computing, IPython has a number of different processes that
+run. These processes are discussed at length in the IPython documentation and
+are summarized here:
+
+* The IPython *engine*. This process is a full-blown Python
+  interpreter in which user code is executed. Multiple
+  engines are started to make parallel computing possible.
+* The IPython *controller*. This process manages a set of
+  engines, maintaining a queue for each and presenting
+  an asynchronous interface to the set of engines.
+* The IPython *client*. This process is typically an
+  interactive Python process that is used to coordinate the
+  engines to get a parallel computation done.
+
+Collectively, these three processes are called the IPython *kernel*.
+
+These three processes communicate over TCP/IP connections with a well defined
+topology. The IPython controller is the only process that listens on TCP/IP
+sockets. Upon starting, an engine connects to a controller and registers
+itself with the controller. These engine/controller TCP/IP connections persist
+for the lifetime of each engine.
+
+The IPython client also connects to the controller using one or more TCP/IP
+connections. These connections persist for the lifetime of the client only.
+
+A given IPython controller and set of engines typically has a relatively short
+lifetime, usually corresponding to the duration of a single
+parallel simulation performed by a single user. Finally, the controller,
+engines and client processes typically execute with the permissions of that
+same user. More specifically, the controller and engines are *not* executed as
+root or with any other superuser permissions.
+
+Application logic
+=================
+
+When running the IPython kernel to perform a parallel computation, a user
+utilizes the IPython client to send Python commands and data through the
+IPython controller to the IPython engines, where those commands are executed
+and the data processed. The design of IPython ensures that the client is the
+only access point for the capabilities of the engines. That is, the only way of addressing the engines is through a client.
+
+A user can utilize the client to instruct the IPython engines to execute
+arbitrary Python commands. These Python commands can include calls to the
+system shell, access to the filesystem, etc., as required by the user's
+application code. From this perspective, when a user runs an IPython engine on
+a host, that engine has the same capabilities and permissions as the user
+themselves (as if they were logged onto the engine's host with a terminal).
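+
+For example, assuming a multiengine client ``mec`` is connected (see the
+multiengine documentation), the following sketch runs code on every engine
+with the engine user's identity (illustrative, Unix-only):
+
+.. sourcecode:: ipython
+
+    In [1]: mec.execute('import os; whoami = os.getuid()')
+
+    In [2]: mec.pull('whoami')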
+
+Secure network connections
+==========================
+
+Overview
+--------
+
+All TCP/IP connections between the client and controller as well as the
+engines and controller are fully encrypted and authenticated. This section
+describes the details of the encryption and authentication approaches used
+within IPython.
+
+IPython uses the Foolscap network protocol [Foolscap]_ for all communications
+between processes. Thus, the details of IPython's security model are directly
+related to those of Foolscap. As a result, much of the following discussion is
+actually just a discussion of the security that is built in to Foolscap.
+
+Encryption
+----------
+
+For encryption purposes, IPython and Foolscap use the well-known Secure
+Sockets Layer (SSL) protocol [RFC5246]_. We use the implementation of this
+protocol provided by the OpenSSL project through the pyOpenSSL [pyOpenSSL]_
+Python bindings to OpenSSL.
+
+Authentication
+--------------
+
+IPython clients and engines must also authenticate themselves with the
+controller. This is handled in a capabilities based security model
+[Capability]_. In this model, the controller creates a strong cryptographic
+key or token that represents each set of capabilities that the controller
+offers. Any party who has this key and presents it to the controller has full
+access to the corresponding capabilities of the controller. This model is
+analogous to using a physical key to gain access to physical items
+(capabilities) behind a locked door.
+
+For a capabilities based authentication system to prevent unauthorized access,
+two things must be ensured:
+
+* The keys must be cryptographically strong. Otherwise attackers could gain
+  access by a simple brute force key guessing attack.
+* The actual keys must be distributed only to authorized parties.
+
+The keys in Foolscap are called Foolscap URLs, or FURLs. The following section
+gives details about how these FURLs are created in Foolscap. The IPython
+controller creates a number of FURLs for different purposes:
+
+* One FURL that grants IPython engines access to the controller. Also
+  implicit in this access is permission to execute code sent by an
+  authenticated IPython client.
+* Two or more FURLs that grant IPython clients access to the controller.
+  Implicit in this access is permission to give the controller's engines
+  code to execute.
+
+Upon starting, the controller creates these different FURLs and writes them
+to files in the user-read-only directory :file:`$HOME/.ipython/security`.
+Thus, only the user who starts the controller has access to the FURLs.
+
+For an IPython client or engine to authenticate with a controller, it must
+present the appropriate FURL to the controller upon connecting. If the
+FURL matches what the controller expects for a given capability, access is
+granted. If not, access is denied. The exchange of FURLs is done after
+encrypted communications channels have been established to prevent attackers
+from capturing them.
+
+.. note::
+
+    The FURL is similar to an unsigned private key in SSH.
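+
+To make the FURL structure concrete: a FURL has the general shape
+``pb://<process id>@<ip>:<port>/<capability id>``. The following sketch
+(illustrative only, not Foolscap's actual parser) splits such a FURL into
+those parts:
+
+.. sourcecode:: python
+
+    def split_furl(furl):
+        """Split a FURL of the form pb://processid@host:port/capabilityid."""
+        assert furl.startswith('pb://')
+        rest = furl[len('pb://'):]
+        process_id, rest = rest.split('@', 1)
+        location, capability_id = rest.split('/', 1)
+        host, port = location.split(':')
+        return process_id, host, int(port), capability_id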
+
+Details of the Foolscap handshake
+---------------------------------
+
+In this section we detail the precise security handshake that takes place at
+the beginning of any network connection in IPython. For the purposes of this
+discussion, the SERVER is the IPython controller process and the CLIENT is the
+IPython engine or client process.
+
+Upon starting, all IPython processes do the following:
+
+1. Create a public key x509 certificate (ISO/IEC 9594).
+2. Create a hash of the contents of the certificate using the SHA-1 algorithm.
+   The base-32 encoded version of this hash is saved by the process as its
+   process id (actually, in Foolscap, this is the Tub id, but here we refer to
+   it as the process id).
+
+Upon starting, the IPython controller also does the following:
+
+1. Save the x509 certificate to disk in a secure location. The CLIENT
+   certificate is never saved to disk.
+2. Create a FURL for each capability that the controller has. There are
+   separate capabilities the controller offers for clients and engines. The
+   FURL is created using: a) the process id of the SERVER, b) the IP
+   address and port the SERVER is listening on and c) a 160-bit,
+   cryptographically secure string that represents the capability (the
+   "capability id").
+3. The FURLs are saved to disk in a secure location on the SERVER's host.
+
+For a CLIENT to be able to connect to the SERVER and access a capability of
+that SERVER, the CLIENT must have knowledge of the FURL for that SERVER's
+capability. This typically requires that the file containing the FURL be
+moved from the SERVER's host to the CLIENT's host. This is done by the end
+user who started the SERVER and wishes to have a CLIENT connect to the SERVER.
+
+When a CLIENT connects to the SERVER, the following handshake protocol takes
+place:
+
+1. The CLIENT tells the SERVER what process (or Tub) id it expects the SERVER
+   to have.
+2. If the SERVER has that process id, it notifies the CLIENT that it will now
+   enter encrypted mode. If the SERVER has a different id, the SERVER aborts.
+3. Both CLIENT and SERVER initiate the SSL handshake protocol.
+4. Both CLIENT and SERVER request the certificate of their peer and verify
+   that certificate. If this succeeds, all further communications are
+   encrypted.
+5. Both CLIENT and SERVER send a hello block containing connection parameters
+   and their process id.
+6. The CLIENT and SERVER check that their peer's stated process id matches the
+   hash of the x509 certificate the peer presented. If not, the connection is
+   aborted.
+7. The CLIENT verifies that the SERVER's stated id matches the id of the
+   SERVER the CLIENT is intending to connect to. If not, the connection is
+   aborted.
+8. The CLIENT and SERVER elect a master who decides on the final connection
+   parameters.
+
+The public/private key pair associated with each process's x509 certificate
+is completely hidden from this handshake protocol. It is, however, used
+internally by OpenSSL as part of the SSL handshake protocol. Each process
+keeps its own private key hidden and sends its peer only the public key
+(embedded in the certificate).
+
+Finally, when the CLIENT requests access to a particular SERVER capability,
+the following happens:
+
+1. The CLIENT asks the SERVER for access to a capability by presenting that
+   capability's id.
+2. If the SERVER has a capability with that id, access is granted. If not,
+   access is denied.
+3. Once access has been gained, the CLIENT can use the capability.
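+
+The process id computed in step 2 of the startup sequence above can be
+illustrated in a few lines of Python. This is a sketch of the idea, not
+Foolscap's actual code:
+
+.. sourcecode:: python
+
+    import base64, hashlib
+
+    def process_id(certificate_bytes):
+        # SHA-1 digest of the certificate, base-32 encoded; a 160-bit
+        # digest encodes to exactly 32 base-32 characters.
+        digest = hashlib.sha1(certificate_bytes).digest()
+        return base64.b32encode(digest).lower()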
+
+Specific security vulnerabilities
+=================================
+
+There are a number of potential security vulnerabilities present in IPython's
+architecture. In this section we discuss those vulnerabilities and detail how
+the security architecture described above prevents them from being exploited.
+
+Unauthorized clients
+--------------------
+
+The IPython client can instruct the IPython engines to execute arbitrary
+Python code with the permissions of the user who started the engines. If an
+attacker were able to connect their own hostile IPython client to the IPython
+controller, they could instruct the engines to execute code.
+
+This attack is prevented by the capabilities based client authentication
+performed after the encrypted channel has been established. The relevant
+authentication information is encoded into the FURL that clients must
+present to gain access to the IPython controller. By limiting the distribution
+of those FURLs, a user can grant access to only authorized persons.
+
+It is highly unlikely that a client FURL could be guessed by an attacker
+in a brute force guessing attack. A given instance of the IPython controller
+only runs for a relatively short amount of time (on the order of hours). Thus
+an attacker would have only a limited amount of time to test a search space of
+size 2**320. Furthermore, even if a controller were to run for a longer amount
+of time, this search space is quite large (larger, for instance, than that of
+a typical username/password pair).
+
+Unauthorized engines
+--------------------
+
+If an attacker were able to connect a hostile engine to a user's controller,
+the user might unknowingly send sensitive code or data to the hostile engine.
+The attacker's engine would then have full access to that code and data.
+
+This type of attack is prevented in the same way as the unauthorized client
+attack, through the usage of the capabilities based authentication scheme.
+
+Unauthorized controllers
+------------------------
+
+It is also possible that an attacker could try to convince a user's IPython
+client or engine to connect to a hostile IPython controller. That controller
+would then have full access to the code and data sent between the IPython
+client and the IPython engines.
+
+Again, this attack is prevented through the FURLs, which ensure that a
+client or engine connects to the correct controller. It is also important to
+note that the FURLs also encode the IP address and port that the
+controller is listening on, so there is little chance of mistakenly connecting
+to a controller running on a different IP address and port.
+
+When starting an engine or client, a user must specify which FURL to use
+for that connection. Thus, in order to introduce a hostile controller, the
+attacker must convince the user to use the FURLs associated with the
+hostile controller. As long as a user is diligent in only using FURLs from
+trusted sources, this attack is not possible.
+
+Other security measures
+=======================
+
+A number of other measures are taken to further limit the security risks
+involved in running the IPython kernel.
+
+First, by default, the IPython controller listens on random port numbers.
+While this can be overridden by the user, in the default configuration, an
+attacker would have to do a port scan to even find a controller to attack.
+When coupled with the relatively short running time of a typical controller
+(on the order of hours), an attacker would have to work extremely hard and
+extremely *fast* to even find a running controller to attack.
+
+Second, much of the time, especially when run on supercomputers or clusters,
+the controller is running behind a firewall. Thus, for engines or clients to
+connect to the controller:
+
+* The different processes all have to be behind the firewall.
+
+or:
+
+* The user has to use SSH port forwarding to tunnel the
+  connections through the firewall.
+
+In either case, an attacker is presented with additional barriers that prevent
+attacking or even probing the system.
+
+Summary
+=======
+
+IPython's architecture has been carefully designed with security in mind. The
+capabilities based authentication model, in conjunction with the encrypted
+TCP/IP channels, addresses the core potential vulnerabilities in the system,
+while still enabling users to use the system in open networks.
+
+Other questions
+===============
+
+About keys
+----------
+
+Can you clarify the roles of the certificate and its keys versus the FURL,
+which is also called a key?
+
+The certificate created by IPython processes is a standard public key x509
+certificate that is used by the SSL handshake protocol to set up an encrypted
+channel between the controller and the IPython engine or client. The public
+and private keys associated with this certificate are used only by the SSL
+handshake protocol in setting up this encrypted channel.
+
+The FURL serves a completely different and independent purpose from the
+key pair associated with the certificate. When we refer to a FURL as a
+key, we are using the word "key" in the capabilities based security model
+sense. This has nothing to do with "key" in the public/private key sense used
+in the SSL protocol.
+
+With that said, the FURL is used as a cryptographic key, to grant
+IPython engines and clients access to particular capabilities that the
+controller offers.
+
+Self signed certificates
+------------------------
+
+Is the controller creating a self-signed certificate? Is this created per
+instance/session, as a one-time setup, or each time the controller is started?
+
+The Foolscap network protocol, which handles the SSL protocol details, creates
+a self-signed x509 certificate using OpenSSL for each IPython process. The
+lifetime of the certificate is handled differently for the IPython controller
+and the engines/client.
+
+For the IPython engines and client, the certificate is only held in memory for
+the lifetime of its process. It is never written to disk.
+
+For the controller, the certificate can be created anew each time the
+controller starts or it can be created once and reused each time the
+controller starts. If at any point the certificate is deleted, a new one is
+created the next time the controller starts.
+
+SSL private key
+---------------
+
+How is the private key (associated with the certificate) distributed?
+
+In the usual implementation of the SSL protocol, the private key is never
+distributed. We always follow this standard.
+
+SSL versus Foolscap authentication
+----------------------------------
+
+Many SSL connections only perform one-sided authentication (the server to the
+client). How is the client authentication in IPython's system related to SSL
+authentication?
+
+We perform a two-way SSL handshake in which both parties request and verify
+the certificate of their peer. This mutual authentication is handled by the
+SSL handshake and is separate and independent from the additional
+authentication steps that the CLIENT and SERVER perform after an encrypted
+channel is established.
+
+.. [RFC5246]
diff --git a/docs/source/parallel/parallel_task.txt b/docs/source/parallel/parallel_task.txt
index a307439..14d4565 100644
--- a/docs/source/parallel/parallel_task.txt
+++ b/docs/source/parallel/parallel_task.txt
@@ -4,8 +4,6 @@ The IPython task interface
==========================

-.. contents::
-
The task interface to the controller presents the engines as a fault tolerant, dynamic load-balanced system of workers. Unlike the multiengine interface, in the task interface the user has no direct access to individual engines. In some ways, this interface is simpler, but in other ways it is more powerful. Best of all, the user can use both of these interfaces at the same time to take advantage of both of their strengths. When the user's work can be broken up into segments that do not depend on previous execution, the task interface is ideal. But it also has more power and flexibility, allowing the user to guide the distribution of jobs, without having to assign tasks to engines explicitly.
@@ -17,7 +15,7 @@ To follow along with this tutorial, you will need to start the IPython
controller and four IPython engines. The simplest way of doing this is to use
the :command:`ipcluster` command::

-    $ ipcluster -n 4
+    $ ipcluster local -n 4

For more detailed information about starting the controller and engines, see
our :ref:`introduction ` to using IPython for parallel computing.
@@ -26,7 +24,9 @@ Creating a ``TaskClient`` instance
=========================================

The first step is to import the IPython :mod:`IPython.kernel.client` module
-and then create a :class:`TaskClient` instance::
+and then create a :class:`TaskClient` instance:
+
+.. sourcecode:: ipython

In [1]: from IPython.kernel import client

@@ -34,10 +34,12 @@ and then create a :class:`TaskClient` instance::
This form assumes that the :file:`ipcontroller-tc.furl` is in the
:file:`~/.ipython/security` directory on the client's host. If not, the
-location of the ``.furl`` file must be given as an argument to the
-constructor::
+location of the FURL file must be given as an argument to the
+constructor:
+
+.. sourcecode:: ipython

-    In[2]: mec = client.TaskClient('/path/to/my/ipcontroller-tc.furl')
+    In [2]: tc = client.TaskClient('/path/to/my/ipcontroller-tc.furl')

Quick and easy parallelism
==========================

@@ -47,7 +49,9 @@ In many cases, you simply want to apply a Python function to a sequence of objec
Parallel map
------------

-The parallel :meth:`map` in the task interface is similar to that in the multiengine interface::
+The parallel :meth:`map` in the task interface is similar to that in the multiengine interface:
+
+.. sourcecode:: ipython

In [63]: serial_result = map(lambda x:x**10, range(32))

@@ -59,7 +63,9 @@ The parallel :meth:`map` in the task interface is similar to that in the multien
Parallel function decorator
---------------------------

-Parallel functions are just like normal function, but they can be called on sequences and *in parallel*. The multiengine interface provides a decorator that turns any Python function into a parallel function::
+Parallel functions are just like normal functions, but they can be called on sequences and *in parallel*. The task interface provides a decorator that turns any Python function into a parallel function:
+
+.. sourcecode:: ipython

In [10]: @tc.parallel()
....: def f(x):
diff --git a/sandbox/asynparallel.py b/sandbox/asynparallel.py
new file mode 100644
index 0000000..35b815b
--- /dev/null
+++ b/sandbox/asynparallel.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+"""A parallel tasking tool that uses asynchronous programming. This uses a
+blocking client to get task ids, but returns a Deferred as the result of
+run(). Users should attach their callbacks to these Deferreds.
+
+Only returning of results is asynchronous.
+Submitting tasks and getting task ids are done synchronously.
+
+Yichun Wei 03/2008
+"""
+
+import inspect
+import itertools
+import numpy as N
+
+from twisted.python import log
+from ipython1.kernel import client
+from ipython1.kernel.client import Task
+
+"""After http://trac.pocoo.org/repos/pocoo/trunk/pocoo/utils/decorators.py
+"""
+class submit_job(object):
+    """A decorator factory: takes a MultiEngineClient and a TaskClient, and
+    returns a decorator that makes a call to the decorated func run as a task
+    in ipython1 and submits it to the IPython1 controller:
+    """
+    def __init__(self, rc, tc):
+        self.rc = rc
+        self.tc = tc
+
+    def __call__(self, func):
+        return self._decorate(func)
+
+    def _getinfo(self, func):
+        assert inspect.ismethod(func) or inspect.isfunction(func)
+        regargs, varargs, varkwargs, defaults = inspect.getargspec(func)
+        argnames = list(regargs)
+        if varargs:
+            argnames.append(varargs)
+        if varkwargs:
+            argnames.append(varkwargs)
+        counter = itertools.count()
+        fullsign = inspect.formatargspec(
+            regargs, varargs, varkwargs, defaults,
+            formatvalue=lambda value: '=defarg[%i]' % counter.next())[1:-1]
+        shortsign = inspect.formatargspec(
+            regargs, varargs, varkwargs, defaults,
+            formatvalue=lambda value: '')[1:-1]
+        dic = dict(('arg%s' % n, name) for n, name in enumerate(argnames))
+        dic.update(name=func.__name__, argnames=argnames, shortsign=shortsign,
+                   fullsign = fullsign, defarg = func.func_defaults or ())
+        return dic
+
+    def _decorate(self, func):
+        """
+        Takes a function and a remote controller, and returns a decorated
+        function that is going to submit the job with the controller.
+        The decorated function is obtained by evaluating a lambda
+        function with the correct signature.
+
+        The TaskController setupNS doesn't cope with functions, but we
+        can use RemoteController to push functions/modules into engines.
+
+        Changes:
+        200803. In new ipython1, we use push_function for functions.
+        """
+        rc, tc = self.rc, self.tc
+        infodict = self._getinfo(func)
+        if 'rc' in infodict['argnames']:
+            raise NameError, "You cannot use rc as an argument name!"
+
+        # we assume the engines' namespace has been prepared.
+        # ns[func.__name__] is already the decorated closure function.
+        # we need to change it back to the original function:
+        ns = {}
+        ns[func.__name__] = func
+
+        # push func and all its environment/prerequisites to engines
+        rc.push_function(ns, block=True) # note: this is non-blocking by default; not sure if that causes problems
+
+        def do_submit_func(*args, **kwds):
+            jobns = {}
+
+            # Initialize the job namespace with args that have defaults, so
+            # calls that use default args are supported.
+            for n in infodict['fullsign'].split(','):
+                try:
+                    vname, var = n.split('=')
+                    vname, var = vname.strip(), var.strip()
+                except: # no defarg, one of vname, var is None
+                    pass
+                else:
+                    jobns.setdefault(vname, eval(var, infodict))
+
+            # push args and kwds, overwriting default args if needed.
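+            # Precedence, lowest to highest: declared defaults, then
+            # positional args, then explicit keyword args.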
+            nokwds = dict((n,v) for n,v in zip(infodict['argnames'], args)) # zip truncates to the shorter sequence
+            jobns.update(nokwds)
+            jobns.update(kwds)
+
+            task = Task('a_very_long_and_rare_name = %(name)s(%(shortsign)s)' % infodict,
+                        pull=['a_very_long_and_rare_name'], push=jobns,)
+            jobid = tc.run(task)
+            # res is a Deferred; one can attach callbacks to it
+            res = tc.task_controller.get_task_result(jobid, block=True)
+            res.addCallback(lambda x: x.ns['a_very_long_and_rare_name'])
+            res.addErrback(log.err)
+            return res
+
+        do_submit_func.rc = rc
+        do_submit_func.tc = tc
+        return do_submit_func
+
+
+def parallelized(rc, tc, initstrlist=[]):
+    """rc - remote controller
+    tc - task controller
+    initstrlist - a list of strings to be executed on the engines.
+    """
+    for cmd in initstrlist:
+        rc.execute(cmd, block=True)
+    return submit_job(rc, tc)
+
+
+from twisted.internet import defer
+from numpy import array, nan
+
+def pmap(func, parr, **kwds):
+    """Run func on every element of parr (array), using each element
+    as the only parameter (so you can usually use a dict that
+    wraps many parameters). -> a result array of Deferreds with the
+    same shape. func.tc will be used as the task client.
+
+    **kwds are passed on to func, not changed.
+    """
+    assert func.tc
+    tc = func.tc
+
+    def run(p, **kwds):
+        if p:
+            return func(p, **kwds)
+        else:
+            return defer.succeed(nan)
+
+    reslist = [run(p, **kwds).addErrback(log.err) for p in parr.flat]
+    resarr = array(reslist)
+    resarr.shape = parr.shape
+    return resarr
+
+
+if __name__=='__main__':
+
+    rc = client.MultiEngineClient(client.default_address)
+    tc = client.TaskClient(client.default_task_address)
+
+    # if you comment out the decorator, you instantly get a locally
+    # running version
+    @parallelized(rc, tc)
+    def f(a, b=1):
+        #from time import sleep
+        #sleep(1)
+        print "a,b=", a,b
+        return a+b
+
+    def showres(x):
+        print 'ans:',x
+
+    res = f(11,5)
+    res.addCallback(showres)
+
+    # this is not necessary in Twisted 8.0
+    from twisted.internet import reactor
+    reactor.run()
diff --git a/sandbox/minitraits.py b/sandbox/minitraits.py
new file mode 100644
index 0000000..7442554
--- /dev/null
+++ b/sandbox/minitraits.py
@@ -0,0 +1,119 @@
+import types
+
+class AttributeBase(object):
+
+    def __get__(self, inst, cls=None):
+        if inst is None:
+            return self
+        try:
+            return inst._attributes[self.name]
+        except KeyError:
+            raise AttributeError("object has no attribute %r" % self.name)
+
+    def __set__(self, inst, value):
+        actualValue = self.validate(inst, self.name, value)
+        inst._attributes[self.name] = actualValue
+
+    def validate(self, inst, name, value):
+        raise NotImplementedError("validate must be implemented by a subclass")
+
+class NameFinder(type):
+
+    def __new__(cls, name, bases, classdict):
+        attributeList = []
+        for k,v in classdict.iteritems():
+            if isinstance(v, AttributeBase):
+                v.name = k
+                attributeList.append(k)
+        classdict['_attributeList'] = attributeList
+        return type.__new__(cls, name, bases, classdict)
+
+class HasAttributes(object):
+    __metaclass__ = NameFinder
+
+    def __init__(self):
+        self._attributes = {}
+
+    def getAttributeNames(self):
+        return self._attributeList
+
+    def getAttributesOfType(self, t, default=None):
+        result = {}
+        for a in self._attributeList:
+            if self.__class__.__dict__[a].__class__ == t:
+                try:
+                    value = getattr(self, a)
+                except AttributeError:
+                    value = None
+                result[a] = value
+        return result
+
+class TypedAttribute(AttributeBase):
+
+    def validate(self, inst, name, value):
+        if type(value) != self._type:
+            raise TypeError("attribute %s must be of type %s" % (name, self._type))
+        else:
+            return value
+
+# class Option(TypedAttribute):
+#
+#     _type = types.IntType
+#
+# class Param(TypedAttribute):
+#
+#     _type = types.FloatType
+#
+# class String(TypedAttribute):
+#
+#     _type = types.StringType
+
+class TypedSequenceAttribute(AttributeBase):
+
+    def validate(self, inst, name, value):
+        if type(value) != types.TupleType and type(value) != types.ListType:
+            raise TypeError("attribute %s must be a list or tuple" % (name))
+        else:
+            for item in value:
+                if type(item) != self._subtype:
+                    raise TypeError("attribute %s must be a list or tuple of items with type %s" % (name, self._subtype))
+            return value
+
+# class Instance(AttributeBase):
+#
+#     def __init__(self, cls):
+#         self.cls = cls
+#
+#     def validate(self, inst, name, value):
+#         if not isinstance(value, self.cls):
+#             raise TypeError("attribute %s must be an instance of class %s" % (name, self.cls))
+#         else:
+#             return value
+
+
+# class OptVec(TypedSequenceAttribute):
+#
+#     _subtype = types.IntType
+#
+# class PrmVec(TypedSequenceAttribute):
+#
+#     _subtype = types.FloatType
+#
+# class StrVec(TypedSequenceAttribute):
+#
+#     _subtype = types.StringType
+#
+#
+# class Bar(HasAttributes):
+#
+#     a = Option()
+#
+# class Foo(HasAttributes):
+#
+#     a = Option()
+#     b = Param()
+#     c = String()
+#     d = OptVec()
+#     e = PrmVec()
+#     f = StrVec()
+#     h = Instance(Bar)
\ No newline at end of file
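+
+if __name__ == '__main__':
+    # A minimal usage sketch of the classes above. ``Count`` and ``Demo``
+    # are illustrative names invented for this demo, not part of any API.
+    class Count(TypedAttribute):
+        _type = types.IntType
+
+    class Demo(HasAttributes):
+        a = Count()
+
+    demo = Demo()
+    demo.a = 3
+    print demo.a, demo.getAttributeNames()   # -> 3 ['a']
+    try:
+        demo.a = 'not an int'
+    except TypeError, e:
+        print e                              # the type check is enforced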