upstream/ipython Commit - r10017:e2ca7dc4

Refactor to prefer rpy2's pandas2ri conversion system.

Thomas Kluyver -

r10017:e2ca7dc4

parent child

IPython/extensions/rmagic.py

0 +30 -8

              # -*- coding: utf-8 -*-
              """
              ======
              Rmagic
              ======
              Magic command interface for interactive work with R via rpy2
              Usage
              =====
              ``%R``
              {R_DOC}
              ``%Rpush``
              {RPUSH_DOC}
              ``%Rpull``
              {RPULL_DOC}
              ``%Rget``
              {RGET_DOC}
              """
              #-----------------------------------------------------------------------------
              #  Copyright (C) 2012 The IPython Development Team
              #
              #  Distributed under the terms of the BSD License.  The full license is in
              #  the file COPYING, distributed as part of this software.
              #-----------------------------------------------------------------------------
              import sys
              import tempfile
              from glob import glob
              from shutil import rmtree
              from getopt import getopt
              # numpy and rpy2 imports
              import numpy as np
              import rpy2.rinterface as ri
              import rpy2.robjects as ro
-             from rpy2.robjects.numpy2ri import numpy2ri
-             ro.conversion.py2ri = numpy2ri
+             try:
+                 from rpy2.robjects import pandas2ri
+                 pandas2ri.activate()
+             except ImportError:
+                 pandas2ri = None
+                 from rpy2.robjects import numpy2ri
+                 numpy2ri.activate()
              # IPython imports
              from IPython.core.displaypub import publish_display_data
              from IPython.core.magic import (Magics, magics_class, cell_magic, line_magic,
                                              line_cell_magic, needs_local_scope)
              from IPython.testing.skipdoctest import skip_doctest
              from IPython.core.magic_arguments import (
                  argument, magic_arguments, parse_argstring
              )
+             from IPython.external.simplegeneric import generic
              from IPython.utils.py3compat import str_to_unicode, unicode_to_str, PY3
              class RInterpreterError(ri.RRuntimeError):
                  """Raised when R code run through the R magics fails to parse or evaluate.

                  The instance keeps the offending ``line``, the R error message ``err``
                  and whatever R wrote to its console (``stdout``); the latter two are
                  stored with trailing whitespace stripped.
                  """

                  def __init__(self, line, err, stdout):
                      self.stdout = stdout.rstrip()
                      self.err = err.rstrip()
                      self.line = line

                  def __unicode__(self):
                      message = 'Failed to parse and evaluate line %r.\nR error message: %r' % (
                          self.line, self.err)
                      # Console output is appended only when it says something the
                      # error message does not already say.
                      if not self.stdout or self.stdout == self.err:
                          return message
                      return message + '\nR stdout:\n' + self.stdout

                  if PY3:
                      __str__ = __unicode__
                  else:
                      def __str__(self):
                          return unicode_to_str(unicode(self), 'utf-8')
              def Rconverter(Robj, dataframe=False):
                  """
                  Convert an object in R's namespace to one suitable
                  for ipython's namespace.

                  Parameters
                  ----------
                  Robj : an R object returned from rpy2
                  dataframe : bool, optional
                      If False (the default), return ``np.asarray(Robj)``.
                      If True, try to return a structured (record) array: R's
                      ``colnames`` is consulted first (and, when it is not NULL,
                      the object is passed through ``as.data.frame``), then
                      ``names``. If both are NULL, ``np.asarray(Robj)`` is
                      returned unchanged.

                  Returns
                  -------
                  The result of ``np.asarray`` applied either to ``Robj`` itself or
                  to the record array built from it.
                  """
                  if dataframe:
                      # Both lookups are done up front; colnames takes precedence.
                      cols = ro.r('colnames')(Robj)
                      obj_names = ro.r('names')(Robj)
                      if cols != ri.NULL:
                          Robj = ro.r('as.data.frame')(Robj)
                          field_names = tuple(np.array(cols))
                      elif obj_names != ri.NULL:
                          field_names = tuple(np.array(obj_names))
                      else: # failed to find names
                          return np.asarray(Robj)
                      Robj = np.rec.fromarrays(Robj, names=field_names)
                  return np.asarray(Robj)
+             @generic
              def pyconverter(pyobj):
-                 """Convert Python objects to R objects."""
-                 if 'pandas' in sys.modules:
-                     # We only do this if pandas is already loaded
+                 """Convert Python objects to R objects. Add types using the decorator:
+                 @pyconverter.when_type
+                 """
+                 return pyobj
+             # The default conversion for lists seems to make them a nested list. That has
+             # some advantages, but is rarely convenient, so for interactive use, we convert
+             # lists to a numpy array, which becomes an R vector.
+             @pyconverter.when_type(list)
+             def pyconverter_list(pyobj):
+                 return np.asarray(pyobj)
+             if pandas2ri is None:
+                 # pandas2ri was new in rpy2 2.3.3, so for now we'll fallback to pandas'
+                 # conversion function.
+                 try:
                      from pandas import DataFrame
-                     if isinstance(pyobj, DataFrame):
-                         from pandas.rpy.common import convert_to_r_dataframe
+                     @pyconverter.when_type(DataFrame)
+                     def pyconverter_dataframe(pyobj):
                          return convert_to_r_dataframe(pyobj, strings_as_factors=True)
-                 return np.asarray(pyobj)
+                 except ImportError:
+                     pass
              @magics_class
              class RMagics(Magics):
                  """A set of magics useful for interactive work with R via rpy2.
                  """
                  def __init__(self, shell,
                               Rconverter=Rconverter,
                               pyconverter=pyconverter,
                               cache_display_data=False):
                      """
                      Set up the magics object and its R session handle.

                      Parameters
                      ----------
                      shell : IPython shell
                      Rconverter : callable
                          Applied to values coming out of R before they are placed
                          in the IPython namespace.
                      pyconverter : callable
                          Applied to values from the IPython namespace before they
                          are assigned to variables in rpy2.
                      cache_display_data : bool
                          If True, the published results of the final call to R are
                          cached in the variable 'display_cache'.
                      """
                      super(RMagics, self).__init__(shell)

                      # Conversion hooks used by the push/pull magics.
                      self.Rconverter = Rconverter
                      self.pyconverter = pyconverter

                      # R session handle and the buffer filled by write_console.
                      self.r = ro.R()
                      self.Rstdout_cache = []

                      self.cache_display_data = cache_display_data
                  def eval(self, line):
                      '''
                      Parse and evaluate a line with rpy2.

                      While the line is evaluated, R's console output is redirected to
                      self.write_console; the previously installed console hook is
                      restored afterwards, whether evaluation succeeds or fails.

                      Returns
                      -------
                      (text_output, value) : the text R wrote to its console during
                      evaluation, and the value of eval(parse(line)).

                      Raises
                      ------
                      RInterpreterError
                          If rpy2 raises RRuntimeError or ValueError while parsing or
                          evaluating the line. The captured console output is attached.
                      '''
                      old_writeconsole = ri.get_writeconsole()
                      ri.set_writeconsole(self.write_console)
                      try:
                          value = ri.baseenv['eval'](ri.parse(line))
                      except (ri.RRuntimeError, ValueError) as exception:
                          warning_or_other_msg = self.flush() # otherwise next return seems to have copy of error
                          raise RInterpreterError(line, str_to_unicode(str(exception)), warning_or_other_msg)
                      finally:
                          # Always put the previous hook back; otherwise a failing line
                          # leaves our capture hook installed.
                          ri.set_writeconsole(old_writeconsole)
                      text_output = self.flush()
                      return text_output, value
                  def write_console(self, output):
                      '''
                      A hook to capture R's stdout in a cache.

                      Appends `output` to self.Rstdout_cache. The eval method installs
                      this hook with ri.set_writeconsole, and flush joins and empties
                      the cache.
                      '''
                      self.Rstdout_cache.append(output)
                  def flush(self):
                      '''
                      Drain the cache of captured R console output.

                      Every cached piece is passed through str_to_unicode (as utf-8),
                      the pieces are concatenated, the cache is reset to an empty
                      list, and the concatenated text is returned.
                      '''
                      decoded = (str_to_unicode(piece, 'utf-8') for piece in self.Rstdout_cache)
                      flushed = ''.join(decoded)
                      self.Rstdout_cache = []
                      return flushed
                  @skip_doctest
                  @needs_local_scope
                  @line_magic
                  def Rpush(self, line, local_ns=None):
                      '''
                      A line-level magic for R that pushes
                      variables from python to rpy2. The line should be made up
                      of whitespace separated variable names in the IPython
                      namespace::

                          In [7]: import numpy as np

                          In [8]: X = np.array([4.5,6.3,7.9])

                          In [9]: X.mean()
                          Out[9]: 6.2333333333333343

                          In [10]: %Rpush X

                          In [11]: %R mean(X)
                          Out[11]: array([ 6.23333333])

                      Each name is looked up in `local_ns` first and then in
                      self.shell.user_ns; the value is passed through self.pyconverter
                      and assigned in R under the same name. A NameError is raised for
                      a name found in neither namespace. An empty line pushes nothing.
                      '''
                      if local_ns is None:
                          local_ns = {}

                      # split() without an argument splits on any run of whitespace, so
                      # repeated or trailing spaces do not produce empty names.
                      for name in line.split():
                          try:
                              val = local_ns[name]
                          except KeyError:
                              try:
                                  val = self.shell.user_ns[name]
                              except KeyError:
                                  # reraise the KeyError as a NameError so that it looks like
                                  # the standard python behavior when you use an undefined
                                  # variable
                                  raise NameError("name '%s' is not defined" % name)
                          self.r.assign(name, self.pyconverter(val))
                  @skip_doctest
                  @magic_arguments()
                  @argument(
                      '-d', '--as_dataframe', action='store_true',
                      default=False,
                      help='Convert objects to data.frames before returning to ipython.'
                      )
                  @argument(
                      'outputs',
                      nargs='*',
                      )
                  @line_magic
                  def Rpull(self, line):
                      '''
                      A line-level magic for R that pulls
                      variables from rpy2 into the IPython namespace::

                          In [18]: _ = %R x = c(3,4,6.7); y = c(4,6,7); z = c('a',3,4)

                          In [19]: %Rpull x  y z

                          In [20]: x
                          Out[20]: array([ 3. ,  4. ,  6.7])

                          In [21]: y
                          Out[21]: array([ 4.,  6.,  7.])

                          In [22]: z
                          Out[22]:
                          array(['a', '3', '4'],
                                dtype='|S1')

                      If --as_dataframe is given, self.Rconverter is called with
                      dataframe=True for each object, so each one is returned as a
                      structured array where possible.
                      This is useful when a structured array is desired as output, or
                      when the object in R has mixed data types.
                      See the %%R docstring for more examples.

                      Notes
                      -----
                      Beware that R names can have '.' so this is not fool proof.
                      To avoid this, don't name your R objects with '.'s...
                      '''
                      args = parse_argstring(self.Rpull, line)
                      for name in args.outputs:
                          converted = self.Rconverter(self.r(name), dataframe=args.as_dataframe)
                          self.shell.push({name: converted})
                  @skip_doctest
                  @magic_arguments()
                  @argument(
                      '-d', '--as_dataframe', action='store_true',
                      default=False,
                      help='Convert objects to data.frames before returning to ipython.'
                      )
                  @argument(
                      'output',
                      nargs=1,
                      type=str,
                      )
                  @line_magic
                  def Rget(self, line):
                      '''
                      Return a single object from rpy2, as a structured array when
                      --as_dataframe is given and that is possible.

                      Works like Rpull, except that exactly one name is accepted and the
                      converted value is returned instead of being pushed to
                      self.shell.user_ns::

                          In [3]: dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')]

                          In [4]: datapy = np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5, 'e')], dtype=dtype)

                          In [5]: %R -i datapy

                          In [6]: %Rget datapy
                          Out[6]:
                          array([['1', '2', '3', '4'],
                                 ['2', '3', '2', '5'],
                                 ['a', 'b', 'c', 'e']],
                                dtype='|S1')

                          In [7]: %Rget -d datapy
                          Out[7]:
                          array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5.0, 'e')],
                                dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])
                      '''
                      args = parse_argstring(self.Rget, line)
                      # nargs=1 makes args.output a one-element list.
                      r_object = self.r(args.output[0])
                      return self.Rconverter(r_object, dataframe=args.as_dataframe)
                  @skip_doctest
                  @magic_arguments()
                  @argument(
                      '-i', '--input', action='append',
                      help='Names of input variable from shell.user_ns to be assigned to R variables of the same names after calling self.pyconverter. Multiple names can be passed separated only by commas with no whitespace.'
                      )
                  @argument(
                      '-o', '--output', action='append',
                      help='Names of variables to be pushed from rpy2 to shell.user_ns after executing cell body and applying self.Rconverter. Multiple names can be passed separated only by commas with no whitespace.'
                      )
                  @argument(
                      '-w', '--width', type=int,
                      help='Width of png plotting device sent as an argument to *png* in R.'
                      )
                  @argument(
                      '-h', '--height', type=int,
                      help='Height of png plotting device sent as an argument to *png* in R.'
                      )
                  @argument(
                      '-d', '--dataframe', action='append',
                      help='Convert these objects to data.frames and return as structured arrays.'
                      )
                  @argument(
                      '-u', '--units', type=unicode, choices=["px", "in", "cm", "mm"],
                      help='Units of png plotting device sent as an argument to *png* in R. One of ["px", "in", "cm", "mm"].'
                      )
                  @argument(
                      '-r', '--res', type=int,
                      help='Resolution of png plotting device sent as an argument to *png* in R. Defaults to 72 if *units* is one of ["in", "cm", "mm"].'
                      )
                  @argument(
                      '-p', '--pointsize', type=int,
                      help='Pointsize of png plotting device sent as an argument to *png* in R.'
                      )
                  @argument(
                      '-b', '--bg',
                      help='Background of png plotting device sent as an argument to *png* in R.'
                      )
                  @argument(
                      '-n', '--noreturn',
                      help='Force the magic to not return anything.',
                      action='store_true',
                      default=False
                      )
                  @argument(
                      'code',
                      nargs='*',
                      )
                  @needs_local_scope
                  @line_cell_magic
                  def R(self, line, cell=None, local_ns=None):
                      '''
                      Execute code in R, and pull some of the results back into the Python namespace.

                      In line mode, this will evaluate an expression and convert the returned value to a Python object.
                      The return value is determined by rpy2's behaviour of returning the result of evaluating the
                      final line.

                      Multiple R lines can be executed by joining them with semicolons::

                          In [9]: %R X=c(1,4,5,7); sd(X); mean(X)
                          Out[9]: array([ 4.25])

                      As a cell, this will run a block of R code, without bringing anything back by default::

                          In [10]: %%R
                             ....: Y = c(2,4,3,9)
                             ....: print(summary(lm(Y~X)))
                             ....:

                          Call:
                          lm(formula = Y ~ X)

                          Residuals:
                              1     2     3     4
                           0.88 -0.24 -2.28  1.64

                          Coefficients:
                                      Estimate Std. Error t value Pr(>|t|)
                          (Intercept)   0.0800     2.3000   0.035    0.975
                          X             1.0400     0.4822   2.157    0.164

                          Residual standard error: 2.088 on 2 degrees of freedom
                          Multiple R-squared: 0.6993,Adjusted R-squared: 0.549
                          F-statistic: 4.651 on 1 and 2 DF,  p-value: 0.1638

                      In the notebook, plots are published as the output of the cell.

                      %R plot(X, Y)

                      will create a scatter plot of X vs Y.

                      If cell is not None and line has some R code, it is prepended to
                      the R code in cell.

                      Objects can be passed back and forth between rpy2 and python via the -i -o flags in line::

                          In [14]: Z = np.array([1,4,5,10])

                          In [15]: %R -i Z mean(Z)
                          Out[15]: array([ 5.])

                          In [16]: %R -o W W=Z*mean(Z)
                          Out[16]: array([  5.,  20.,  25.,  50.])

                          In [17]: W
                          Out[17]: array([  5.,  20.,  25.,  50.])

                      The return value is determined by these rules:

                      * If the cell is not None, the magic returns None.

                      * If the cell is None (line mode), the resulting value is returned
                        unless the final line prints something to the console, in
                        which case None is returned.

                      * If the final line results in a NULL value when evaluated
                        by rpy2, then None is returned.

                      * No attempt is made to convert the final value to a structured array.
                        Use the --dataframe flag or %Rget to push / return a structured array.

                      * If the -n flag is present, there is no return value.

                      * A trailing ';' will also result in no return value as the last
                        value in the line is an empty string.

                      The --dataframe argument will attempt to return structured arrays.
                      This is useful for dataframes with
                      mixed data types. Note also that for a data.frame,
                      if it is returned as an ndarray, it is transposed::

                          In [18]: dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')]

                          In [19]: datapy = np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5, 'e')], dtype=dtype)

                          In [20]: %%R -o datar
                          datar = datapy
                             ....:

                          In [21]: datar
                          Out[21]:
                          array([['1', '2', '3', '4'],
                                 ['2', '3', '2', '5'],
                                 ['a', 'b', 'c', 'e']],
                                dtype='|S1')

                          In [22]: %%R -d datar
                          datar = datapy
                             ....:

                          In [23]: datar
                          Out[23]:
                          array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5.0, 'e')],
                                dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])

                      The --dataframe argument first tries colnames, then names.
                      If both are NULL, it returns an ndarray (i.e. unstructured)::

                          In [1]: %R mydata=c(4,6,8.3); NULL

                          In [2]: %R -d mydata

                          In [3]: mydata
                          Out[3]: array([ 4. ,  6. ,  8.3])

                          In [4]: %R names(mydata) = c('a','b','c'); NULL

                          In [5]: %R -d mydata

                          In [6]: mydata
                          Out[6]:
                          array((4.0, 6.0, 8.3),
                                dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

                          In [7]: %R -o mydata

                          In [8]: mydata
                          Out[8]: array([ 4. ,  6. ,  8.3])

                      Raises
                      ------
                      NameError
                          If a name given with -i is found neither in the local
                          namespace nor in shell.user_ns.
                      RInterpreterError
                          Propagated from self.eval when R fails to parse or evaluate
                          the code. The png device is closed and the temporary plot
                          directory is removed before the error propagates.
                      '''
                      args = parse_argstring(self.R, line)

                      # arguments 'code' in line are prepended to
                      # the cell lines
                      if cell is None:
                          code = ''
                          return_output = True
                          line_mode = True
                      else:
                          code = cell
                          return_output = False
                          line_mode = False
                      code = ' '.join(args.code) + code

                      # if there is no local namespace then default to an empty dict
                      if local_ns is None:
                          local_ns = {}

                      if args.input:
                          for name in ','.join(args.input).split(','):
                              try:
                                  val = local_ns[name]
                              except KeyError:
                                  try:
                                      val = self.shell.user_ns[name]
                                  except KeyError:
                                      raise NameError("name '%s' is not defined" % name)
                              self.r.assign(name, self.pyconverter(val))

                      if args.units is not None:
                          if args.units != "px" and args.res is None:
                              args.res = 72
                          # R expects the unit as a quoted string literal.
                          args.units = '"%s"' % args.units

                      # Only options that were actually given are forwarded to png().
                      png_options = [(n, getattr(args, n))
                                     for n in ['units', 'res', 'height', 'width', 'bg', 'pointsize']]
                      png_args = ','.join(['%s=%s' % (o, v) for o, v in png_options if v is not None])

                      # execute the R code in a temporary directory
                      tmpd = tempfile.mkdtemp()
                      try:
                          self.r('png("%s/Rplots%%03d.png",%s)' % (tmpd.replace('\\', '/'), png_args))
                          text_output = ''
                          try:
                              if line_mode:
                                  # code.split(';') always yields at least one piece, so
                                  # text_result and result are bound after the loop.
                                  for statement in code.split(';'):
                                      text_result, result = self.eval(statement)
                                      text_output += text_result
                                  if text_result:
                                      # the last line printed something to the console so we won't return it
                                      return_output = False
                              else:
                                  text_result, result = self.eval(code)
                                  text_output += text_result
                          finally:
                              # Close the png device even when evaluation failed, so it
                              # does not stay open for later calls.
                              self.r('dev.off()')

                          # read out all the saved .png files; glob returns them in
                          # arbitrary order, so sort to follow the Rplots%03d numbering
                          images = []
                          for imgfile in sorted(glob("%s/Rplots*png" % tmpd)):
                              with open(imgfile, 'rb') as img:
                                  images.append(img.read())
                      finally:
                          # kill the temporary directory, also on failure
                          rmtree(tmpd)

                      # now publish the images
                      # mimicking IPython/zmq/pylab/backend_inline.py
                      fmt = 'png'
                      mimetypes = { 'png' : 'image/png', 'svg' : 'image/svg+xml' }
                      mime = mimetypes[fmt]

                      # publish the printed R objects, if any
                      display_data = []
                      if text_output:
                          display_data.append(('RMagic.R', {'text/plain':text_output}))

                      for image in images:
                          # flush text streams before sending figures, helps a little with output
                          # synchronization in the console (though it's a bandaid, not a real sln)
                          sys.stdout.flush(); sys.stderr.flush()
                          display_data.append(('RMagic.R', {mime: image}))

                      # try to turn every output into a numpy array
                      # this means that output are assumed to be castable
                      # as numpy arrays
                      if args.output:
                          for output in ','.join(args.output).split(','):
                              self.shell.push({output:self.Rconverter(self.r(output), dataframe=False)})

                      if args.dataframe:
                          for output in ','.join(args.dataframe).split(','):
                              self.shell.push({output:self.Rconverter(self.r(output), dataframe=True)})

                      for tag, disp_d in display_data:
                          publish_display_data(tag, disp_d)

                      # this will keep a reference to the display_data
                      # which might be useful to other objects who happen to use
                      # this method
                      if self.cache_display_data:
                          self.display_cache = display_data

                      # if in line mode and return_output, return the result as an ndarray
                      if return_output and not args.noreturn:
                          if result != ri.NULL:
                              return self.Rconverter(result, dataframe=False)
              # Fill the {R_DOC}, {RPUSH_DOC}, {RPULL_DOC} and {RGET_DOC} placeholders of
              # the module docstring with the docstrings of the corresponding magics,
              # each prefixed with eight spaces.
              __doc__ = __doc__.format(
                              R_DOC = ' '*8 + RMagics.R.__doc__,
                              RPUSH_DOC = ' '*8 + RMagics.Rpush.__doc__,
                              RPULL_DOC = ' '*8 + RMagics.Rpull.__doc__,
                              RGET_DOC = ' '*8 + RMagics.Rget.__doc__
              )
              def load_ipython_extension(ip):
                  """Register the R magics with the given IPython instance."""
                  ip.register_magics(RMagics)

                  if not ip.has_readline:
                      return
                  # Initialising rpy2 interferes with readline. Since rpy2 has most likely
                  # just been loaded at this point, put IPython's completer delimiters
                  # back. See issue gh-2759.
                  ip.readline.set_completer_delims(ip.readline_delims)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages