# -*- coding: utf-8 -*- """ ====== Rmagic ====== Magic command interface for interactive work with R via rpy2 .. note:: The ``rpy2`` package needs to be installed separately. It can be obtained using ``easy_install`` or ``pip``. You will also need a working copy of R. Usage ===== To enable the magics below, execute ``%load_ext rmagic``. ``%R`` {R_DOC} ``%Rpush`` {RPUSH_DOC} ``%Rpull`` {RPULL_DOC} ``%Rget`` {RGET_DOC} """ from __future__ import print_function #----------------------------------------------------------------------------- # Copyright (C) 2012 The IPython Development Team # # Distributed under the terms of the BSD License. The full license is in # the file COPYING, distributed as part of this software. #----------------------------------------------------------------------------- import sys import tempfile from glob import glob from shutil import rmtree import warnings # numpy and rpy2 imports import numpy as np import rpy2.rinterface as ri import rpy2.robjects as ro try: from rpy2.robjects import pandas2ri pandas2ri.activate() except ImportError: pandas2ri = None from rpy2.robjects import numpy2ri numpy2ri.activate() # IPython imports from IPython.core.displaypub import publish_display_data from IPython.core.magic import (Magics, magics_class, line_magic, line_cell_magic, needs_local_scope) from IPython.testing.skipdoctest import skip_doctest from IPython.core.magic_arguments import ( argument, magic_arguments, parse_argstring ) from simplegeneric import generic from IPython.utils.py3compat import (str_to_unicode, unicode_to_str, PY3, unicode_type) from IPython.utils.text import dedent class RInterpreterError(ri.RRuntimeError): """An error when running R code in a %%R magic cell.""" def __init__(self, line, err, stdout): self.line = line self.err = err.rstrip() self.stdout = stdout.rstrip() def __unicode__(self): s = 'Failed to parse and evaluate line %r.\nR error message: %r' % \ (self.line, self.err) if self.stdout and (self.stdout != self.err): s += '\nR stdout:\n' + self.stdout return s if PY3: __str__ = __unicode__ else: def __str__(self): return unicode_to_str(unicode(self), 'utf-8') def Rconverter(Robj, dataframe=False): """ Convert an object in R's namespace to one suitable for ipython's namespace. For a data.frame, it tries to return a structured array. It first checks for colnames, then names. If all are NULL, it returns np.asarray(Robj), else it tries to construct a recarray Parameters ---------- Robj: an R object returned from rpy2 """ is_data_frame = ro.r('is.data.frame') colnames = ro.r('colnames') rownames = ro.r('rownames') # with pandas, these could be used for the index names = ro.r('names') if dataframe: as_data_frame = ro.r('as.data.frame') cols = colnames(Robj) _names = names(Robj) if cols != ri.NULL: Robj = as_data_frame(Robj) names = tuple(np.array(cols)) elif _names != ri.NULL: names = tuple(np.array(_names)) else: # failed to find names return np.asarray(Robj) Robj = np.rec.fromarrays(Robj, names = names) return np.asarray(Robj) @generic def pyconverter(pyobj): """Convert Python objects to R objects. Add types using the decorator: @pyconverter.when_type """ return pyobj # The default conversion for lists seems to make them a nested list. That has # some advantages, but is rarely convenient, so for interactive use, we convert # lists to a numpy array, which becomes an R vector. @pyconverter.when_type(list) def pyconverter_list(pyobj): return np.asarray(pyobj) if pandas2ri is None: # pandas2ri was new in rpy2 2.3.3, so for now we'll fallback to pandas' # conversion function. try: from pandas import DataFrame from pandas.rpy.common import convert_to_r_dataframe @pyconverter.when_type(DataFrame) def pyconverter_dataframe(pyobj): return convert_to_r_dataframe(pyobj, strings_as_factors=True) except ImportError: pass @magics_class class RMagics(Magics): """A set of magics useful for interactive work with R via rpy2. """ def __init__(self, shell, Rconverter=Rconverter, pyconverter=pyconverter, cache_display_data=False): """ Parameters ---------- shell : IPython shell Rconverter : callable To be called on values taken from R before putting them in the IPython namespace. pyconverter : callable To be called on values in ipython namespace before assigning to variables in rpy2. cache_display_data : bool If True, the published results of the final call to R are cached in the variable 'display_cache'. """ super(RMagics, self).__init__(shell) self.cache_display_data = cache_display_data self.r = ro.R() self.Rstdout_cache = [] self.pyconverter = pyconverter self.Rconverter = Rconverter def eval(self, line): ''' Parse and evaluate a line of R code with rpy2. Returns the output to R's stdout() connection, the value generated by evaluating the code, and a boolean indicating whether the return value would be visible if the line of code were evaluated in an R REPL. R Code evaluation and visibility determination are done via an R call of the form withVisible({}) ''' old_writeconsole = ri.get_writeconsole() ri.set_writeconsole(self.write_console) try: res = ro.r("withVisible({%s\n})" % line) value = res[0] #value (R object) visible = ro.conversion.ri2py(res[1])[0] #visible (boolean) except (ri.RRuntimeError, ValueError) as exception: warning_or_other_msg = self.flush() # otherwise next return seems to have copy of error raise RInterpreterError(line, str_to_unicode(str(exception)), warning_or_other_msg) text_output = self.flush() ri.set_writeconsole(old_writeconsole) return text_output, value, visible def write_console(self, output): ''' A hook to capture R's stdout in a cache. ''' self.Rstdout_cache.append(output) def flush(self): ''' Flush R's stdout cache to a string, returning the string. ''' value = ''.join([str_to_unicode(s, 'utf-8') for s in self.Rstdout_cache]) self.Rstdout_cache = [] return value @skip_doctest @needs_local_scope @line_magic def Rpush(self, line, local_ns=None): ''' A line-level magic for R that pushes variables from python to rpy2. The line should be made up of whitespace separated variable names in the IPython namespace:: In [7]: import numpy as np In [8]: X = np.array([4.5,6.3,7.9]) In [9]: X.mean() Out[9]: 6.2333333333333343 In [10]: %Rpush X In [11]: %R mean(X) Out[11]: array([ 6.23333333]) ''' if local_ns is None: local_ns = {} inputs = line.split(' ') for input in inputs: try: val = local_ns[input] except KeyError: try: val = self.shell.user_ns[input] except KeyError: # reraise the KeyError as a NameError so that it looks like # the standard python behavior when you use an unnamed # variable raise NameError("name '%s' is not defined" % input) self.r.assign(input, self.pyconverter(val)) @skip_doctest @magic_arguments() @argument( '-d', '--as_dataframe', action='store_true', default=False, help='Convert objects to data.frames before returning to ipython.' ) @argument( 'outputs', nargs='*', ) @line_magic def Rpull(self, line): ''' A line-level magic for R that pulls variables from python to rpy2:: In [18]: _ = %R x = c(3,4,6.7); y = c(4,6,7); z = c('a',3,4) In [19]: %Rpull x y z In [20]: x Out[20]: array([ 3. , 4. , 6.7]) In [21]: y Out[21]: array([ 4., 6., 7.]) In [22]: z Out[22]: array(['a', '3', '4'], dtype='|S1') If --as_dataframe, then each object is returned as a structured array after first passed through "as.data.frame" in R before being calling self.Rconverter. This is useful when a structured array is desired as output, or when the object in R has mixed data types. See the %%R docstring for more examples. Notes ----- Beware that R names can have '.' so this is not fool proof. To avoid this, don't name your R objects with '.'s... ''' args = parse_argstring(self.Rpull, line) outputs = args.outputs for output in outputs: self.shell.push({output:self.Rconverter(self.r(output),dataframe=args.as_dataframe)}) @skip_doctest @magic_arguments() @argument( '-d', '--as_dataframe', action='store_true', default=False, help='Convert objects to data.frames before returning to ipython.' ) @argument( 'output', nargs=1, type=str, ) @line_magic def Rget(self, line): ''' Return an object from rpy2, possibly as a structured array (if possible). Similar to Rpull except only one argument is accepted and the value is returned rather than pushed to self.shell.user_ns:: In [3]: dtype=[('x', '|t|) (Intercept) 0.0800 2.3000 0.035 0.975 X 1.0400 0.4822 2.157 0.164 Residual standard error: 2.088 on 2 degrees of freedom Multiple R-squared: 0.6993,Adjusted R-squared: 0.549 F-statistic: 4.651 on 1 and 2 DF, p-value: 0.1638 In the notebook, plots are published as the output of the cell:: %R plot(X, Y) will create a scatter plot of X bs Y. If cell is not None and line has some R code, it is prepended to the R code in cell. Objects can be passed back and forth between rpy2 and python via the -i -o flags in line:: In [14]: Z = np.array([1,4,5,10]) In [15]: %R -i Z mean(Z) Out[15]: array([ 5.]) In [16]: %R -o W W=Z*mean(Z) Out[16]: array([ 5., 20., 25., 50.]) In [17]: W Out[17]: array([ 5., 20., 25., 50.]) The return value is determined by these rules: * If the cell is not None, the magic returns None. * If the cell evaluates as False, the resulting value is returned unless the final line prints something to the console, in which case None is returned. * If the final line results in a NULL value when evaluated by rpy2, then None is returned. * No attempt is made to convert the final value to a structured array. Use the --dataframe flag or %Rget to push / return a structured array. * If the -n flag is present, there is no return value. * A trailing ';' will also result in no return value as the last value in the line is an empty string. The --dataframe argument will attempt to return structured arrays. This is useful for dataframes with mixed data types. Note also that for a data.frame, if it is returned as an ndarray, it is transposed:: In [18]: dtype=[('x', '