##// END OF EJS Templates
added Rget, and options to try to return structured array
Jonathan Taylor -
Show More
@@ -1,434 +1,564
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 ======
4 4 Rmagic
5 5 ======
6 6
7 7 Magic command interface for interactive work with R via rpy2
8 8
9 9 Usage
10 10 =====
11 11
12 12 ``%R``
13 13
14 14 {R_DOC}
15 15
16 16 ``%Rpush``
17 17
18 18 {RPUSH_DOC}
19 19
20 20 ``%Rpull``
21 21
22 22 {RPULL_DOC}
23 23
24 ``%Rget``
25
26 {RGET_DOC}
27
24 28 """
25 29
26 30 #-----------------------------------------------------------------------------
27 31 # Copyright (C) 2012 The IPython Development Team
28 32 #
29 33 # Distributed under the terms of the BSD License. The full license is in
30 34 # the file COPYING, distributed as part of this software.
31 35 #-----------------------------------------------------------------------------
32 36
33 37 import sys
34 38 import tempfile
35 39 from glob import glob
36 40 from shutil import rmtree
37 41 from getopt import getopt
38 42
39 43 # numpy and rpy2 imports
40 44
41 45 import numpy as np
42 46
43 47 import rpy2.rinterface as ri
44 48 import rpy2.robjects as ro
45 49 from rpy2.robjects.numpy2ri import numpy2ri
46 50 ro.conversion.py2ri = numpy2ri
47 51
48 52 # IPython imports
49 53
50 54 from IPython.core.displaypub import publish_display_data
51 55 from IPython.core.magic import (Magics, magics_class, cell_magic, line_magic,
52 56 line_cell_magic)
53 57 from IPython.testing.skipdoctest import skip_doctest
54 58 from IPython.core.magic_arguments import (
55 59 argument, magic_arguments, parse_argstring
56 60 )
57 61 from IPython.utils.py3compat import str_to_unicode, unicode_to_str
58 62
59 63 class RMagicError(ri.RRuntimeError):
60 64 pass
61 65
62 def Rconverter(Robj):
66 def Rconverter(Robj, dataframe=False):
63 67 """
64 68 Convert an object in R's namespace to one suitable
65 69 for ipython's namespace.
66 70
67 71 For a data.frame, it tries to return a structured array.
72 It first checks for colnames, then names.
73 If all are NULL, it returns np.asarray(Robj), else
74 it tries to construct a recarray
68 75
69 76 Parameters
70 77 ----------
71 78
72 79 Robj: an R object returned from rpy2
73 80 """
74 if is_data_frame(Robj):
75 Robj = as_data_frame(Robj)
76 dimRobj = list(np.array(dimR(Robj)))
77 if 1 not in dimRobj:
78 Robj = np.rec.fromarrays(Robj, names = tuple(Robj.names))
79 return np.squeeze(np.asarray(Robj))
80
81 is_data_frame = None
82 as_data_frame = None
83 dimR = None
84 colnames = None
85 ncol = None
86 nrow = None
81 is_data_frame = ro.r('is.data.frame')
82 colnames = ro.r('colnames')
83 rownames = ro.r('rownames') # with pandas, these could be used for the index
84 names = ro.r('names')
85
86
87 if dataframe:
88 as_data_frame = ro.r('as.data.frame')
89 cols = colnames(Robj)
90 rows = rownames(Robj)
91 _names = names(Robj)
92 if cols != ri.NULL:
93 Robj = as_data_frame(Robj)
94 names = tuple(np.array(cols))
95 elif _names != ri.NULL:
96 names = tuple(np.array(_names))
97 else: # failed to find names
98 return np.asarray(Robj)
99 Robj = np.rec.fromarrays(Robj, names = names)
100 return np.asarray(Robj)
87 101
88 102 @magics_class
89 103 class RMagics(Magics):
90 104 """A set of magics useful for interactive work with R via rpy2.
91 105 """
92 106
93 107 def __init__(self, shell, Rconverter=Rconverter,
94 108 pyconverter=np.asarray,
95 109 cache_display_data=False):
96 110 """
97 111 Parameters
98 112 ----------
99 113
100 114 shell : IPython shell
101 115
102 116 pyconverter : callable
103 117 To be called on values in ipython namespace before
104 118 assigning to variables in rpy2.
105 119
106 120 cache_display_data : bool
107 121 If True, the published results of the final call to R are
108 122 cached in the variable 'display_cache'.
109 123
110 124 """
111 125 super(RMagics, self).__init__(shell)
112 126 self.cache_display_data = cache_display_data
113 127
114 128 self.r = ro.R()
115 global is_data_frame, dimR, colnames, ncol, nrow, as_data_frame
116 is_data_frame = self.r('is.data.frame')
117 as_data_frame = self.r('as.data.frame')
118 dimR = self.r('dim')
119 colnames = self.r('colnames')
120 ncol = self.r('ncol')
121 nrow = self.r('nrow')
122 129
123 130 self.Rstdout_cache = []
124 131 self.pyconverter = pyconverter
125 132 self.Rconverter = Rconverter
126 133
127 134 def eval(self, line):
128 135 '''
129 136 Parse and evaluate a line with rpy2.
130 137 Returns the output to R's stdout() connection
131 138 and the value of eval(parse(line)).
132 139 '''
133 140 old_writeconsole = ri.get_writeconsole()
134 141 ri.set_writeconsole(self.write_console)
135 142 try:
136 143 value = ri.baseenv['eval'](ri.parse(line))
137 144 except (ri.RRuntimeError, ValueError) as exception:
138 145 raise RMagicError(unicode_to_str('parsing and evaluating line "%s". R traceback: "%s"\n' %
139 146 (line, str_to_unicode(exception.message, 'utf-8'))))
140 147 text_output = self.flush()
141 148 ri.set_writeconsole(old_writeconsole)
142 149 return text_output, value
143 150
144 151 def write_console(self, output):
145 152 '''
146 153 A hook to capture R's stdout in a cache.
147 154 '''
148 155 self.Rstdout_cache.append(output)
149 156
150 157 def flush(self):
151 158 '''
152 159 Flush R's stdout cache to a string, returning the string.
153 160 '''
154 161 value = ''.join([str_to_unicode(s, 'utf-8') for s in self.Rstdout_cache])
155 162 self.Rstdout_cache = []
156 163 return value
157 164
158 165 @skip_doctest
159 166 @line_magic
160 167 def Rpush(self, line):
161 168 '''
162 169 A line-level magic for R that pushes
163 170 variables from python to rpy2. The line should be made up
164 171 of whitespace separated variable names in the IPython
165 172 namespace::
166 173
167 174 In [7]: import numpy as np
168 175
169 176 In [8]: X = np.array([4.5,6.3,7.9])
170 177
171 178 In [9]: X.mean()
172 179 Out[9]: 6.2333333333333343
173 180
174 181 In [10]: %Rpush X
175 182
176 183 In [11]: %R mean(X)
177 184 Out[11]: array([ 6.23333333])
178 185
179 186 '''
180 187
181 188 inputs = line.split(' ')
182 189 for input in inputs:
183 190 self.r.assign(input, self.pyconverter(self.shell.user_ns[input]))
184 191
185 192 @skip_doctest
193 @magic_arguments()
194 @argument(
195 '-d', '--as_dataframe', action='store_true',
196 default=False,
197 help='Convert objects to data.frames before returning to ipython.'
198 )
199 @argument(
200 'outputs',
201 nargs='*',
202 )
186 203 @line_magic
187 204 def Rpull(self, line):
188 205 '''
189 206 A line-level magic for R that pulls
190 207 variables from rpy2 to python::
191 208
192 209 In [18]: _ = %R x = c(3,4,6.7); y = c(4,6,7); z = c('a',3,4)
193 210
194 211 In [19]: %Rpull x y z
195 212
196 213 In [20]: x
197 214 Out[20]: array([ 3. , 4. , 6.7])
198 215
199 216 In [21]: y
200 217 Out[21]: array([ 4., 6., 7.])
201 218
202 219 In [22]: z
203 220 Out[22]:
204 221 array(['a', '3', '4'],
205 222 dtype='|S1')
206 223
207 224
225 If --as_dataframe, then each object is returned as a structured array
226 after first being passed through "as.data.frame" in R before
227 calling self.Rconverter.
228 This is useful when a structured array is desired as output, or
229 when the object in R has mixed data types.
230 See the %%R docstring for more examples.
231
208 232 Notes
209 233 -----
210 234
211 235 Beware that R names can have '.' so this is not fool proof.
212 236 To avoid this, don't name your R objects with '.'s...
213 237
214 238 '''
215 outputs = line.split(' ')
239 args = parse_argstring(self.Rpull, line)
240 outputs = args.outputs
216 241 for output in outputs:
217 self.shell.push({output:self.Rconverter(self.r(output))})
242 self.shell.push({output:self.Rconverter(self.r(output),dataframe=args.as_dataframe)})
243
244 @skip_doctest
245 @magic_arguments()
246 @argument(
247 '-d', '--as_dataframe', action='store_true',
248 default=False,
249 help='Convert objects to data.frames before returning to ipython.'
250 )
251 @argument(
252 'output',
253 nargs=1,
254 type=str,
255 )
256 @line_magic
257 def Rget(self, line):
258 '''
259 Return an object from rpy2, possibly as a structured array (if possible).
260 Similar to Rpull except only one argument is accepted and the value is
261 returned rather than pushed to self.shell.user_ns::
262
263 In [3]: dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')]
264
265 In [4]: datapy = np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5, 'e')], dtype=dtype)
266
267 In [5]: %R -i datapy
268
269 In [6]: %Rget datapy
270 Out[6]:
271 array([['1', '2', '3', '4'],
272 ['2', '3', '2', '5'],
273 ['a', 'b', 'c', 'e']],
274 dtype='|S1')
275
276 In [7]: %Rget -d datapy
277 Out[7]:
278 array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5.0, 'e')],
279 dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])
280
281 '''
282 args = parse_argstring(self.Rget, line)
283 output = args.output
284 return self.Rconverter(self.r(output[0]),dataframe=args.as_dataframe)
218 285
219 286
220 287 @skip_doctest
221 288 @magic_arguments()
222 289 @argument(
223 290 '-i', '--input', action='append',
224 291 help='Names of input variable from shell.user_ns to be assigned to R variables of the same names after calling self.pyconverter. Multiple names can be passed separated only by commas with no whitespace.'
225 292 )
226 293 @argument(
227 294 '-o', '--output', action='append',
228 295 help='Names of variables to be pushed from rpy2 to shell.user_ns after executing cell body and applying self.Rconverter. Multiple names can be passed separated only by commas with no whitespace.'
229 296 )
230 297 @argument(
231 298 '-w', '--width', type=int,
232 299 help='Width of png plotting device sent as an argument to *png* in R.'
233 300 )
234 301 @argument(
235 302 '-h', '--height', type=int,
236 303 help='Height of png plotting device sent as an argument to *png* in R.'
237 304 )
238 305
239 306 @argument(
307 '-d', '--dataframe', action='append',
308 help='Convert these objects to data.frames and return as structured arrays.'
309 )
310 @argument(
240 311 '-u', '--units', type=int,
241 312 help='Units of png plotting device sent as an argument to *png* in R. One of ["px", "in", "cm", "mm"].'
242 313 )
243 314 @argument(
244 315 '-p', '--pointsize', type=int,
245 316 help='Pointsize of png plotting device sent as an argument to *png* in R.'
246 317 )
247 318 @argument(
248 319 '-b', '--bg',
249 320 help='Background of png plotting device sent as an argument to *png* in R.'
250 321 )
251 322 @argument(
252 323 '-n', '--noreturn',
253 324 help='Force the magic to not return anything.',
254 325 action='store_true',
255 326 default=False
256 327 )
257 328 @argument(
258 329 'code',
259 330 nargs='*',
260 331 )
261 332 @line_cell_magic
262 333 def R(self, line, cell=None):
263 334 '''
264 335 Execute code in R, and pull some of the results back into the Python namespace.
265 336
266 337 In line mode, this will evaluate an expression and convert the returned value to a Python object.
267 338 The return value is determined by rpy2's behaviour of returning the result of evaluating the
268 339 final line. Multiple R lines can be executed by joining them with semicolons::
269 340
270 341 In [9]: %R X=c(1,4,5,7); sd(X); mean(X)
271 342 Out[9]: array([ 4.25])
272 343
273 344 As a cell, this will run a block of R code, without bringing anything back by default::
274 345
275 346 In [10]: %%R
276 347 ....: Y = c(2,4,3,9)
277 348 ....: print(summary(lm(Y~X)))
278 349 ....:
279 350
280 351 Call:
281 352 lm(formula = Y ~ X)
282 353
283 354 Residuals:
284 355 1 2 3 4
285 356 0.88 -0.24 -2.28 1.64
286 357
287 358 Coefficients:
288 359 Estimate Std. Error t value Pr(>|t|)
289 360 (Intercept) 0.0800 2.3000 0.035 0.975
290 361 X 1.0400 0.4822 2.157 0.164
291 362
292 363 Residual standard error: 2.088 on 2 degrees of freedom
293 364 Multiple R-squared: 0.6993,Adjusted R-squared: 0.549
294 365 F-statistic: 4.651 on 1 and 2 DF, p-value: 0.1638
295 366
296 367 In the notebook, plots are published as the output of the cell.
297 368
298 369 %R plot(X, Y)
299 370
300 371 will create a scatter plot of X vs Y.
301 372
302 373 If cell is not None and line has some R code, it is prepended to
303 374 the R code in cell.
304 375
305 376 Objects can be passed back and forth between rpy2 and python via the -i -o flags in line::
306 377
307 378 In [14]: Z = np.array([1,4,5,10])
308 379
309 380 In [15]: %R -i Z mean(Z)
310 381 Out[15]: array([ 5.])
311 382
312 383
313 384 In [16]: %R -o W W=Z*mean(Z)
314 385 Out[16]: array([ 5., 20., 25., 50.])
315 386
316 387 In [17]: W
317 388 Out[17]: array([ 5., 20., 25., 50.])
318 389
319 390 The return value is determined by these rules:
320 391
321 392 * If the cell is not None, the magic returns None.
322 393
323 394 * If the cell evaluates as False, the resulting value is returned
324 395 unless the final line prints something to the console, in
325 396 which case None is returned.
326 397
327 398 * If the final line results in a NULL value when evaluated
328 399 by rpy2, then None is returned.
329 400
401 The --dataframe argument will return structured arrays
402 from dataframes in R. This is useful for dataframes with
403 mixed data types. Note also that for a data.frame,
404 if it is returned as an ndarray, it is transposed::
405
406 In [18]: dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')]
407
408 In [19]: datapy = np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5, 'e')], dtype=dtype)
409
410 In [20]: %%R -o datar
411 datar = datapy
412 ....:
413
414 In [21]: datar
415 Out[21]:
416 array([['1', '2', '3', '4'],
417 ['2', '3', '2', '5'],
418 ['a', 'b', 'c', 'e']],
419 dtype='|S1')
420
421 In [22]: %%R -d datar
422 datar = datapy
423 ....:
424
425 In [23]: datar
426 Out[23]:
427 array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5.0, 'e')],
428 dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])
429
430 The --dataframe argument first tries colnames, then names.
431 If all are NULL, it returns an ndarray (i.e. unstructured)::
432
433
434 In [1]: %R mydata=c(4,6,8.3); NULL
435
436 In [2]: %R -d mydata
437
438 In [3]: mydata
439 Out[3]: array([ 4. , 6. , 8.3])
440
441 In [4]: %R names(mydata) = c('a','b','c'); NULL
442
443 In [5]: %R -d mydata
444
445 In [6]: mydata
446 Out[6]:
447 array((4.0, 6.0, 8.3),
448 dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])
449
450 In [7]: %R -o mydata
451
452 In [8]: mydata
453 Out[8]: array([ 4. , 6. , 8.3])
454
330 455
331 456 '''
332 457
333 458 args = parse_argstring(self.R, line)
334 459
335 460 # arguments 'code' in line are prepended to
336 461 # the cell lines
337 462 if not cell:
338 463 code = ''
339 464 return_output = True
340 465 line_mode = True
341 466 else:
342 467 code = cell
343 468 return_output = False
344 469 line_mode = False
345 470
346 471 code = ' '.join(args.code) + code
347 472
348 473 if args.input:
349 474 for input in ','.join(args.input).split(','):
350 475 self.r.assign(input, self.pyconverter(self.shell.user_ns[input]))
351 476
352 477 png_argdict = dict([(n, getattr(args, n)) for n in ['units', 'height', 'width', 'bg', 'pointsize']])
353 478 png_args = ','.join(['%s=%s' % (o,v) for o, v in png_argdict.items() if v is not None])
354 479 # execute the R code in a temporary directory
355 480
356 481 tmpd = tempfile.mkdtemp()
357 482 self.r('png("%s/Rplots%%03d.png",%s)' % (tmpd, png_args))
358 483
359 484 text_output = ''
360 485 if line_mode:
361 486 for line in code.split(';'):
362 487 text_result, result = self.eval(line)
363 488 text_output += text_result
364 489 if text_result:
365 490 # the last line printed something to the console so we won't return it
366 491 return_output = False
367 492 else:
368 493 text_result, result = self.eval(code)
369 494 text_output += text_result
370 495
371 496 self.r('dev.off()')
372 497
373 498 # read out all the saved .png files
374 499
375 500 images = [open(imgfile, 'rb').read() for imgfile in glob("%s/Rplots*png" % tmpd)]
376 501
377 502 # now publish the images
378 503 # mimicking IPython/zmq/pylab/backend_inline.py
379 504 fmt = 'png'
380 505 mimetypes = { 'png' : 'image/png', 'svg' : 'image/svg+xml' }
381 506 mime = mimetypes[fmt]
382 507
383 508 # publish the printed R objects, if any
384 509
385 510 display_data = []
386 511 if text_output:
387 512 display_data.append(('RMagic.R', {'text/plain':text_output}))
388 513
389 514 # flush text streams before sending figures, helps a little with output
390 515 for image in images:
391 516 # synchronization in the console (though it's a bandaid, not a real sln)
392 517 sys.stdout.flush(); sys.stderr.flush()
393 518 display_data.append(('RMagic.R', {mime: image}))
394 519
395 520 # kill the temporary directory
396 521 rmtree(tmpd)
397 522
398 523 # try to turn every output into a numpy array
399 524 # this means that output are assumed to be castable
400 525 # as numpy arrays
401 526
402 527 if args.output:
403 528 for output in ','.join(args.output).split(','):
404 self.shell.push({output:self.Rconverter(self.r(output))})
529 self.shell.push({output:self.Rconverter(self.r(output), dataframe=False)})
530
531 if args.dataframe:
532 for output in ','.join(args.dataframe).split(','):
533 self.shell.push({output:self.Rconverter(self.r(output), dataframe=True)})
405 534
406 535 for tag, disp_d in display_data:
407 536 publish_display_data(tag, disp_d)
408 537
409 538 # this will keep a reference to the display_data
410 539 # which might be useful to other objects who happen to use
411 540 # this method
412 541
413 542 if self.cache_display_data:
414 543 self.display_cache = display_data
415 544
416 545 # if in line mode and return_output, return the result as an ndarray
417 546 if return_output and not args.noreturn:
418 547 if result != ri.NULL:
419 return self.Rconverter(result)
548 return self.Rconverter(result, dataframe=False)
420 549
421 550 __doc__ = __doc__.format(
422 551 R_DOC = ' '*8 + RMagics.R.__doc__,
423 552 RPUSH_DOC = ' '*8 + RMagics.Rpush.__doc__,
423 552 RPULL_DOC = ' '*8 + RMagics.Rpull.__doc__,
554 RGET_DOC = ' '*8 + RMagics.Rget.__doc__
425 555 )
426 556
427 557
428 558 _loaded = False
429 559 def load_ipython_extension(ip):
430 560 """Load the extension in IPython."""
431 561 global _loaded
432 562 if not _loaded:
433 563 ip.register_magics(RMagics)
434 564 _loaded = True
@@ -1,62 +1,62
1 1 import numpy as np
2 2 from IPython.core.interactiveshell import InteractiveShell
3 3 from IPython.extensions import rmagic
4 4 import nose.tools as nt
5 5
6 6 ip = get_ipython()
7 7 ip.magic('load_ext rmagic')
8 8
9 9
10 10 def test_push():
11 11 rm = rmagic.RMagics(ip)
12 12 ip.push({'X':np.arange(5), 'Y':np.array([3,5,4,6,7])})
13 13 ip.run_line_magic('Rpush', 'X Y')
14 14 np.testing.assert_almost_equal(np.asarray(rm.r('X')), ip.user_ns['X'])
15 15 np.testing.assert_almost_equal(np.asarray(rm.r('Y')), ip.user_ns['Y'])
16 16
17 17 def test_pull():
18 18 rm = rmagic.RMagics(ip)
19 19 rm.r('Z=c(11:20)')
20 20 ip.run_line_magic('Rpull', 'Z')
21 21 np.testing.assert_almost_equal(np.asarray(rm.r('Z')), ip.user_ns['Z'])
22 22 np.testing.assert_almost_equal(ip.user_ns['Z'], np.arange(11,21))
23 23
24 24 def test_Rconverter():
25 25 datapy= np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c')],
26 26 dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])
27 27 ip.user_ns['datapy'] = datapy
28 28 ip.run_line_magic('Rpush', 'datapy')
29 29
30 30 # test to see if a copy is being made
31 v = ip.run_line_magic('R', 'datapy')
32 w = ip.run_line_magic('R', 'datapy')
31 v = ip.run_line_magic('Rget', '-d datapy')
32 w = ip.run_line_magic('Rget', '-d datapy')
33 33 np.testing.assert_almost_equal(w['x'], v['x'])
34 34 np.testing.assert_almost_equal(w['y'], v['y'])
35 35 nt.assert_true(np.all(w['z'] == v['z']))
36 36 np.testing.assert_equal(id(w.data), id(v.data))
37 37 nt.assert_equal(w.dtype, v.dtype)
38 38
39 ip.run_cell_magic('R', ' -o datar datar=datapy', '')
39 ip.run_cell_magic('R', ' -d datar datar=datapy', '')
40 40
41 u = ip.run_line_magic('R', 'datar')
41 u = ip.run_line_magic('Rget', ' -d datar')
42 42 np.testing.assert_almost_equal(u['x'], v['x'])
43 43 np.testing.assert_almost_equal(u['y'], v['y'])
44 44 nt.assert_true(np.all(u['z'] == v['z']))
45 45 np.testing.assert_equal(id(u.data), id(v.data))
46 46 nt.assert_equal(u.dtype, v.dtype)
47 47
48 48
49 49 def test_cell_magic():
50 50
51 51 ip.push({'x':np.arange(5), 'y':np.array([3,5,4,6,7])})
52 52 snippet = '''
53 53 print(summary(a))
54 54 plot(x, y, pch=23, bg='orange', cex=2)
55 55 plot(x, x)
56 56 print(summary(x))
57 57 r = resid(a)
58 58 xc = coef(a)
59 59 '''
60 60 ip.run_cell_magic('R', '-i x,y -o r,xc a=lm(y~x)', snippet)
61 61 np.testing.assert_almost_equal(ip.user_ns['xc'], [3.2, 0.9])
62 62 np.testing.assert_almost_equal(ip.user_ns['r'], np.array([-0.2, 0.9, -1. , 0.1, 0.2]))
General Comments 0
You need to be logged in to leave comments. Login now