##// END OF EJS Templates
bug in __doc__, flushing error message from RMagic's stdout cache
Jonathan Taylor -
Show More
@@ -1,565 +1,566 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 ======
4 4 Rmagic
5 5 ======
6 6
7 7 Magic command interface for interactive work with R via rpy2
8 8
9 9 Usage
10 10 =====
11 11
12 12 ``%R``
13 13
14 14 {R_DOC}
15 15
16 16 ``%Rpush``
17 17
18 18 {RPUSH_DOC}
19 19
20 20 ``%Rpull``
21 21
22 22 {RPULL_DOC}
23 23
24 24 ``%Rget``
25 25
26 26 {RGET_DOC}
27 27
28 28 """
29 29
30 30 #-----------------------------------------------------------------------------
31 31 # Copyright (C) 2012 The IPython Development Team
32 32 #
33 33 # Distributed under the terms of the BSD License. The full license is in
34 34 # the file COPYING, distributed as part of this software.
35 35 #-----------------------------------------------------------------------------
36 36
37 37 import sys
38 38 import tempfile
39 39 from glob import glob
40 40 from shutil import rmtree
41 41 from getopt import getopt
42 42
43 43 # numpy and rpy2 imports
44 44
45 45 import numpy as np
46 46
47 47 import rpy2.rinterface as ri
48 48 import rpy2.robjects as ro
49 49 from rpy2.robjects.numpy2ri import numpy2ri
50 50 ro.conversion.py2ri = numpy2ri
51 51
52 52 # IPython imports
53 53
54 54 from IPython.core.displaypub import publish_display_data
55 55 from IPython.core.magic import (Magics, magics_class, cell_magic, line_magic,
56 56 line_cell_magic)
57 57 from IPython.testing.skipdoctest import skip_doctest
58 58 from IPython.core.magic_arguments import (
59 59 argument, magic_arguments, parse_argstring
60 60 )
61 61 from IPython.utils.py3compat import str_to_unicode, unicode_to_str
62 62
class RMagicError(ri.RRuntimeError):
    """Error raised when R code handed to the magics cannot be parsed or evaluated."""
65 65
def Rconverter(Robj, dataframe=False):
    """
    Convert an object in R's namespace to one suitable
    for ipython's namespace.

    For a data.frame, it tries to return a structured array.
    It first checks for colnames, then names.
    If both are NULL, it returns np.asarray(Robj), else
    it tries to construct a recarray.

    Parameters
    ----------

    Robj : an R object returned from rpy2

    dataframe : bool
        If False (the default), the object is passed straight to
        np.asarray. If True, a structured array is attempted as
        described above.

    Returns
    -------

    The result of np.asarray on either Robj itself or on the record
    array built from it.
    """
    if not dataframe:
        # Nothing to look up in R for the plain conversion.
        return np.asarray(Robj)

    # NOTE: rownames(Robj) is deliberately not queried here; with pandas
    # it could be used for the index.
    col_names = ro.r('colnames')(Robj)
    obj_names = ro.r('names')(Robj)

    if col_names != ri.NULL:
        Robj = ro.r('as.data.frame')(Robj)
        field_names = tuple(np.array(col_names))
    elif obj_names != ri.NULL:
        field_names = tuple(np.array(obj_names))
    else:
        # failed to find names: fall back to an unstructured array
        return np.asarray(Robj)

    return np.asarray(np.rec.fromarrays(Robj, names=field_names))
99 99
100 100 @magics_class
101 101 class RMagics(Magics):
102 102 """A set of magics useful for interactive work with R via rpy2.
103 103 """
104 104
105 105 def __init__(self, shell, Rconverter=Rconverter,
106 106 pyconverter=np.asarray,
107 107 cache_display_data=False):
108 108 """
109 109 Parameters
110 110 ----------
111 111
112 112 shell : IPython shell
113 113
114 114 pyconverter : callable
115 115 To be called on values in ipython namespace before
116 116 assigning to variables in rpy2.
117 117
118 118 cache_display_data : bool
119 119 If True, the published results of the final call to R are
120 120 cached in the variable 'display_cache'.
121 121
122 122 """
123 123 super(RMagics, self).__init__(shell)
124 124 self.cache_display_data = cache_display_data
125 125
126 126 self.r = ro.R()
127 127
128 128 self.Rstdout_cache = []
129 129 self.pyconverter = pyconverter
130 130 self.Rconverter = Rconverter
131 131
132 132 def eval(self, line):
133 133 '''
134 134 Parse and evaluate a line with rpy2.
135 135 Returns the output to R's stdout() connection
136 136 and the value of eval(parse(line)).
137 137 '''
138 138 old_writeconsole = ri.get_writeconsole()
139 139 ri.set_writeconsole(self.write_console)
140 140 try:
141 141 value = ri.baseenv['eval'](ri.parse(line))
142 142 except (ri.RRuntimeError, ValueError) as exception:
143 self.flush() # otherwise next return seems to have copy of error
143 144 raise RMagicError(unicode_to_str('parsing and evaluating line "%s". R traceback: "%s"\n' %
144 145 (line, str_to_unicode(exception.message, 'utf-8'))))
145 146 text_output = self.flush()
146 147 ri.set_writeconsole(old_writeconsole)
147 148 return text_output, value
148 149
149 150 def write_console(self, output):
150 151 '''
151 152 A hook to capture R's stdout in a cache.
152 153 '''
153 154 self.Rstdout_cache.append(output)
154 155
155 156 def flush(self):
156 157 '''
157 158 Flush R's stdout cache to a string, returning the string.
158 159 '''
159 160 value = ''.join([str_to_unicode(s, 'utf-8') for s in self.Rstdout_cache])
160 161 self.Rstdout_cache = []
161 162 return value
162 163
163 164 @skip_doctest
164 165 @line_magic
165 166 def Rpush(self, line):
166 167 '''
167 168 A line-level magic for R that pushes
168 169 variables from python to rpy2. The line should be made up
169 170 of whitespace separated variable names in the IPython
170 171 namespace::
171 172
172 173 In [7]: import numpy as np
173 174
174 175 In [8]: X = np.array([4.5,6.3,7.9])
175 176
176 177 In [9]: X.mean()
177 178 Out[9]: 6.2333333333333343
178 179
179 180 In [10]: %Rpush X
180 181
181 182 In [11]: %R mean(X)
182 183 Out[11]: array([ 6.23333333])
183 184
184 185 '''
185 186
186 187 inputs = line.split(' ')
187 188 for input in inputs:
188 189 self.r.assign(input, self.pyconverter(self.shell.user_ns[input]))
189 190
190 191 @skip_doctest
191 192 @magic_arguments()
192 193 @argument(
193 194 '-d', '--as_dataframe', action='store_true',
194 195 default=False,
195 196 help='Convert objects to data.frames before returning to ipython.'
196 197 )
197 198 @argument(
198 199 'outputs',
199 200 nargs='*',
200 201 )
201 202 @line_magic
202 203 def Rpull(self, line):
203 204 '''
204 205 A line-level magic for R that pulls
205 206 variables from python to rpy2::
206 207
207 208 In [18]: _ = %R x = c(3,4,6.7); y = c(4,6,7); z = c('a',3,4)
208 209
209 210 In [19]: %Rpull x y z
210 211
211 212 In [20]: x
212 213 Out[20]: array([ 3. , 4. , 6.7])
213 214
214 215 In [21]: y
215 216 Out[21]: array([ 4., 6., 7.])
216 217
217 218 In [22]: z
218 219 Out[22]:
219 220 array(['a', '3', '4'],
220 221 dtype='|S1')
221 222
222 223
223 224 If --as_dataframe, then each object is returned as a structured array
224 225 after first passed through "as.data.frame" in R before
225 226 being calling self.Rconverter.
226 227 This is useful when a structured array is desired as output, or
227 228 when the object in R has mixed data types.
228 229 See the %%R docstring for more examples.
229 230
230 231 Notes
231 232 -----
232 233
233 234 Beware that R names can have '.' so this is not fool proof.
234 235 To avoid this, don't name your R objects with '.'s...
235 236
236 237 '''
237 238 args = parse_argstring(self.Rpull, line)
238 239 outputs = args.outputs
239 240 for output in outputs:
240 241 self.shell.push({output:self.Rconverter(self.r(output),dataframe=args.as_dataframe)})
241 242
242 243 @skip_doctest
243 244 @magic_arguments()
244 245 @argument(
245 246 '-d', '--as_dataframe', action='store_true',
246 247 default=False,
247 248 help='Convert objects to data.frames before returning to ipython.'
248 249 )
249 250 @argument(
250 251 'output',
251 252 nargs=1,
252 253 type=str,
253 254 )
254 255 @line_magic
255 256 def Rget(self, line):
256 257 '''
257 258 Return an object from rpy2, possibly as a structured array (if possible).
258 259 Similar to Rpull except only one argument is accepted and the value is
259 260 returned rather than pushed to self.shell.user_ns::
260 261
261 262 In [3]: dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')]
262 263
263 264 In [4]: datapy = np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5, 'e')], dtype=dtype)
264 265
265 266 In [5]: %R -i datapy
266 267
267 268 In [6]: %Rget datapy
268 269 Out[6]:
269 270 array([['1', '2', '3', '4'],
270 271 ['2', '3', '2', '5'],
271 272 ['a', 'b', 'c', 'e']],
272 273 dtype='|S1')
273 274
274 275 In [7]: %Rget -d datapy
275 276 Out[7]:
276 277 array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5.0, 'e')],
277 278 dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])
278 279
279 280 '''
280 281 args = parse_argstring(self.Rget, line)
281 282 output = args.output
282 283 return self.Rconverter(self.r(output[0]),dataframe=args.as_dataframe)
283 284
284 285
285 286 @skip_doctest
286 287 @magic_arguments()
287 288 @argument(
288 289 '-i', '--input', action='append',
289 290 help='Names of input variable from shell.user_ns to be assigned to R variables of the same names after calling self.pyconverter. Multiple names can be passed separated only by commas with no whitespace.'
290 291 )
291 292 @argument(
292 293 '-o', '--output', action='append',
293 294 help='Names of variables to be pushed from rpy2 to shell.user_ns after executing cell body and applying self.Rconverter. Multiple names can be passed separated only by commas with no whitespace.'
294 295 )
295 296 @argument(
296 297 '-w', '--width', type=int,
297 298 help='Width of png plotting device sent as an argument to *png* in R.'
298 299 )
299 300 @argument(
300 301 '-h', '--height', type=int,
301 302 help='Height of png plotting device sent as an argument to *png* in R.'
302 303 )
303 304
304 305 @argument(
305 306 '-d', '--dataframe', action='append',
306 307 help='Convert these objects to data.frames and return as structured arrays.'
307 308 )
308 309 @argument(
309 310 '-u', '--units', type=int,
310 311 help='Units of png plotting device sent as an argument to *png* in R. One of ["px", "in", "cm", "mm"].'
311 312 )
312 313 @argument(
313 314 '-p', '--pointsize', type=int,
314 315 help='Pointsize of png plotting device sent as an argument to *png* in R.'
315 316 )
316 317 @argument(
317 318 '-b', '--bg',
318 319 help='Background of png plotting device sent as an argument to *png* in R.'
319 320 )
320 321 @argument(
321 322 '-n', '--noreturn',
322 323 help='Force the magic to not return anything.',
323 324 action='store_true',
324 325 default=False
325 326 )
326 327 @argument(
327 328 'code',
328 329 nargs='*',
329 330 )
330 331 @line_cell_magic
331 332 def R(self, line, cell=None):
332 333 '''
333 334 Execute code in R, and pull some of the results back into the Python namespace.
334 335
335 336 In line mode, this will evaluate an expression and convert the returned value to a Python object.
336 337 The return value is determined by rpy2's behaviour of returning the result of evaluating the
337 338 final line.
338 339
339 340 Multiple R lines can be executed by joining them with semicolons::
340 341
341 342 In [9]: %R X=c(1,4,5,7); sd(X); mean(X)
342 343 Out[9]: array([ 4.25])
343 344
344 345 As a cell, this will run a block of R code, without bringing anything back by default::
345 346
346 347 In [10]: %%R
347 348 ....: Y = c(2,4,3,9)
348 349 ....: print(summary(lm(Y~X)))
349 350 ....:
350 351
351 352 Call:
352 353 lm(formula = Y ~ X)
353 354
354 355 Residuals:
355 356 1 2 3 4
356 357 0.88 -0.24 -2.28 1.64
357 358
358 359 Coefficients:
359 360 Estimate Std. Error t value Pr(>|t|)
360 361 (Intercept) 0.0800 2.3000 0.035 0.975
361 362 X 1.0400 0.4822 2.157 0.164
362 363
363 364 Residual standard error: 2.088 on 2 degrees of freedom
364 365 Multiple R-squared: 0.6993,Adjusted R-squared: 0.549
365 366 F-statistic: 4.651 on 1 and 2 DF, p-value: 0.1638
366 367
367 368 In the notebook, plots are published as the output of the cell.
368 369
369 370 %R plot(X, Y)
370 371
371 372 will create a scatter plot of X bs Y.
372 373
373 374 If cell is not None and line has some R code, it is prepended to
374 375 the R code in cell.
375 376
376 377 Objects can be passed back and forth between rpy2 and python via the -i -o flags in line::
377 378
378 379 In [14]: Z = np.array([1,4,5,10])
379 380
380 381 In [15]: %R -i Z mean(Z)
381 382 Out[15]: array([ 5.])
382 383
383 384
384 385 In [16]: %R -o W W=Z*mean(Z)
385 386 Out[16]: array([ 5., 20., 25., 50.])
386 387
387 388 In [17]: W
388 389 Out[17]: array([ 5., 20., 25., 50.])
389 390
390 391 The return value is determined by these rules:
391 392
392 393 * If the cell is not None, the magic returns None.
393 394
394 395 * If the cell evaluates as False, the resulting value is returned
395 396 unless the final line prints something to the console, in
396 397 which case None is returned.
397 398
398 399 * If the final line results in a NULL value when evaluated
399 400 by rpy2, then None is returned.
400 401
401 402 * No attempt is made to convert the final value to a structured array.
402 403 Use the --dataframe flag or %Rget to push / return a structured array.
403 404
404 405 The --dataframe argument will attempt to return structured arrays.
405 406 This is useful for dataframes with
406 407 mixed data types. Note also that for a data.frame,
407 408 if it is returned as an ndarray, it is transposed::
408 409
409 410 In [18]: dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')]
410 411
411 412 In [19]: datapy = np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5, 'e')], dtype=dtype)
412 413
413 414 In [20]: %%R -o datar
414 415 datar = datapy
415 416 ....:
416 417
417 418 In [21]: datar
418 419 Out[21]:
419 420 array([['1', '2', '3', '4'],
420 421 ['2', '3', '2', '5'],
421 422 ['a', 'b', 'c', 'e']],
422 423 dtype='|S1')
423 424
424 425 In [22]: %%R -d datar
425 426 datar = datapy
426 427 ....:
427 428
428 429 In [23]: datar
429 430 Out[23]:
430 431 array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5.0, 'e')],
431 432 dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])
432 433
433 434 The --dataframe argument first tries colnames, then names.
434 435 If both are NULL, it returns an ndarray (i.e. unstructured)::
435 436
436 437 In [1]: %R mydata=c(4,6,8.3); NULL
437 438
438 439 In [2]: %R -d mydata
439 440
440 441 In [3]: mydata
441 442 Out[3]: array([ 4. , 6. , 8.3])
442 443
443 444 In [4]: %R names(mydata) = c('a','b','c'); NULL
444 445
445 446 In [5]: %R -d mydata
446 447
447 448 In [6]: mydata
448 449 Out[6]:
449 450 array((4.0, 6.0, 8.3),
450 451 dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])
451 452
452 453 In [7]: %R -o mydata
453 454
454 455 In [8]: mydata
455 456 Out[8]: array([ 4. , 6. , 8.3])
456 457
457 458 '''
458 459
459 460 args = parse_argstring(self.R, line)
460 461
461 462 # arguments 'code' in line are prepended to
462 463 # the cell lines
463 464 if not cell:
464 465 code = ''
465 466 return_output = True
466 467 line_mode = True
467 468 else:
468 469 code = cell
469 470 return_output = False
470 471 line_mode = False
471 472
472 473 code = ' '.join(args.code) + code
473 474
474 475 if args.input:
475 476 for input in ','.join(args.input).split(','):
476 477 self.r.assign(input, self.pyconverter(self.shell.user_ns[input]))
477 478
478 479 png_argdict = dict([(n, getattr(args, n)) for n in ['units', 'height', 'width', 'bg', 'pointsize']])
479 480 png_args = ','.join(['%s=%s' % (o,v) for o, v in png_argdict.items() if v is not None])
480 481 # execute the R code in a temporary directory
481 482
482 483 tmpd = tempfile.mkdtemp()
483 484 self.r('png("%s/Rplots%%03d.png",%s)' % (tmpd, png_args))
484 485
485 486 text_output = ''
486 487 if line_mode:
487 488 for line in code.split(';'):
488 489 text_result, result = self.eval(line)
489 490 text_output += text_result
490 491 if text_result:
491 492 # the last line printed something to the console so we won't return it
492 493 return_output = False
493 494 else:
494 495 text_result, result = self.eval(code)
495 496 text_output += text_result
496 497
497 498 self.r('dev.off()')
498 499
499 500 # read out all the saved .png files
500 501
501 502 images = [open(imgfile, 'rb').read() for imgfile in glob("%s/Rplots*png" % tmpd)]
502 503
503 504 # now publish the images
504 505 # mimicking IPython/zmq/pylab/backend_inline.py
505 506 fmt = 'png'
506 507 mimetypes = { 'png' : 'image/png', 'svg' : 'image/svg+xml' }
507 508 mime = mimetypes[fmt]
508 509
509 510 # publish the printed R objects, if any
510 511
511 512 display_data = []
512 513 if text_output:
513 514 display_data.append(('RMagic.R', {'text/plain':text_output}))
514 515
515 516 # flush text streams before sending figures, helps a little with output
516 517 for image in images:
517 518 # synchronization in the console (though it's a bandaid, not a real sln)
518 519 sys.stdout.flush(); sys.stderr.flush()
519 520 display_data.append(('RMagic.R', {mime: image}))
520 521
521 522 # kill the temporary directory
522 523 rmtree(tmpd)
523 524
524 525 # try to turn every output into a numpy array
525 526 # this means that output are assumed to be castable
526 527 # as numpy arrays
527 528
528 529 if args.output:
529 530 for output in ','.join(args.output).split(','):
530 531 self.shell.push({output:self.Rconverter(self.r(output), dataframe=False)})
531 532
532 533 if args.dataframe:
533 534 for output in ','.join(args.dataframe).split(','):
534 535 self.shell.push({output:self.Rconverter(self.r(output), dataframe=True)})
535 536
536 537 for tag, disp_d in display_data:
537 538 publish_display_data(tag, disp_d)
538 539
539 540 # this will keep a reference to the display_data
540 541 # which might be useful to other objects who happen to use
541 542 # this method
542 543
543 544 if self.cache_display_data:
544 545 self.display_cache = display_data
545 546
546 547 # if in line mode and return_output, return the result as an ndarray
547 548 if return_output and not args.noreturn:
548 549 if result != ri.NULL:
549 550 return self.Rconverter(result, dataframe=False)
550 551
# Fill the usage section of the module docstring with the docstrings of the
# individual magics. Under ``python -OO`` docstrings are stripped and
# __doc__ is None, in which case there is nothing to fill in.
if __doc__ is not None:
    __doc__ = __doc__.format(
        R_DOC = ' '*8 + RMagics.R.__doc__,
        RPUSH_DOC = ' '*8 + RMagics.Rpush.__doc__,
        RPULL_DOC = ' '*8 + RMagics.Rpull.__doc__,
        RGET_DOC = ' '*8 + RMagics.Rget.__doc__
    )
557 558
558 559
# set once RMagics has been registered, so a second load is a no-op
_loaded = False


def load_ipython_extension(ip):
    """Register the R magics with *ip*; later calls do nothing."""
    global _loaded
    if _loaded:
        return
    ip.register_magics(RMagics)
    _loaded = True
General Comments 0
You need to be logged in to leave comments. Login now