##// END OF EJS Templates
adding a comment about trailing semicolons
Jonathan Taylor -
Show More
@@ -1,566 +1,571 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 ======
4 4 Rmagic
5 5 ======
6 6
7 7 Magic command interface for interactive work with R via rpy2
8 8
9 9 Usage
10 10 =====
11 11
12 12 ``%R``
13 13
14 14 {R_DOC}
15 15
16 16 ``%Rpush``
17 17
18 18 {RPUSH_DOC}
19 19
20 20 ``%Rpull``
21 21
22 22 {RPULL_DOC}
23 23
24 24 ``%Rget``
25 25
26 26 {RGET_DOC}
27 27
28 28 """
29 29
30 30 #-----------------------------------------------------------------------------
31 31 # Copyright (C) 2012 The IPython Development Team
32 32 #
33 33 # Distributed under the terms of the BSD License. The full license is in
34 34 # the file COPYING, distributed as part of this software.
35 35 #-----------------------------------------------------------------------------
36 36
37 37 import sys
38 38 import tempfile
39 39 from glob import glob
40 40 from shutil import rmtree
41 41 from getopt import getopt
42 42
43 43 # numpy and rpy2 imports
44 44
45 45 import numpy as np
46 46
47 47 import rpy2.rinterface as ri
48 48 import rpy2.robjects as ro
49 49 from rpy2.robjects.numpy2ri import numpy2ri
50 50 ro.conversion.py2ri = numpy2ri
51 51
52 52 # IPython imports
53 53
54 54 from IPython.core.displaypub import publish_display_data
55 55 from IPython.core.magic import (Magics, magics_class, cell_magic, line_magic,
56 56 line_cell_magic)
57 57 from IPython.testing.skipdoctest import skip_doctest
58 58 from IPython.core.magic_arguments import (
59 59 argument, magic_arguments, parse_argstring
60 60 )
61 61 from IPython.utils.py3compat import str_to_unicode, unicode_to_str
62 62
class RMagicError(ri.RRuntimeError):
    """Error raised by the R magics when R code fails to parse or evaluate."""
65 65
def Rconverter(Robj, dataframe=False):
    """
    Convert an object in R's namespace to one suitable
    for ipython's namespace.

    For a data.frame, it tries to return a structured array.
    It first checks for colnames, then names.
    If all are NULL, it returns np.asarray(Robj), else
    it tries to construct a recarray

    Parameters
    ----------

    Robj: an R object returned from rpy2

    dataframe: bool
        If True, try to build a structured array whose field names are
        taken from the object's colnames (or, failing that, its names).
        If False (the default), the object is passed straight to
        np.asarray and R is not consulted at all.

    Returns
    -------

    The result of np.asarray on either Robj itself or on the record
    array built from it.
    """
    if dataframe:
        # R helpers are only looked up when they are actually needed
        cols = ro.r('colnames')(Robj)
        _names = ro.r('names')(Robj)
        if cols != ri.NULL:
            # only objects with colnames are coerced through as.data.frame
            Robj = ro.r('as.data.frame')(Robj)
            field_names = tuple(np.array(cols))
        elif _names != ri.NULL:
            field_names = tuple(np.array(_names))
        else: # failed to find names
            return np.asarray(Robj)
        Robj = np.rec.fromarrays(Robj, names=field_names)
    return np.asarray(Robj)
99 99
@magics_class
class RMagics(Magics):
    """A set of magics useful for interactive work with R via rpy2.
    """

    def __init__(self, shell, Rconverter=Rconverter,
                 pyconverter=np.asarray,
                 cache_display_data=False):
        """
        Parameters
        ----------

        shell : IPython shell

        Rconverter : callable
            To be called on values coming back from rpy2 before they
            are pushed to, or returned into, the ipython namespace.
            It is called as Rconverter(value, dataframe=bool).

        pyconverter : callable
            To be called on values in ipython namespace before
            assigning to variables in rpy2.

        cache_display_data : bool
            If True, the published results of the final call to R are
            cached in the variable 'display_cache'.

        """
        super(RMagics, self).__init__(shell)
        self.cache_display_data = cache_display_data

        self.r = ro.R()

        # chunks of console output captured by write_console
        self.Rstdout_cache = []
        self.pyconverter = pyconverter
        self.Rconverter = Rconverter

    def eval(self, line):
        '''
        Parse and evaluate a line with rpy2.
        Returns the output to R's stdout() connection
        and the value of eval(parse(line)).

        Raises RMagicError if the line cannot be parsed or evaluated.
        The console hook that was installed before the call is put back
        whether or not the evaluation succeeds.
        '''
        old_writeconsole = ri.get_writeconsole()
        ri.set_writeconsole(self.write_console)
        try:
            value = ri.baseenv['eval'](ri.parse(line))
        except (ri.RRuntimeError, ValueError) as exception:
            warning_or_other_msg = self.flush() # otherwise next return seems to have copy of error
            # not every exception carries a 'message' attribute
            err_msg = getattr(exception, 'message', None)
            if err_msg is None:
                err_msg = str(exception)
            raise RMagicError(unicode_to_str('parsing and evaluating line "%s".\nR error message: "%s"\n R stdout:"%s"\n' %
                                             (line, str_to_unicode(err_msg, 'utf-8'), warning_or_other_msg)))
        else:
            text_output = self.flush()
        finally:
            # always hand the console back, even when R raised
            ri.set_writeconsole(old_writeconsole)
        return text_output, value

    def write_console(self, output):
        '''
        A hook to capture R's stdout in a cache.
        '''
        self.Rstdout_cache.append(output)

    def flush(self):
        '''
        Flush R's stdout cache to a string, returning the string.
        '''
        value = ''.join([str_to_unicode(s, 'utf-8') for s in self.Rstdout_cache])
        self.Rstdout_cache = []
        return value

    @skip_doctest
    @line_magic
    def Rpush(self, line):
        '''
        A line-level magic for R that pushes
        variables from python to rpy2. The line should be made up
        of whitespace separated variable names in the IPython
        namespace::

            In [7]: import numpy as np

            In [8]: X = np.array([4.5,6.3,7.9])

            In [9]: X.mean()
            Out[9]: 6.2333333333333343

            In [10]: %Rpush X

            In [11]: %R mean(X)
            Out[11]: array([ 6.23333333])

        '''

        # split() without an argument tolerates repeated whitespace and
        # an empty line, neither of which names a variable
        for name in line.split():
            self.r.assign(name, self.pyconverter(self.shell.user_ns[name]))

    @skip_doctest
    @magic_arguments()
    @argument(
        '-d', '--as_dataframe', action='store_true',
        default=False,
        help='Convert objects to data.frames before returning to ipython.'
        )
    @argument(
        'outputs',
        nargs='*',
        )
    @line_magic
    def Rpull(self, line):
        '''
        A line-level magic for R that pulls
        variables from rpy2 to python::

            In [18]: _ = %R x = c(3,4,6.7); y = c(4,6,7); z = c('a',3,4)

            In [19]: %Rpull x y z

            In [20]: x
            Out[20]: array([ 3. ,  4. ,  6.7])

            In [21]: y
            Out[21]: array([ 4.,  6.,  7.])

            In [22]: z
            Out[22]:
            array(['a', '3', '4'],
                  dtype='|S1')


        If --as_dataframe, then each object is returned as a structured array
        after first passed through "as.data.frame" in R before
        being passed to self.Rconverter.
        This is useful when a structured array is desired as output, or
        when the object in R has mixed data types.
        See the %%R docstring for more examples.

        Notes
        -----

        Beware that R names can have '.' so this is not fool proof.
        To avoid this, don't name your R objects with '.'s...

        '''
        args = parse_argstring(self.Rpull, line)
        for name in args.outputs:
            converted = self.Rconverter(self.r(name), dataframe=args.as_dataframe)
            self.shell.push({name: converted})

    @skip_doctest
    @magic_arguments()
    @argument(
        '-d', '--as_dataframe', action='store_true',
        default=False,
        help='Convert objects to data.frames before returning to ipython.'
        )
    @argument(
        'output',
        nargs=1,
        type=str,
        )
    @line_magic
    def Rget(self, line):
        '''
        Return an object from rpy2, possibly as a structured array (if possible).
        Similar to Rpull except only one argument is accepted and the value is
        returned rather than pushed to self.shell.user_ns::

            In [3]: dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')]

            In [4]: datapy = np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5, 'e')], dtype=dtype)

            In [5]: %R -i datapy

            In [6]: %Rget datapy
            Out[6]:
            array([['1', '2', '3', '4'],
                   ['2', '3', '2', '5'],
                   ['a', 'b', 'c', 'e']],
                  dtype='|S1')

            In [7]: %Rget -d datapy
            Out[7]:
            array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5.0, 'e')],
                  dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])

        '''
        args = parse_argstring(self.Rget, line)
        # nargs=1 makes args.output a one-element list
        output = args.output
        return self.Rconverter(self.r(output[0]), dataframe=args.as_dataframe)

    @skip_doctest
    @magic_arguments()
    @argument(
        '-i', '--input', action='append',
        help='Names of input variable from shell.user_ns to be assigned to R variables of the same names after calling self.pyconverter. Multiple names can be passed separated only by commas with no whitespace.'
        )
    @argument(
        '-o', '--output', action='append',
        help='Names of variables to be pushed from rpy2 to shell.user_ns after executing cell body and applying self.Rconverter. Multiple names can be passed separated only by commas with no whitespace.'
        )
    @argument(
        '-w', '--width', type=int,
        help='Width of png plotting device sent as an argument to *png* in R.'
        )
    @argument(
        '-h', '--height', type=int,
        help='Height of png plotting device sent as an argument to *png* in R.'
        )
    @argument(
        '-d', '--dataframe', action='append',
        help='Convert these objects to data.frames and return as structured arrays.'
        )
    @argument(
        '-u', '--units', choices=["px", "in", "cm", "mm"],
        help='Units of png plotting device sent as an argument to *png* in R. One of ["px", "in", "cm", "mm"].'
        )
    @argument(
        '-p', '--pointsize', type=int,
        help='Pointsize of png plotting device sent as an argument to *png* in R.'
        )
    @argument(
        '-b', '--bg',
        help='Background of png plotting device sent as an argument to *png* in R.'
        )
    @argument(
        '-n', '--noreturn',
        help='Force the magic to not return anything.',
        action='store_true',
        default=False
        )
    @argument(
        'code',
        nargs='*',
        )
    @line_cell_magic
    def R(self, line, cell=None):
        '''
        Execute code in R, and pull some of the results back into the Python namespace.

        In line mode, this will evaluate an expression and convert the returned value to a Python object.
        The return value is determined by rpy2's behaviour of returning the result of evaluating the
        final line.

        Multiple R lines can be executed by joining them with semicolons::

            In [9]: %R X=c(1,4,5,7); sd(X); mean(X)
            Out[9]: array([ 4.25])

        As a cell, this will run a block of R code, without bringing anything back by default::

            In [10]: %%R
               ....: Y = c(2,4,3,9)
               ....: print(summary(lm(Y~X)))
               ....:

            Call:
            lm(formula = Y ~ X)

            Residuals:
                1     2     3     4
             0.88 -0.24 -2.28  1.64

            Coefficients:
                        Estimate Std. Error t value Pr(>|t|)
            (Intercept)   0.0800     2.3000   0.035    0.975
            X             1.0400     0.4822   2.157    0.164

            Residual standard error: 2.088 on 2 degrees of freedom
            Multiple R-squared: 0.6993,Adjusted R-squared: 0.549
            F-statistic: 4.651 on 1 and 2 DF,  p-value: 0.1638

        In the notebook, plots are published as the output of the cell.

        %R plot(X, Y)

        will create a scatter plot of X vs Y.

        If cell is not None and line has some R code, it is prepended to
        the R code in cell.

        Objects can be passed back and forth between rpy2 and python via the -i -o flags in line::

            In [14]: Z = np.array([1,4,5,10])

            In [15]: %R -i Z mean(Z)
            Out[15]: array([ 5.])


            In [16]: %R -o W W=Z*mean(Z)
            Out[16]: array([  5.,  20.,  25.,  50.])

            In [17]: W
            Out[17]: array([  5.,  20.,  25.,  50.])

        The return value is determined by these rules:

        * If the cell is not None, the magic returns None.

        * If the cell evaluates as False, the resulting value is returned
          unless the final line prints something to the console, in
          which case None is returned.

        * If the final line results in a NULL value when evaluated
          by rpy2, then None is returned.

        * No attempt is made to convert the final value to a structured array.
          Use the --dataframe flag or %Rget to push / return a structured array.

        * If the -n flag is present, there is no return value.

        * A trailing ';' will also result in no return value as the last
          value in the line is an empty string.

        The --dataframe argument will attempt to return structured arrays.
        This is useful for dataframes with
        mixed data types. Note also that for a data.frame,
        if it is returned as an ndarray, it is transposed::

            In [18]: dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')]

            In [19]: datapy = np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5, 'e')], dtype=dtype)

            In [20]: %%R -o datar
            datar = datapy
               ....:

            In [21]: datar
            Out[21]:
            array([['1', '2', '3', '4'],
                   ['2', '3', '2', '5'],
                   ['a', 'b', 'c', 'e']],
                  dtype='|S1')

            In [22]: %%R -d datar
            datar = datapy
               ....:

            In [23]: datar
            Out[23]:
            array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5.0, 'e')],
                  dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])

        The --dataframe argument first tries colnames, then names.
        If both are NULL, it returns an ndarray (i.e. unstructured)::

            In [1]: %R mydata=c(4,6,8.3); NULL

            In [2]: %R -d mydata

            In [3]: mydata
            Out[3]: array([ 4. ,  6. ,  8.3])

            In [4]: %R names(mydata) = c('a','b','c'); NULL

            In [5]: %R -d mydata

            In [6]: mydata
            Out[6]:
            array((4.0, 6.0, 8.3),
                  dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

            In [7]: %R -o mydata

            In [8]: mydata
            Out[8]: array([ 4. ,  6. ,  8.3])

        '''

        args = parse_argstring(self.R, line)

        # arguments 'code' in line are prepended to
        # the cell lines
        if not cell:
            code = ''
            return_output = True
            line_mode = True
        else:
            code = cell
            return_output = False
            line_mode = False

        code = ' '.join(args.code) + code

        if args.input:
            for name in ','.join(args.input).split(','):
                self.r.assign(name, self.pyconverter(self.shell.user_ns[name]))

        png_argdict = dict((n, getattr(args, n)) for n in ['units', 'height', 'width', 'bg', 'pointsize'])
        if png_argdict['units'] is not None:
            # units is a character argument of png(); it must reach R quoted
            png_argdict['units'] = '"%s"' % png_argdict['units']
        png_args = ','.join(['%s=%s' % (o, v) for o, v in png_argdict.items() if v is not None])

        # execute the R code in a temporary directory; the directory is
        # removed and the png device closed even when the R code fails

        tmpd = tempfile.mkdtemp()
        try:
            self.r('png("%s/Rplots%%03d.png",%s)' % (tmpd, png_args))

            text_output = ''
            try:
                if line_mode:
                    for stmt in code.split(';'):
                        text_result, result = self.eval(stmt)
                        text_output += text_result
                    if text_result:
                        # the last line printed something to the console so we won't return it
                        return_output = False
                else:
                    text_result, result = self.eval(code)
                    text_output += text_result
            finally:
                self.r('dev.off()')

            # read out all the saved .png files, in the order R numbered them

            images = []
            for imgfile in sorted(glob("%s/Rplots*png" % tmpd)):
                with open(imgfile, 'rb') as img:
                    images.append(img.read())
        finally:
            # kill the temporary directory
            rmtree(tmpd)

        # now publish the images
        # mimicking IPython/zmq/pylab/backend_inline.py
        fmt = 'png'
        mimetypes = { 'png' : 'image/png', 'svg' : 'image/svg+xml' }
        mime = mimetypes[fmt]

        # publish the printed R objects, if any

        display_data = []
        if text_output:
            display_data.append(('RMagic.R', {'text/plain':text_output}))

        # flush text streams before sending figures, helps a little with output
        # synchronization in the console (though it's a bandaid, not a real sln)
        for image in images:
            sys.stdout.flush(); sys.stderr.flush()
            display_data.append(('RMagic.R', {mime: image}))

        # try to turn every output into a numpy array
        # this means that output are assumed to be castable
        # as numpy arrays

        if args.output:
            for name in ','.join(args.output).split(','):
                self.shell.push({name: self.Rconverter(self.r(name), dataframe=False)})

        if args.dataframe:
            for name in ','.join(args.dataframe).split(','):
                self.shell.push({name: self.Rconverter(self.r(name), dataframe=True)})

        for tag, disp_d in display_data:
            publish_display_data(tag, disp_d)

        # this will keep a reference to the display_data
        # which might be useful to other objects who happen to use
        # this method

        if self.cache_display_data:
            self.display_cache = display_data

        # if in line mode and return_output, return the result as an ndarray
        if return_output and not args.noreturn:
            if result != ri.NULL:
                return self.Rconverter(result, dataframe=False)
551 556
# Fill each placeholder of the module docstring with the docstring of the
# matching magic, prefixed with eight spaces.
__doc__ = __doc__.format(**dict(
    (placeholder, ' ' * 8 + getattr(RMagics, magic).__doc__)
    for placeholder, magic in [('R_DOC', 'R'),
                               ('RPUSH_DOC', 'Rpush'),
                               ('RPULL_DOC', 'Rpull'),
                               ('RGET_DOC', 'Rget')]
    ))
558 563
559 564
# Set once the magics have been registered, so that loading the
# extension a second time does not register them again.
_loaded = False
def load_ipython_extension(ip):
    """Load the extension in IPython."""
    global _loaded
    if _loaded:
        return
    ip.register_magics(RMagics)
    _loaded = True
General Comments 0
You need to be logged in to leave comments. Login now