##// END OF EJS Templates
editing the docstring
Jonathan Taylor -
Show More
@@ -1,564 +1,565 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 ======
4 4 Rmagic
5 5 ======
6 6
7 7 Magic command interface for interactive work with R via rpy2
8 8
9 9 Usage
10 10 =====
11 11
12 12 ``%R``
13 13
14 14 {R_DOC}
15 15
16 16 ``%Rpush``
17 17
18 18 {RPUSH_DOC}
19 19
20 20 ``%Rpull``
21 21
22 22 {RPULL_DOC}
23 23
24 24 ``%Rget``
25 25
26 26 {RGET_DOC}
27 27
28 28 """
29 29
30 30 #-----------------------------------------------------------------------------
31 31 # Copyright (C) 2012 The IPython Development Team
32 32 #
33 33 # Distributed under the terms of the BSD License. The full license is in
34 34 # the file COPYING, distributed as part of this software.
35 35 #-----------------------------------------------------------------------------
36 36
37 37 import sys
38 38 import tempfile
39 39 from glob import glob
40 40 from shutil import rmtree
41 41 from getopt import getopt
42 42
43 43 # numpy and rpy2 imports
44 44
45 45 import numpy as np
46 46
47 47 import rpy2.rinterface as ri
48 48 import rpy2.robjects as ro
49 49 from rpy2.robjects.numpy2ri import numpy2ri
50 50 ro.conversion.py2ri = numpy2ri
51 51
52 52 # IPython imports
53 53
54 54 from IPython.core.displaypub import publish_display_data
55 55 from IPython.core.magic import (Magics, magics_class, cell_magic, line_magic,
56 56 line_cell_magic)
57 57 from IPython.testing.skipdoctest import skip_doctest
58 58 from IPython.core.magic_arguments import (
59 59 argument, magic_arguments, parse_argstring
60 60 )
61 61 from IPython.utils.py3compat import str_to_unicode, unicode_to_str
62 62
class RMagicError(ri.RRuntimeError):
    """Error raised when R fails to parse or evaluate code sent by a magic."""
65 65
def Rconverter(Robj, dataframe=False):
    """
    Convert an object in R's namespace to one suitable
    for ipython's namespace.

    For a data.frame, it tries to return a structured array.
    It first checks for colnames, then names.
    If both are NULL, it returns np.asarray(Robj), else
    it tries to construct a recarray.

    Parameters
    ----------

    Robj : an R object returned from rpy2

    dataframe : bool
        If True, try to build a structured array. Field names are taken
        from ``colnames(Robj)`` (the object is then passed through
        ``as.data.frame`` first) or, failing that, from ``names(Robj)``.
        If False (the default), R is not consulted at all and the object
        is simply passed to ``np.asarray``.

    Returns
    -------

    A numpy array; a structured one when `dataframe` is True and field
    names could be found.
    """
    if not dataframe:
        # plain conversion: no need to look anything up in R
        return np.asarray(Robj)

    cols = ro.r('colnames')(Robj)
    obj_names = ro.r('names')(Robj)
    if cols != ri.NULL:
        Robj = ro.r('as.data.frame')(Robj)
        field_names = tuple(np.array(cols))
    elif obj_names != ri.NULL:
        field_names = tuple(np.array(obj_names))
    else:
        # failed to find names: fall back to an unstructured array
        return np.asarray(Robj)

    return np.asarray(np.rec.fromarrays(Robj, names=field_names))
101 99
@magics_class
class RMagics(Magics):
    """A set of magics useful for interactive work with R via rpy2.
    """

    def __init__(self, shell, Rconverter=Rconverter,
                 pyconverter=np.asarray,
                 cache_display_data=False):
        """
        Parameters
        ----------

        shell : IPython shell

        Rconverter : callable
            To be called on values coming from rpy2 before they are
            returned or pushed to the ipython namespace. It is called as
            ``Rconverter(value, dataframe=<bool>)``.

        pyconverter : callable
            To be called on values in ipython namespace before
            assigning to variables in rpy2.

        cache_display_data : bool
            If True, the published results of the final call to R are
            cached in the variable 'display_cache'.

        """
        super(RMagics, self).__init__(shell)
        self.cache_display_data = cache_display_data

        self.r = ro.R()

        self.Rstdout_cache = []
        self.pyconverter = pyconverter
        self.Rconverter = Rconverter

    def eval(self, line):
        '''
        Parse and evaluate a line with rpy2.
        Returns the output to R's stdout() connection
        and the value of eval(parse(line)).

        Raises RMagicError if the line cannot be parsed or evaluated.
        The previous R console callback is restored in every case.
        '''
        old_writeconsole = ri.get_writeconsole()
        ri.set_writeconsole(self.write_console)
        try:
            value = ri.baseenv['eval'](ri.parse(line))
        except (ri.RRuntimeError, ValueError) as exception:
            # Discard what R printed before failing; otherwise it would be
            # reported as the output of the next evaluation.
            self.flush()
            raise RMagicError(unicode_to_str('parsing and evaluating line "%s". R traceback: "%s"\n' %
                                             (line, str_to_unicode(exception.message, 'utf-8'))))
        finally:
            # always give the console back, even when evaluation failed
            ri.set_writeconsole(old_writeconsole)
        text_output = self.flush()
        return text_output, value

    def write_console(self, output):
        '''
        A hook to capture R's stdout in a cache.
        '''
        self.Rstdout_cache.append(output)

    def flush(self):
        '''
        Flush R's stdout cache to a string, returning the string.
        '''
        value = ''.join([str_to_unicode(s, 'utf-8') for s in self.Rstdout_cache])
        self.Rstdout_cache = []
        return value

    @skip_doctest
    @line_magic
    def Rpush(self, line):
        '''
        A line-level magic for R that pushes
        variables from python to rpy2. The line should be made up
        of whitespace separated variable names in the IPython
        namespace::

            In [7]: import numpy as np

            In [8]: X = np.array([4.5,6.3,7.9])

            In [9]: X.mean()
            Out[9]: 6.2333333333333343

            In [10]: %Rpush X

            In [11]: %R mean(X)
            Out[11]: array([ 6.23333333])

        '''
        # split() with no argument tolerates repeated / surrounding
        # whitespace instead of producing empty variable names
        for name in line.split():
            self.r.assign(name, self.pyconverter(self.shell.user_ns[name]))

    @skip_doctest
    @magic_arguments()
    @argument(
        '-d', '--as_dataframe', action='store_true',
        default=False,
        help='Convert objects to data.frames before returning to ipython.'
        )
    @argument(
        'outputs',
        nargs='*',
        )
    @line_magic
    def Rpull(self, line):
        '''
        A line-level magic for R that pulls
        variables from rpy2 to python::

            In [18]: _ = %R x = c(3,4,6.7); y = c(4,6,7); z = c('a',3,4)

            In [19]: %Rpull x y z

            In [20]: x
            Out[20]: array([ 3. ,  4. ,  6.7])

            In [21]: y
            Out[21]: array([ 4.,  6.,  7.])

            In [22]: z
            Out[22]:
            array(['a', '3', '4'],
                  dtype='|S1')


        If --as_dataframe is given, each object is handed to
        self.Rconverter with dataframe=True, which tries to return it
        as a structured array (passing it through "as.data.frame" in R
        when it has column names).
        This is useful when a structured array is desired as output, or
        when the object in R has mixed data types.
        See the %%R docstring for more examples.

        Notes
        -----

        Beware that R names can have '.' so this is not fool proof.
        To avoid this, don't name your R objects with '.'s...

        '''
        args = parse_argstring(self.Rpull, line)
        for output in args.outputs:
            self.shell.push({output: self.Rconverter(self.r(output), dataframe=args.as_dataframe)})

    @skip_doctest
    @magic_arguments()
    @argument(
        '-d', '--as_dataframe', action='store_true',
        default=False,
        help='Convert objects to data.frames before returning to ipython.'
        )
    @argument(
        'output',
        nargs=1,
        type=str,
        )
    @line_magic
    def Rget(self, line):
        '''
        Return an object from rpy2, possibly as a structured array (if possible).
        Similar to Rpull except only one argument is accepted and the value is
        returned rather than pushed to self.shell.user_ns::

            In [3]: dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')]

            In [4]: datapy = np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5, 'e')], dtype=dtype)

            In [5]: %R -i datapy

            In [6]: %Rget datapy
            Out[6]:
            array([['1', '2', '3', '4'],
                   ['2', '3', '2', '5'],
                   ['a', 'b', 'c', 'e']],
                  dtype='|S1')

            In [7]: %Rget -d datapy
            Out[7]:
            array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5.0, 'e')],
                  dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])

        '''
        args = parse_argstring(self.Rget, line)
        output = args.output
        return self.Rconverter(self.r(output[0]), dataframe=args.as_dataframe)

    @skip_doctest
    @magic_arguments()
    @argument(
        '-i', '--input', action='append',
        help='Names of input variable from shell.user_ns to be assigned to R variables of the same names after calling self.pyconverter. Multiple names can be passed separated only by commas with no whitespace.'
        )
    @argument(
        '-o', '--output', action='append',
        help='Names of variables to be pushed from rpy2 to shell.user_ns after executing cell body and applying self.Rconverter. Multiple names can be passed separated only by commas with no whitespace.'
        )
    @argument(
        '-w', '--width', type=int,
        help='Width of png plotting device sent as an argument to *png* in R.'
        )
    @argument(
        '-h', '--height', type=int,
        help='Height of png plotting device sent as an argument to *png* in R.'
        )
    @argument(
        '-d', '--dataframe', action='append',
        help='Convert these objects to data.frames and return as structured arrays.'
        )
    @argument(
        # units are names, not numbers: parse them as strings and restrict
        # them to the values the help text advertises
        '-u', '--units', type=str, choices=["px", "in", "cm", "mm"],
        help='Units of png plotting device sent as an argument to *png* in R. One of ["px", "in", "cm", "mm"].'
        )
    @argument(
        '-p', '--pointsize', type=int,
        help='Pointsize of png plotting device sent as an argument to *png* in R.'
        )
    @argument(
        '-b', '--bg',
        help='Background of png plotting device sent as an argument to *png* in R.'
        )
    @argument(
        '-n', '--noreturn',
        help='Force the magic to not return anything.',
        action='store_true',
        default=False
        )
    @argument(
        'code',
        nargs='*',
        )
    @line_cell_magic
    def R(self, line, cell=None):
        '''
        Execute code in R, and pull some of the results back into the Python namespace.

        In line mode, this will evaluate an expression and convert the returned value to a Python object.
        The return value is determined by rpy2's behaviour of returning the result of evaluating the
        final line.

        Multiple R lines can be executed by joining them with semicolons::

            In [9]: %R X=c(1,4,5,7); sd(X); mean(X)
            Out[9]: array([ 4.25])

        As a cell, this will run a block of R code, without bringing anything back by default::

            In [10]: %%R
               ....: Y = c(2,4,3,9)
               ....: print(summary(lm(Y~X)))
               ....:

            Call:
            lm(formula = Y ~ X)

            Residuals:
                1     2     3     4
             0.88 -0.24 -2.28  1.64

            Coefficients:
                        Estimate Std. Error t value Pr(>|t|)
            (Intercept)   0.0800     2.3000   0.035    0.975
            X             1.0400     0.4822   2.157    0.164

            Residual standard error: 2.088 on 2 degrees of freedom
            Multiple R-squared: 0.6993,Adjusted R-squared: 0.549
            F-statistic: 4.651 on 1 and 2 DF,  p-value: 0.1638

        In the notebook, plots are published as the output of the cell.

        %R plot(X, Y)

        will create a scatter plot of X vs Y.

        If cell is not None and line has some R code, it is prepended to
        the R code in cell.

        Objects can be passed back and forth between rpy2 and python via the -i -o flags in line::

            In [14]: Z = np.array([1,4,5,10])

            In [15]: %R -i Z mean(Z)
            Out[15]: array([ 5.])


            In [16]: %R -o W W=Z*mean(Z)
            Out[16]: array([  5.,  20.,  25.,  50.])

            In [17]: W
            Out[17]: array([  5.,  20.,  25.,  50.])

        The return value is determined by these rules:

        * If the cell is not None, the magic returns None.

        * If the cell evaluates as False, the resulting value is returned
          unless the final line prints something to the console, in
          which case None is returned.

        * If the final line results in a NULL value when evaluated
          by rpy2, then None is returned.

        * No attempt is made to convert the final value to a structured array.
          Use the --dataframe flag or %Rget to push / return a structured array.

        The --dataframe argument will attempt to return structured arrays.
        This is useful for dataframes with
        mixed data types. Note also that for a data.frame,
        if it is returned as an ndarray, it is transposed::

            In [18]: dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')]

            In [19]: datapy = np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5, 'e')], dtype=dtype)

            In [20]: %%R -o datar
            datar = datapy
               ....:

            In [21]: datar
            Out[21]:
            array([['1', '2', '3', '4'],
                   ['2', '3', '2', '5'],
                   ['a', 'b', 'c', 'e']],
                  dtype='|S1')

            In [22]: %%R -d datar
            datar = datapy
               ....:

            In [23]: datar
            Out[23]:
            array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5.0, 'e')],
                  dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])

        The --dataframe argument first tries colnames, then names.
        If both are NULL, it returns an ndarray (i.e. unstructured)::

            In [1]: %R mydata=c(4,6,8.3); NULL

            In [2]: %R -d mydata

            In [3]: mydata
            Out[3]: array([ 4. ,  6. ,  8.3])

            In [4]: %R names(mydata) = c('a','b','c'); NULL

            In [5]: %R -d mydata

            In [6]: mydata
            Out[6]:
            array((4.0, 6.0, 8.3),
                  dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

            In [7]: %R -o mydata

            In [8]: mydata
            Out[8]: array([ 4. ,  6. ,  8.3])

        '''

        args = parse_argstring(self.R, line)

        # arguments 'code' in line are prepended to
        # the cell lines
        line_code = ' '.join(args.code)
        if not cell:
            code = line_code
            return_output = True
            line_mode = True
        else:
            # put the line code on its own line so that it does not run
            # into the first statement of the cell
            code = line_code + '\n' + cell if line_code else cell
            return_output = False
            line_mode = False

        if args.input:
            for name in ','.join(args.input).split(','):
                self.r.assign(name, self.pyconverter(self.shell.user_ns[name]))

        png_argdict = dict([(n, getattr(args, n)) for n in ['units', 'height', 'width', 'bg', 'pointsize']])
        if png_argdict['units'] is not None:
            # R expects a character string here, e.g. units="in"
            png_argdict['units'] = '"%s"' % png_argdict['units']
        png_args = ','.join(['%s=%s' % (o, v) for o, v in png_argdict.items() if v is not None])

        # execute the R code in a temporary directory; the directory is
        # removed even if R raises an error
        tmpd = tempfile.mkdtemp()
        try:
            self.r('png("%s/Rplots%%03d.png",%s)' % (tmpd, png_args))

            text_output = ''
            try:
                if line_mode:
                    for statement in code.split(';'):
                        text_result, result = self.eval(statement)
                        text_output += text_result
                    if text_result:
                        # the last line printed something to the console so we won't return it
                        return_output = False
                else:
                    text_result, result = self.eval(code)
                    text_output += text_result
            finally:
                # close the png device opened above, even on error
                self.r('dev.off()')

            # read out all the saved .png files, in page order
            images = []
            for imgfile in sorted(glob("%s/Rplots*png" % tmpd)):
                with open(imgfile, 'rb') as img:
                    images.append(img.read())
        finally:
            # kill the temporary directory
            rmtree(tmpd)

        # now publish the images
        # mimicking IPython/zmq/pylab/backend_inline.py
        fmt = 'png'
        mimetypes = { 'png' : 'image/png', 'svg' : 'image/svg+xml' }
        mime = mimetypes[fmt]

        # publish the printed R objects, if any

        display_data = []
        if text_output:
            display_data.append(('RMagic.R', {'text/plain':text_output}))

        for image in images:
            # flush text streams before sending figures, helps a little with
            # output synchronization in the console (though it's a bandaid,
            # not a real sln)
            sys.stdout.flush(); sys.stderr.flush()
            display_data.append(('RMagic.R', {mime: image}))

        # try to turn every output into a numpy array
        # this means that output are assumed to be castable
        # as numpy arrays

        if args.output:
            for output in ','.join(args.output).split(','):
                self.shell.push({output: self.Rconverter(self.r(output), dataframe=False)})

        if args.dataframe:
            for output in ','.join(args.dataframe).split(','):
                self.shell.push({output: self.Rconverter(self.r(output), dataframe=True)})

        for tag, disp_d in display_data:
            publish_display_data(tag, disp_d)

        # this will keep a reference to the display_data
        # which might be useful to other objects who happen to use
        # this method

        if self.cache_display_data:
            self.display_cache = display_data

        # if in line mode and return_output, return the result as an ndarray
        if return_output and not args.noreturn:
            if result != ri.NULL:
                return self.Rconverter(result, dataframe=False)
549 550
# Fill the usage placeholders of the module docstring with the docstrings
# of the individual magics.
__doc__ = __doc__.format(
    R_DOC = ' '*8 + RMagics.R.__doc__,
    RPUSH_DOC = ' '*8 + RMagics.Rpush.__doc__,
    RPULL_DOC = ' '*8 + RMagics.Rpull.__doc__,
    RGET_DOC = ' '*8 + RMagics.Rget.__doc__
)
556 557
557 558
# Module-level flag so the magics are registered only once.
_loaded = False

def load_ipython_extension(ip):
    """Register the R magics with `ip`; calls after the first do nothing."""
    global _loaded
    if _loaded:
        return
    ip.register_magics(RMagics)
    _loaded = True
General Comments 0
You need to be logged in to leave comments. Login now