##// END OF EJS Templates
added Rget, and options to try to return a structured array
Jonathan Taylor -
Show More
@@ -21,6 +21,10 Usage
21 21
22 22 {RPULL_DOC}
23 23
24 ``%Rget``
25
26 {RGET_DOC}
27
24 28 """
25 29
26 30 #-----------------------------------------------------------------------------
@@ -59,31 +63,41 from IPython.utils.py3compat import str_to_unicode, unicode_to_str
59 63 class RMagicError(ri.RRuntimeError):
60 64 pass
61 65
62 def Rconverter(Robj):
66 def Rconverter(Robj, dataframe=False):
63 67 """
64 68 Convert an object in R's namespace to one suitable
65 69 for ipython's namespace.
66 70
67 71 For a data.frame, it tries to return a structured array.
72 It first checks for colnames, then names.
73 If all are NULL, it returns np.asarray(Robj), else
74 it tries to construct a recarray
68 75
69 76 Parameters
70 77 ----------
71 78
72 79 Robj: an R object returned from rpy2
73 80 """
74 if is_data_frame(Robj):
75 Robj = as_data_frame(Robj)
76 dimRobj = list(np.array(dimR(Robj)))
77 if 1 not in dimRobj:
78 Robj = np.rec.fromarrays(Robj, names = tuple(Robj.names))
79 return np.squeeze(np.asarray(Robj))
80
81 is_data_frame = None
82 as_data_frame = None
83 dimR = None
84 colnames = None
85 ncol = None
86 nrow = None
81 is_data_frame = ro.r('is.data.frame')
82 colnames = ro.r('colnames')
83 rownames = ro.r('rownames') # with pandas, these could be used for the index
84 names = ro.r('names')
85
86
87 if dataframe:
88 as_data_frame = ro.r('as.data.frame')
89 cols = colnames(Robj)
90 rows = rownames(Robj)
91 _names = names(Robj)
92 if cols != ri.NULL:
93 Robj = as_data_frame(Robj)
94 names = tuple(np.array(cols))
95 elif _names != ri.NULL:
96 names = tuple(np.array(_names))
97 else: # failed to find names
98 return np.asarray(Robj)
99 Robj = np.rec.fromarrays(Robj, names = names)
100 return np.asarray(Robj)
87 101
88 102 @magics_class
89 103 class RMagics(Magics):
@@ -112,13 +126,6 class RMagics(Magics):
112 126 self.cache_display_data = cache_display_data
113 127
114 128 self.r = ro.R()
115 global is_data_frame, dimR, colnames, ncol, nrow, as_data_frame
116 is_data_frame = self.r('is.data.frame')
117 as_data_frame = self.r('as.data.frame')
118 dimR = self.r('dim')
119 colnames = self.r('colnames')
120 ncol = self.r('ncol')
121 nrow = self.r('nrow')
122 129
123 130 self.Rstdout_cache = []
124 131 self.pyconverter = pyconverter
@@ -183,6 +190,16 class RMagics(Magics):
183 190 self.r.assign(input, self.pyconverter(self.shell.user_ns[input]))
184 191
185 192 @skip_doctest
193 @magic_arguments()
194 @argument(
195 '-d', '--as_dataframe', action='store_true',
196 default=False,
197 help='Convert objects to data.frames before returning to ipython.'
198 )
199 @argument(
200 'outputs',
201 nargs='*',
202 )
186 203 @line_magic
187 204 def Rpull(self, line):
188 205 '''
@@ -205,6 +222,13 class RMagics(Magics):
205 222 dtype='|S1')
206 223
207 224
225 If --as_dataframe is given, then each object is returned as a structured array
226 after first being passed through "as.data.frame" in R before
227 being passed to self.Rconverter.
228 This is useful when a structured array is desired as output, or
229 when the object in R has mixed data types.
230 See the %%R docstring for more examples.
231
208 232 Notes
209 233 -----
210 234
@@ -212,9 +236,52 class RMagics(Magics):
212 236 To avoid this, don't name your R objects with '.'s...
213 237
214 238 '''
215 outputs = line.split(' ')
239 args = parse_argstring(self.Rpull, line)
240 outputs = args.outputs
216 241 for output in outputs:
217 self.shell.push({output:self.Rconverter(self.r(output))})
242 self.shell.push({output:self.Rconverter(self.r(output),dataframe=args.as_dataframe)})
243
244 @skip_doctest
245 @magic_arguments()
246 @argument(
247 '-d', '--as_dataframe', action='store_true',
248 default=False,
249 help='Convert objects to data.frames before returning to ipython.'
250 )
251 @argument(
252 'output',
253 nargs=1,
254 type=str,
255 )
256 @line_magic
257 def Rget(self, line):
258 '''
259 Return an object from rpy2, possibly as a structured array (if possible).
260 Similar to Rpull except only one argument is accepted and the value is
261 returned rather than pushed to self.shell.user_ns::
262
263 In [3]: dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')]
264
265 In [4]: datapy = np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5, 'e')], dtype=dtype)
266
267 In [5]: %R -i datapy
268
269 In [6]: %Rget datapy
270 Out[6]:
271 array([['1', '2', '3', '4'],
272 ['2', '3', '2', '5'],
273 ['a', 'b', 'c', 'e']],
274 dtype='|S1')
275
276 In [7]: %Rget -d datapy
277 Out[7]:
278 array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5.0, 'e')],
279 dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])
280
281 '''
282 args = parse_argstring(self.Rget, line)
283 output = args.output
284 return self.Rconverter(self.r(output[0]),dataframe=args.as_dataframe)
218 285
219 286
220 287 @skip_doctest
@@ -237,6 +304,10 class RMagics(Magics):
237 304 )
238 305
239 306 @argument(
307 '-d', '--dataframe', action='append',
308 help='Convert these objects to data.frames and return as structured arrays.'
309 )
310 @argument(
240 311 '-u', '--units', type=int,
241 312 help='Units of png plotting device sent as an argument to *png* in R. One of ["px", "in", "cm", "mm"].'
242 313 )
@@ -327,6 +398,60 class RMagics(Magics):
327 398 * If the final line results in a NULL value when evaluated
328 399 by rpy2, then None is returned.
329 400
401 The --dataframe argument will return structured arrays
402 from dataframes in R. This is useful for dataframes with
403 mixed data types. Note also that for a data.frame,
404 if it is returned as an ndarray, it is transposed::
405
406 In [18]: dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')]
407
408 In [19]: datapy = np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5, 'e')], dtype=dtype)
409
410 In [20]: %%R -o datar
411 datar = datapy
412 ....:
413
414 In [21]: datar
415 Out[21]:
416 array([['1', '2', '3', '4'],
417 ['2', '3', '2', '5'],
418 ['a', 'b', 'c', 'e']],
419 dtype='|S1')
420
421 In [22]: %%R -d datar
422 datar = datapy
423 ....:
424
425 In [23]: datar
426 Out[23]:
427 array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5.0, 'e')],
428 dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])
429
430 The --dataframe argument first tries colnames, then names.
431 If all are NULL, it returns an ndarray (i.e. unstructured)::
432
433
434 In [1]: %R mydata=c(4,6,8.3); NULL
435
436 In [2]: %R -d mydata
437
438 In [3]: mydata
439 Out[3]: array([ 4. , 6. , 8.3])
440
441 In [4]: %R names(mydata) = c('a','b','c'); NULL
442
443 In [5]: %R -d mydata
444
445 In [6]: mydata
446 Out[6]:
447 array((4.0, 6.0, 8.3),
448 dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])
449
450 In [7]: %R -o mydata
451
452 In [8]: mydata
453 Out[8]: array([ 4. , 6. , 8.3])
454
330 455
331 456 '''
332 457
@@ -401,7 +526,11 class RMagics(Magics):
401 526
402 527 if args.output:
403 528 for output in ','.join(args.output).split(','):
404 self.shell.push({output:self.Rconverter(self.r(output))})
529 self.shell.push({output:self.Rconverter(self.r(output), dataframe=False)})
530
531 if args.dataframe:
532 for output in ','.join(args.dataframe).split(','):
533 self.shell.push({output:self.Rconverter(self.r(output), dataframe=True)})
405 534
406 535 for tag, disp_d in display_data:
407 536 publish_display_data(tag, disp_d)
@@ -416,12 +545,13 class RMagics(Magics):
416 545 # if in line mode and return_output, return the result as an ndarray
417 546 if return_output and not args.noreturn:
418 547 if result != ri.NULL:
419 return self.Rconverter(result)
548 return self.Rconverter(result, dataframe=False)
420 549
421 550 __doc__ = __doc__.format(
422 551 R_DOC = ' '*8 + RMagics.R.__doc__,
423 552 RPUSH_DOC = ' '*8 + RMagics.Rpush.__doc__,
424 553 RPULL_DOC = ' '*8 + RMagics.Rpull.__doc__
554 RGET_DOC = ' '*8 + RMagics.Rget.__doc__
425 555 )
426 556
427 557
@@ -28,17 +28,17 def test_Rconverter():
28 28 ip.run_line_magic('Rpush', 'datapy')
29 29
30 30 # test to see if a copy is being made
31 v = ip.run_line_magic('R', 'datapy')
32 w = ip.run_line_magic('R', 'datapy')
31 v = ip.run_line_magic('Rget', '-d datapy')
32 w = ip.run_line_magic('Rget', '-d datapy')
33 33 np.testing.assert_almost_equal(w['x'], v['x'])
34 34 np.testing.assert_almost_equal(w['y'], v['y'])
35 35 nt.assert_true(np.all(w['z'] == v['z']))
36 36 np.testing.assert_equal(id(w.data), id(v.data))
37 37 nt.assert_equal(w.dtype, v.dtype)
38 38
39 ip.run_cell_magic('R', ' -o datar datar=datapy', '')
39 ip.run_cell_magic('R', ' -d datar datar=datapy', '')
40 40
41 u = ip.run_line_magic('R', 'datar')
41 u = ip.run_line_magic('Rget', ' -d datar')
42 42 np.testing.assert_almost_equal(u['x'], v['x'])
43 43 np.testing.assert_almost_equal(u['y'], v['y'])
44 44 nt.assert_true(np.all(u['z'] == v['z']))
General Comments 0
You need to be logged in to leave comments. Login now