##// END OF EJS Templates
added Rget, and options to try to return structured array
Jonathan Taylor -
Show More
@@ -21,6 +21,10 Usage
21
21
22 {RPULL_DOC}
22 {RPULL_DOC}
23
23
24 ``%Rget``
25
26 {RGET_DOC}
27
24 """
28 """
25
29
26 #-----------------------------------------------------------------------------
30 #-----------------------------------------------------------------------------
@@ -59,31 +63,41 from IPython.utils.py3compat import str_to_unicode, unicode_to_str
59 class RMagicError(ri.RRuntimeError):
63 class RMagicError(ri.RRuntimeError):
60 pass
64 pass
61
65
62 def Rconverter(Robj):
66 def Rconverter(Robj, dataframe=False):
63 """
67 """
64 Convert an object in R's namespace to one suitable
68 Convert an object in R's namespace to one suitable
65 for ipython's namespace.
69 for ipython's namespace.
66
70
67 For a data.frame, it tries to return a structured array.
71 For a data.frame, it tries to return a structured array.
72 It first checks for colnames, then names.
73 If all are NULL, it returns np.asarray(Robj), else
74 it tries to construct a recarray
68
75
69 Parameters
76 Parameters
70 ----------
77 ----------
71
78
72 Robj: an R object returned from rpy2
79 Robj: an R object returned from rpy2
73 """
80 """
74 if is_data_frame(Robj):
81 is_data_frame = ro.r('is.data.frame')
75 Robj = as_data_frame(Robj)
82 colnames = ro.r('colnames')
76 dimRobj = list(np.array(dimR(Robj)))
83 rownames = ro.r('rownames') # with pandas, these could be used for the index
77 if 1 not in dimRobj:
84 names = ro.r('names')
78 Robj = np.rec.fromarrays(Robj, names = tuple(Robj.names))
85
79 return np.squeeze(np.asarray(Robj))
86
80
87 if dataframe:
81 is_data_frame = None
88 as_data_frame = ro.r('as.data.frame')
82 as_data_frame = None
89 cols = colnames(Robj)
83 dimR = None
90 rows = rownames(Robj)
84 colnames = None
91 _names = names(Robj)
85 ncol = None
92 if cols != ri.NULL:
86 nrow = None
93 Robj = as_data_frame(Robj)
94 names = tuple(np.array(cols))
95 elif _names != ri.NULL:
96 names = tuple(np.array(_names))
97 else: # failed to find names
98 return np.asarray(Robj)
99 Robj = np.rec.fromarrays(Robj, names = names)
100 return np.asarray(Robj)
87
101
88 @magics_class
102 @magics_class
89 class RMagics(Magics):
103 class RMagics(Magics):
@@ -112,13 +126,6 class RMagics(Magics):
112 self.cache_display_data = cache_display_data
126 self.cache_display_data = cache_display_data
113
127
114 self.r = ro.R()
128 self.r = ro.R()
115 global is_data_frame, dimR, colnames, ncol, nrow, as_data_frame
116 is_data_frame = self.r('is.data.frame')
117 as_data_frame = self.r('as.data.frame')
118 dimR = self.r('dim')
119 colnames = self.r('colnames')
120 ncol = self.r('ncol')
121 nrow = self.r('nrow')
122
129
123 self.Rstdout_cache = []
130 self.Rstdout_cache = []
124 self.pyconverter = pyconverter
131 self.pyconverter = pyconverter
@@ -183,6 +190,16 class RMagics(Magics):
183 self.r.assign(input, self.pyconverter(self.shell.user_ns[input]))
190 self.r.assign(input, self.pyconverter(self.shell.user_ns[input]))
184
191
185 @skip_doctest
192 @skip_doctest
193 @magic_arguments()
194 @argument(
195 '-d', '--as_dataframe', action='store_true',
196 default=False,
197 help='Convert objects to data.frames before returning to ipython.'
198 )
199 @argument(
200 'outputs',
201 nargs='*',
202 )
186 @line_magic
203 @line_magic
187 def Rpull(self, line):
204 def Rpull(self, line):
188 '''
205 '''
@@ -205,6 +222,13 class RMagics(Magics):
205 dtype='|S1')
222 dtype='|S1')
206
223
207
224
225 If --as_dataframe, then each object is returned as a structured array
226 after first being passed through "as.data.frame" in R before
227 calling self.Rconverter.
228 This is useful when a structured array is desired as output, or
229 when the object in R has mixed data types.
230 See the %%R docstring for more examples.
231
208 Notes
232 Notes
209 -----
233 -----
210
234
@@ -212,9 +236,52 class RMagics(Magics):
212 To avoid this, don't name your R objects with '.'s...
236 To avoid this, don't name your R objects with '.'s...
213
237
214 '''
238 '''
215 outputs = line.split(' ')
239 args = parse_argstring(self.Rpull, line)
240 outputs = args.outputs
216 for output in outputs:
241 for output in outputs:
217 self.shell.push({output:self.Rconverter(self.r(output))})
242 self.shell.push({output:self.Rconverter(self.r(output),dataframe=args.as_dataframe)})
243
244 @skip_doctest
245 @magic_arguments()
246 @argument(
247 '-d', '--as_dataframe', action='store_true',
248 default=False,
249 help='Convert objects to data.frames before returning to ipython.'
250 )
251 @argument(
252 'output',
253 nargs=1,
254 type=str,
255 )
256 @line_magic
257 def Rget(self, line):
258 '''
259 Return an object from rpy2, possibly as a structured array (if possible).
260 Similar to Rpull except only one argument is accepted and the value is
261 returned rather than pushed to self.shell.user_ns::
262
263 In [3]: dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')]
264
265 In [4]: datapy = np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5, 'e')], dtype=dtype)
266
267 In [5]: %R -i datapy
268
269 In [6]: %Rget datapy
270 Out[6]:
271 array([['1', '2', '3', '4'],
272 ['2', '3', '2', '5'],
273 ['a', 'b', 'c', 'e']],
274 dtype='|S1')
275
276 In [7]: %Rget -d datapy
277 Out[7]:
278 array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5.0, 'e')],
279 dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])
280
281 '''
282 args = parse_argstring(self.Rget, line)
283 output = args.output
284 return self.Rconverter(self.r(output[0]),dataframe=args.as_dataframe)
218
285
219
286
220 @skip_doctest
287 @skip_doctest
@@ -237,6 +304,10 class RMagics(Magics):
237 )
304 )
238
305
239 @argument(
306 @argument(
307 '-d', '--dataframe', action='append',
308 help='Convert these objects to data.frames and return as structured arrays.'
309 )
310 @argument(
240 '-u', '--units', type=int,
311 '-u', '--units', type=int,
241 help='Units of png plotting device sent as an argument to *png* in R. One of ["px", "in", "cm", "mm"].'
312 help='Units of png plotting device sent as an argument to *png* in R. One of ["px", "in", "cm", "mm"].'
242 )
313 )
@@ -327,6 +398,60 class RMagics(Magics):
327 * If the final line results in a NULL value when evaluated
398 * If the final line results in a NULL value when evaluated
328 by rpy2, then None is returned.
399 by rpy2, then None is returned.
329
400
401 The --dataframe argument will return structured arrays
402 from dataframes in R. This is useful for dataframes with
403 mixed data types. Note also that for a data.frame,
404 if it is returned as an ndarray, it is transposed::
405
406 In [18]: dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')]
407
408 In [19]: datapy = np.array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5, 'e')], dtype=dtype)
409
410 In [20]: %%R -o datar
411 datar = datapy
412 ....:
413
414 In [21]: datar
415 Out[21]:
416 array([['1', '2', '3', '4'],
417 ['2', '3', '2', '5'],
418 ['a', 'b', 'c', 'e']],
419 dtype='|S1')
420
421 In [22]: %%R -d datar
422 datar = datapy
423 ....:
424
425 In [23]: datar
426 Out[23]:
427 array([(1, 2.9, 'a'), (2, 3.5, 'b'), (3, 2.1, 'c'), (4, 5.0, 'e')],
428 dtype=[('x', '<i4'), ('y', '<f8'), ('z', '|S1')])
429
430 The --dataframe argument first tries colnames, then names (rownames are not used).
431 If all are NULL, it returns an ndarray (i.e. unstructured)::
432
433
434 In [1]: %R mydata=c(4,6,8.3); NULL
435
436 In [2]: %R -d mydata
437
438 In [3]: mydata
439 Out[3]: array([ 4. , 6. , 8.3])
440
441 In [4]: %R names(mydata) = c('a','b','c'); NULL
442
443 In [5]: %R -d mydata
444
445 In [6]: mydata
446 Out[6]:
447 array((4.0, 6.0, 8.3),
448 dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])
449
450 In [7]: %R -o mydata
451
452 In [8]: mydata
453 Out[8]: array([ 4. , 6. , 8.3])
454
330
455
331 '''
456 '''
332
457
@@ -401,7 +526,11 class RMagics(Magics):
401
526
402 if args.output:
527 if args.output:
403 for output in ','.join(args.output).split(','):
528 for output in ','.join(args.output).split(','):
404 self.shell.push({output:self.Rconverter(self.r(output))})
529 self.shell.push({output:self.Rconverter(self.r(output), dataframe=False)})
530
531 if args.dataframe:
532 for output in ','.join(args.dataframe).split(','):
533 self.shell.push({output:self.Rconverter(self.r(output), dataframe=True)})
405
534
406 for tag, disp_d in display_data:
535 for tag, disp_d in display_data:
407 publish_display_data(tag, disp_d)
536 publish_display_data(tag, disp_d)
@@ -416,12 +545,13 class RMagics(Magics):
416 # if in line mode and return_output, return the result as an ndarray
545 # if in line mode and return_output, return the result as an ndarray
417 if return_output and not args.noreturn:
546 if return_output and not args.noreturn:
418 if result != ri.NULL:
547 if result != ri.NULL:
419 return self.Rconverter(result)
548 return self.Rconverter(result, dataframe=False)
420
549
421 __doc__ = __doc__.format(
550 __doc__ = __doc__.format(
422 R_DOC = ' '*8 + RMagics.R.__doc__,
551 R_DOC = ' '*8 + RMagics.R.__doc__,
423 RPUSH_DOC = ' '*8 + RMagics.Rpush.__doc__,
552 RPUSH_DOC = ' '*8 + RMagics.Rpush.__doc__,
424 RPULL_DOC = ' '*8 + RMagics.Rpull.__doc__
553 RPULL_DOC = ' '*8 + RMagics.Rpull.__doc__
554 RGET_DOC = ' '*8 + RMagics.Rget.__doc__
425 )
555 )
426
556
427
557
@@ -28,17 +28,17 def test_Rconverter():
28 ip.run_line_magic('Rpush', 'datapy')
28 ip.run_line_magic('Rpush', 'datapy')
29
29
30 # test to see if a copy is being made
30 # test to see if a copy is being made
31 v = ip.run_line_magic('R', 'datapy')
31 v = ip.run_line_magic('Rget', '-d datapy')
32 w = ip.run_line_magic('R', 'datapy')
32 w = ip.run_line_magic('Rget', '-d datapy')
33 np.testing.assert_almost_equal(w['x'], v['x'])
33 np.testing.assert_almost_equal(w['x'], v['x'])
34 np.testing.assert_almost_equal(w['y'], v['y'])
34 np.testing.assert_almost_equal(w['y'], v['y'])
35 nt.assert_true(np.all(w['z'] == v['z']))
35 nt.assert_true(np.all(w['z'] == v['z']))
36 np.testing.assert_equal(id(w.data), id(v.data))
36 np.testing.assert_equal(id(w.data), id(v.data))
37 nt.assert_equal(w.dtype, v.dtype)
37 nt.assert_equal(w.dtype, v.dtype)
38
38
39 ip.run_cell_magic('R', ' -o datar datar=datapy', '')
39 ip.run_cell_magic('R', ' -d datar datar=datapy', '')
40
40
41 u = ip.run_line_magic('R', 'datar')
41 u = ip.run_line_magic('Rget', ' -d datar')
42 np.testing.assert_almost_equal(u['x'], v['x'])
42 np.testing.assert_almost_equal(u['x'], v['x'])
43 np.testing.assert_almost_equal(u['y'], v['y'])
43 np.testing.assert_almost_equal(u['y'], v['y'])
44 nt.assert_true(np.all(u['z'] == v['z']))
44 nt.assert_true(np.all(u['z'] == v['z']))
General Comments 0
You need to be logged in to leave comments. Login now