##// END OF EJS Templates
remove abspath in conversion process
Matthias BUSSONNIER -
Show More
@@ -1,323 +1,323 b''
1 from __future__ import print_function, absolute_import
1 from __future__ import print_function, absolute_import
2 from converters.utils import remove_fake_files_url
2 from converters.utils import remove_fake_files_url
3
3
4 # Stdlib
4 # Stdlib
5 import codecs
5 import codecs
6 import io
6 import io
7 import logging
7 import logging
8 import os
8 import os
9 import pprint
9 import pprint
10 from types import FunctionType
10 from types import FunctionType
11
11
12 # From IPython
12 # From IPython
13 from IPython.nbformat import current as nbformat
13 from IPython.nbformat import current as nbformat
14
14
15 # local
15 # local
16
16
17 #-----------------------------------------------------------------------------
17 #-----------------------------------------------------------------------------
18 # Class declarations
18 # Class declarations
19 #-----------------------------------------------------------------------------
19 #-----------------------------------------------------------------------------
20
20
21 class ConversionException(Exception):
21 class ConversionException(Exception):
22 pass
22 pass
23
23
24 class DocStringInheritor(type):
24 class DocStringInheritor(type):
25 """
25 """
26 This metaclass will walk the list of bases until the desired
26 This metaclass will walk the list of bases until the desired
27 superclass method is found AND if that method has a docstring and only
27 superclass method is found AND if that method has a docstring and only
28 THEN does it attach the superdocstring to the derived class method.
28 THEN does it attach the superdocstring to the derived class method.
29
29
30 Please use carefully, I just did the metaclass thing by following
30 Please use carefully, I just did the metaclass thing by following
31 Michael Foord's Metaclass tutorial
31 Michael Foord's Metaclass tutorial
32 (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
32 (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
33 have missed a step or two.
33 have missed a step or two.
34
34
35 source:
35 source:
36 http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
36 http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
37 by Paul McGuire
37 by Paul McGuire
38 """
38 """
39 def __new__(meta, classname, bases, classDict):
39 def __new__(meta, classname, bases, classDict):
40 newClassDict = {}
40 newClassDict = {}
41 for attributeName, attribute in classDict.items():
41 for attributeName, attribute in classDict.items():
42 if type(attribute) == FunctionType:
42 if type(attribute) == FunctionType:
43 # look through bases for matching function by name
43 # look through bases for matching function by name
44 for baseclass in bases:
44 for baseclass in bases:
45 if hasattr(baseclass, attributeName):
45 if hasattr(baseclass, attributeName):
46 basefn = getattr(baseclass, attributeName)
46 basefn = getattr(baseclass, attributeName)
47 if basefn.__doc__:
47 if basefn.__doc__:
48 attribute.__doc__ = basefn.__doc__
48 attribute.__doc__ = basefn.__doc__
49 break
49 break
50 newClassDict[attributeName] = attribute
50 newClassDict[attributeName] = attribute
51 return type.__new__(meta, classname, bases, newClassDict)
51 return type.__new__(meta, classname, bases, newClassDict)
52
52
53 class Converter(object):
53 class Converter(object):
54 __metaclass__ = DocStringInheritor
54 __metaclass__ = DocStringInheritor
55 default_encoding = 'utf-8'
55 default_encoding = 'utf-8'
56 extension = str()
56 extension = str()
57 figures_counter = 0
57 figures_counter = 0
58 infile = str()
58 infile = str()
59 infile_dir = str()
59 infile_dir = str()
60 infile_root = str()
60 infile_root = str()
61 files_dir = str()
61 files_dir = str()
62 with_preamble = True
62 with_preamble = True
63 user_preamble = None
63 user_preamble = None
64 output = unicode()
64 output = unicode()
65 raw_as_verbatim = False
65 raw_as_verbatim = False
66
66
67 def __init__(self, infile):
67 def __init__(self, infile):
68 self.infile = infile
68 self.infile = infile
69 self.infile_dir, infile_root = os.path.split(infile)
69 self.infile_dir, infile_root = os.path.split(infile)
70 infile_root = os.path.splitext(infile_root)[0]
70 infile_root = os.path.splitext(infile_root)[0]
71 files_dir = os.path.join(self.infile_dir, infile_root + '_files')
71 files_dir = os.path.join(self.infile_dir, infile_root + '_files')
72 if not os.path.isdir(files_dir):
72 if not os.path.isdir(files_dir):
73 os.mkdir(files_dir)
73 os.mkdir(files_dir)
74 self.infile_root = infile_root
74 self.infile_root = infile_root
75 self.files_dir = os.path.abspath(files_dir)
75 self.files_dir = files_dir
76 self.outbase = os.path.join(self.infile_dir, infile_root)
76 self.outbase = os.path.join(self.infile_dir, infile_root)
77
77
78 def __del__(self):
78 def __del__(self):
79 if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
79 if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
80 os.rmdir(self.files_dir)
80 os.rmdir(self.files_dir)
81
81
82 def dispatch(self, cell_type):
82 def dispatch(self, cell_type):
83 """return cell_type dependent render method, for example render_code
83 """return cell_type dependent render method, for example render_code
84 """
84 """
85 return getattr(self, 'render_' + cell_type, self.render_unknown)
85 return getattr(self, 'render_' + cell_type, self.render_unknown)
86
86
87 def dispatch_display_format(self, format):
87 def dispatch_display_format(self, format):
88 """return output_type dependent render method, for example render_output_text
88 """return output_type dependent render method, for example render_output_text
89 """
89 """
90 return getattr(self, 'render_display_format_' + format, self.render_unknown_display)
90 return getattr(self, 'render_display_format_' + format, self.render_unknown_display)
91
91
92 def convert(self, cell_separator='\n'):
92 def convert(self, cell_separator='\n'):
93 """
93 """
94 Generic method to convert a notebook to a string representation.
94 Generic method to convert a notebook to a string representation.
95
95
96 This is accomplished by dispatching on the cell_type, so subclasses of
96 This is accomplished by dispatching on the cell_type, so subclasses of
97 Converter class do not need to re-implement this method, but just
97 Converter class do not need to re-implement this method, but just
98 need implementation for the methods that will be dispatched.
98 need implementation for the methods that will be dispatched.
99
99
100 Parameters
100 Parameters
101 ----------
101 ----------
102 cell_separator : string
102 cell_separator : string
103 Character or string to join cells with. Default is "\n"
103 Character or string to join cells with. Default is "\n"
104
104
105 Returns
105 Returns
106 -------
106 -------
107 out : string
107 out : string
108 """
108 """
109 lines = []
109 lines = []
110 lines.extend(self.optional_header())
110 lines.extend(self.optional_header())
111 lines.extend(self.main_body(cell_separator))
111 lines.extend(self.main_body(cell_separator))
112 lines.extend(self.optional_footer())
112 lines.extend(self.optional_footer())
113 return u'\n'.join(lines)
113 return u'\n'.join(lines)
114
114
115 def main_body(self, cell_separator='\n'):
115 def main_body(self, cell_separator='\n'):
116 converted_cells = []
116 converted_cells = []
117 for worksheet in self.nb.worksheets:
117 for worksheet in self.nb.worksheets:
118 for cell in worksheet.cells:
118 for cell in worksheet.cells:
119 #print(cell.cell_type) # dbg
119 #print(cell.cell_type) # dbg
120 conv_fn = self.dispatch(cell.cell_type)
120 conv_fn = self.dispatch(cell.cell_type)
121 if cell.cell_type in ('markdown', 'raw'):
121 if cell.cell_type in ('markdown', 'raw'):
122 remove_fake_files_url(cell)
122 remove_fake_files_url(cell)
123 converted_cells.append('\n'.join(conv_fn(cell)))
123 converted_cells.append('\n'.join(conv_fn(cell)))
124 cell_lines = cell_separator.join(converted_cells).split('\n')
124 cell_lines = cell_separator.join(converted_cells).split('\n')
125 return cell_lines
125 return cell_lines
126
126
127 def render(self):
127 def render(self):
128 "read, convert, and save self.infile"
128 "read, convert, and save self.infile"
129 if not hasattr(self, 'nb'):
129 if not hasattr(self, 'nb'):
130 self.read()
130 self.read()
131 self.output = self.convert()
131 self.output = self.convert()
132 assert(type(self.output) == unicode)
132 assert(type(self.output) == unicode)
133 return self.save()
133 return self.save()
134
134
135 def read(self):
135 def read(self):
136 "read and parse notebook into NotebookNode called self.nb"
136 "read and parse notebook into NotebookNode called self.nb"
137 with open(self.infile) as f:
137 with open(self.infile) as f:
138 self.nb = nbformat.read(f, 'json')
138 self.nb = nbformat.read(f, 'json')
139
139
140 def save(self, outfile=None, encoding=None):
140 def save(self, outfile=None, encoding=None):
141 "write self.output to outfile and return the absolute path of outfile"
141 "write self.output to outfile and return the absolute path of outfile"
142 if outfile is None:
142 if outfile is None:
143 outfile = self.outbase + '.' + self.extension
143 outfile = self.outbase + '.' + self.extension
144 if encoding is None:
144 if encoding is None:
145 encoding = self.default_encoding
145 encoding = self.default_encoding
146 with io.open(outfile, 'w', encoding=encoding) as f:
146 with io.open(outfile, 'w', encoding=encoding) as f:
147 f.write(self.output)
147 f.write(self.output)
148 return os.path.abspath(outfile)
148 return os.path.abspath(outfile)
149
149
150 def optional_header(self):
150 def optional_header(self):
151 """
151 """
152 Optional header to insert at the top of the converted notebook
152 Optional header to insert at the top of the converted notebook
153
153
154 Returns a list
154 Returns a list
155 """
155 """
156 return []
156 return []
157
157
158 def optional_footer(self):
158 def optional_footer(self):
159 """
159 """
160 Optional footer to insert at the end of the converted notebook
160 Optional footer to insert at the end of the converted notebook
161
161
162 Returns a list
162 Returns a list
163 """
163 """
164 return []
164 return []
165
165
166 def _new_figure(self, data, fmt):
166 def _new_figure(self, data, fmt):
167 """Create a new figure file in the given format.
167 """Create a new figure file in the given format.
168
168
169 Returns a path relative to the input file.
169 Returns a path relative to the input file.
170 """
170 """
171 figname = '%s_fig_%02i.%s' % (self.infile_root,
171 figname = '%s_fig_%02i.%s' % (self.infile_root,
172 self.figures_counter, fmt)
172 self.figures_counter, fmt)
173 self.figures_counter += 1
173 self.figures_counter += 1
174 fullname = os.path.join(self.files_dir, figname)
174 fullname = os.path.join(self.files_dir, figname)
175
175
176 # Binary files are base64-encoded, SVG is already XML
176 # Binary files are base64-encoded, SVG is already XML
177 if fmt in ('png', 'jpg', 'pdf'):
177 if fmt in ('png', 'jpg', 'pdf'):
178 data = data.decode('base64')
178 data = data.decode('base64')
179 fopen = lambda fname: open(fname, 'wb')
179 fopen = lambda fname: open(fname, 'wb')
180 else:
180 else:
181 fopen = lambda fname: codecs.open(fname, 'wb', self.default_encoding)
181 fopen = lambda fname: codecs.open(fname, 'wb', self.default_encoding)
182
182
183 with fopen(fullname) as f:
183 with fopen(fullname) as f:
184 f.write(data)
184 f.write(data)
185
185
186 return fullname
186 return fullname
187
187
188 def render_heading(self, cell):
188 def render_heading(self, cell):
189 """convert a heading cell
189 """convert a heading cell
190
190
191 Returns list."""
191 Returns list."""
192 raise NotImplementedError
192 raise NotImplementedError
193
193
194 def render_code(self, cell):
194 def render_code(self, cell):
195 """Convert a code cell
195 """Convert a code cell
196
196
197 Returns list."""
197 Returns list."""
198 raise NotImplementedError
198 raise NotImplementedError
199
199
200 def render_markdown(self, cell):
200 def render_markdown(self, cell):
201 """convert a markdown cell
201 """convert a markdown cell
202
202
203 Returns list."""
203 Returns list."""
204 raise NotImplementedError
204 raise NotImplementedError
205
205
206 def _img_lines(self, img_file):
206 def _img_lines(self, img_file):
207 """Return list of lines to include an image file."""
207 """Return list of lines to include an image file."""
208 # Note: subclasses may choose to implement format-specific _FMT_lines
208 # Note: subclasses may choose to implement format-specific _FMT_lines
209 # methods if they so choose (FMT in {png, svg, jpg, pdf}).
209 # methods if they so choose (FMT in {png, svg, jpg, pdf}).
210 raise NotImplementedError
210 raise NotImplementedError
211
211
212 def render_display_data(self, output):
212 def render_display_data(self, output):
213 """convert display data from the output of a code cell
213 """convert display data from the output of a code cell
214
214
215 Returns list.
215 Returns list.
216 """
216 """
217 lines = []
217 lines = []
218
218
219 for fmt in output.keys():
219 for fmt in output.keys():
220 if fmt in ['png', 'svg', 'jpg', 'pdf']:
220 if fmt in ['png', 'svg', 'jpg', 'pdf']:
221 img_file = self._new_figure(output[fmt], fmt)
221 img_file = self._new_figure(output[fmt], fmt)
222 # Subclasses can have format-specific render functions (e.g.,
222 # Subclasses can have format-specific render functions (e.g.,
223 # latex has to auto-convert all SVG to PDF first).
223 # latex has to auto-convert all SVG to PDF first).
224 lines_fun = getattr(self, '_%s_lines' % fmt, None)
224 lines_fun = getattr(self, '_%s_lines' % fmt, None)
225 if not lines_fun:
225 if not lines_fun:
226 lines_fun = self._img_lines
226 lines_fun = self._img_lines
227 lines.extend(lines_fun(img_file))
227 lines.extend(lines_fun(img_file))
228 elif fmt != 'output_type':
228 elif fmt != 'output_type':
229 conv_fn = self.dispatch_display_format(fmt)
229 conv_fn = self.dispatch_display_format(fmt)
230 lines.extend(conv_fn(output))
230 lines.extend(conv_fn(output))
231 return lines
231 return lines
232
232
233 def render_raw(self, cell):
233 def render_raw(self, cell):
234 """convert a cell with raw text
234 """convert a cell with raw text
235
235
236 Returns list."""
236 Returns list."""
237 raise NotImplementedError
237 raise NotImplementedError
238
238
239 def render_unknown(self, cell):
239 def render_unknown(self, cell):
240 """Render cells of unknown type
240 """Render cells of unknown type
241
241
242 Returns list."""
242 Returns list."""
243 data = pprint.pformat(cell)
243 data = pprint.pformat(cell)
244 logging.warning('Unknown cell: %s' % cell.cell_type)
244 logging.warning('Unknown cell: %s' % cell.cell_type)
245 return self._unknown_lines(data)
245 return self._unknown_lines(data)
246
246
247 def render_unknown_display(self, output, type):
247 def render_unknown_display(self, output, type):
248 """Render outputs of unknown type
248 """Render outputs of unknown type
249
249
250 Returns list."""
250 Returns list."""
251 data = pprint.pformat(output)
251 data = pprint.pformat(output)
252 logging.warning('Unknown output: %s' % output.output_type)
252 logging.warning('Unknown output: %s' % output.output_type)
253 return self._unknown_lines(data)
253 return self._unknown_lines(data)
254
254
255 def render_stream(self, output):
255 def render_stream(self, output):
256 """render the stream part of an output
256 """render the stream part of an output
257
257
258 Returns list.
258 Returns list.
259
259
260 Identical to render_display_format_text
260 Identical to render_display_format_text
261 """
261 """
262 return self.render_display_format_text(output)
262 return self.render_display_format_text(output)
263
263
264 def render_pyout(self, output):
264 def render_pyout(self, output):
265 """convert pyout part of a code cell
265 """convert pyout part of a code cell
266
266
267 Returns list."""
267 Returns list."""
268 raise NotImplementedError
268 raise NotImplementedError
269
269
270
270
271 def render_pyerr(self, output):
271 def render_pyerr(self, output):
272 """convert pyerr part of a code cell
272 """convert pyerr part of a code cell
273
273
274 Returns list."""
274 Returns list."""
275 raise NotImplementedError
275 raise NotImplementedError
276
276
277 def _unknown_lines(self, data):
277 def _unknown_lines(self, data):
278 """Return list of lines for an unknown cell.
278 """Return list of lines for an unknown cell.
279
279
280 Parameters
280 Parameters
281 ----------
281 ----------
282 data : str
282 data : str
283 The content of the unknown data as a single string.
283 The content of the unknown data as a single string.
284 """
284 """
285 raise NotImplementedError
285 raise NotImplementedError
286
286
287 # These are the possible format types in an output node
287 # These are the possible format types in an output node
288
288
289 def render_display_format_text(self, output):
289 def render_display_format_text(self, output):
290 """render the text part of an output
290 """render the text part of an output
291
291
292 Returns list.
292 Returns list.
293 """
293 """
294 raise NotImplementedError
294 raise NotImplementedError
295
295
296 def render_display_format_html(self, output):
296 def render_display_format_html(self, output):
297 """render the html part of an output
297 """render the html part of an output
298
298
299 Returns list.
299 Returns list.
300 """
300 """
301 raise NotImplementedError
301 raise NotImplementedError
302
302
303 def render_display_format_latex(self, output):
303 def render_display_format_latex(self, output):
304 """render the latex part of an output
304 """render the latex part of an output
305
305
306 Returns list.
306 Returns list.
307 """
307 """
308 raise NotImplementedError
308 raise NotImplementedError
309
309
310 def render_display_format_json(self, output):
310 def render_display_format_json(self, output):
311 """render the json part of an output
311 """render the json part of an output
312
312
313 Returns list.
313 Returns list.
314 """
314 """
315 raise NotImplementedError
315 raise NotImplementedError
316
316
317 def render_display_format_javascript(self, output):
317 def render_display_format_javascript(self, output):
318 """render the javascript part of an output
318 """render the javascript part of an output
319
319
320 Returns list.
320 Returns list.
321 """
321 """
322 raise NotImplementedError
322 raise NotImplementedError
323
323
@@ -1,1294 +1,1294 b''
1 # An Introduction to the Scientific Python Ecosystem
1 # An Introduction to the Scientific Python Ecosystem
2
2
3 While the Python language is an excellent tool for general-purpose programming, with a highly readable syntax, rich and powerful data types (strings, lists, sets, dictionaries, arbitrary length integers, etc) and a very comprehensive standard library, it was not designed specifically for mathematical and scientific computing. Neither the language nor its standard library has facilities for the efficient representation of multidimensional datasets, tools for linear algebra and general matrix manipulations (an essential building block of virtually all technical computing), nor any data visualization facilities.
3 While the Python language is an excellent tool for general-purpose programming, with a highly readable syntax, rich and powerful data types (strings, lists, sets, dictionaries, arbitrary length integers, etc) and a very comprehensive standard library, it was not designed specifically for mathematical and scientific computing. Neither the language nor its standard library has facilities for the efficient representation of multidimensional datasets, tools for linear algebra and general matrix manipulations (an essential building block of virtually all technical computing), nor any data visualization facilities.
4
4
5 In particular, Python lists are very flexible containers that can be nested arbitrarily deep and which can hold any Python object in them, but they are poorly suited to represent efficiently common mathematical constructs like vectors and matrices. In contrast, much of our modern heritage of scientific computing has been built on top of libraries written in the Fortran language, which has native support for vectors and matrices as well as a library of mathematical functions that can efficiently operate on entire arrays at once.
5 In particular, Python lists are very flexible containers that can be nested arbitrarily deep and which can hold any Python object in them, but they are poorly suited to represent efficiently common mathematical constructs like vectors and matrices. In contrast, much of our modern heritage of scientific computing has been built on top of libraries written in the Fortran language, which has native support for vectors and matrices as well as a library of mathematical functions that can efficiently operate on entire arrays at once.
6
6
7 ## Scientific Python: a collaboration of projects built by scientists
7 ## Scientific Python: a collaboration of projects built by scientists
8
8
9 The scientific community has developed a set of related Python libraries that provide powerful array facilities, linear algebra, numerical algorithms, data visualization and more. In this appendix, we will briefly outline the tools most frequently used for this purpose, that make "Scientific Python" something far more powerful than the Python language alone.
9 The scientific community has developed a set of related Python libraries that provide powerful array facilities, linear algebra, numerical algorithms, data visualization and more. In this appendix, we will briefly outline the tools most frequently used for this purpose, that make "Scientific Python" something far more powerful than the Python language alone.
10
10
11 For reasons of space, we can only describe in some detail the central Numpy library, but below we provide links to the websites of each project where you can read their documentation in more detail.
11 For reasons of space, we can only describe in some detail the central Numpy library, but below we provide links to the websites of each project where you can read their documentation in more detail.
12
12
13 First, let's look at an overview of the basic tools that most scientists use in daily research with Python. The core of this ecosystem is composed of:
13 First, let's look at an overview of the basic tools that most scientists use in daily research with Python. The core of this ecosystem is composed of:
14
14
15 * Numpy: the basic library that most others depend on, it provides a powerful array type that can represent multidimensional datasets of many different kinds and that supports arithmetic operations. Numpy also provides a library of common mathematical functions, basic linear algebra, random number generation and Fast Fourier Transforms. Numpy can be found at [numpy.scipy.org](http://numpy.scipy.org).
15 * Numpy: the basic library that most others depend on, it provides a powerful array type that can represent multidimensional datasets of many different kinds and that supports arithmetic operations. Numpy also provides a library of common mathematical functions, basic linear algebra, random number generation and Fast Fourier Transforms. Numpy can be found at [numpy.scipy.org](http://numpy.scipy.org).
16
16
17 * Scipy: a large collection of numerical algorithms that operate on numpy arrays and provide facilities for many common tasks in scientific computing, including dense and sparse linear algebra support, optimization, special functions, statistics, n-dimensional image processing, signal processing and more. Scipy can be found at [scipy.org](http://scipy.org).
17 * Scipy: a large collection of numerical algorithms that operate on numpy arrays and provide facilities for many common tasks in scientific computing, including dense and sparse linear algebra support, optimization, special functions, statistics, n-dimensional image processing, signal processing and more. Scipy can be found at [scipy.org](http://scipy.org).
18
18
19 * Matplotlib: a data visualization library with a strong focus on producing high-quality output, it supports a variety of common scientific plot types in two and three dimensions, with precise control over the final output and format for publication-quality results. Matplotlib can also be controlled interactively allowing graphical manipulation of your data (zooming, panning, etc) and can be used with most modern user interface toolkits. It can be found at [matplotlib.sf.net](http://matplotlib.sf.net).
19 * Matplotlib: a data visualization library with a strong focus on producing high-quality output, it supports a variety of common scientific plot types in two and three dimensions, with precise control over the final output and format for publication-quality results. Matplotlib can also be controlled interactively allowing graphical manipulation of your data (zooming, panning, etc) and can be used with most modern user interface toolkits. It can be found at [matplotlib.sf.net](http://matplotlib.sf.net).
20
20
21 * IPython: while not strictly scientific in nature, IPython is the interactive environment in which many scientists spend their time. IPython provides a powerful Python shell that integrates tightly with Matplotlib and with easy access to the files and operating system, and which can execute in a terminal or in a graphical Qt console. IPython also has a web-based notebook interface that can combine code with text, mathematical expressions, figures and multimedia. It can be found at [ipython.org](http://ipython.org).
21 * IPython: while not strictly scientific in nature, IPython is the interactive environment in which many scientists spend their time. IPython provides a powerful Python shell that integrates tightly with Matplotlib and with easy access to the files and operating system, and which can execute in a terminal or in a graphical Qt console. IPython also has a web-based notebook interface that can combine code with text, mathematical expressions, figures and multimedia. It can be found at [ipython.org](http://ipython.org).
22
22
23 While each of these tools can be installed separately, in our opinion the most convenient way today of accessing them (especially on Windows and Mac computers) is to install the [Free Edition of the Enthought Python Distribution](http://www.enthought.com/products/epd_free.php), which contains all of the above. Other free alternatives on Windows (but not on Macs) are [Python(x,y)](http://code.google.com/p/pythonxy) and [Christoph Gohlke's packages page](http://www.lfd.uci.edu/~gohlke/pythonlibs).
23 While each of these tools can be installed separately, in our opinion the most convenient way today of accessing them (especially on Windows and Mac computers) is to install the [Free Edition of the Enthought Python Distribution](http://www.enthought.com/products/epd_free.php), which contains all of the above. Other free alternatives on Windows (but not on Macs) are [Python(x,y)](http://code.google.com/p/pythonxy) and [Christoph Gohlke's packages page](http://www.lfd.uci.edu/~gohlke/pythonlibs).
24
24
25 These four 'core' libraries are in practice complemented by a number of other tools for more specialized work. We will briefly list here the ones that we think are the most commonly needed:
25 These four 'core' libraries are in practice complemented by a number of other tools for more specialized work. We will briefly list here the ones that we think are the most commonly needed:
26
26
27 * Sympy: a symbolic manipulation tool that turns a Python session into a computer algebra system. It integrates with the IPython notebook, rendering results in properly typeset mathematical notation. [sympy.org](http://sympy.org).
27 * Sympy: a symbolic manipulation tool that turns a Python session into a computer algebra system. It integrates with the IPython notebook, rendering results in properly typeset mathematical notation. [sympy.org](http://sympy.org).
28
28
29 * Mayavi: sophisticated 3d data visualization; [code.enthought.com/projects/mayavi](http://code.enthought.com/projects/mayavi).
29 * Mayavi: sophisticated 3d data visualization; [code.enthought.com/projects/mayavi](http://code.enthought.com/projects/mayavi).
30
30
31 * Cython: a bridge language between Python and C, useful both to optimize performance bottlenecks in Python and to access C libraries directly; [cython.org](http://cython.org).
31 * Cython: a bridge language between Python and C, useful both to optimize performance bottlenecks in Python and to access C libraries directly; [cython.org](http://cython.org).
32
32
33 * Pandas: high-performance data structures and data analysis tools, with powerful data alignment and structural manipulation capabilities; [pandas.pydata.org](http://pandas.pydata.org).
33 * Pandas: high-performance data structures and data analysis tools, with powerful data alignment and structural manipulation capabilities; [pandas.pydata.org](http://pandas.pydata.org).
34
34
35 * Statsmodels: statistical data exploration and model estimation; [statsmodels.sourceforge.net](http://statsmodels.sourceforge.net).
35 * Statsmodels: statistical data exploration and model estimation; [statsmodels.sourceforge.net](http://statsmodels.sourceforge.net).
36
36
37 * Scikit-learn: general purpose machine learning algorithms with a common interface; [scikit-learn.org](http://scikit-learn.org).
37 * Scikit-learn: general purpose machine learning algorithms with a common interface; [scikit-learn.org](http://scikit-learn.org).
38
38
39 * Scikits-image: image processing toolbox; [scikits-image.org](http://scikits-image.org).
39 * Scikits-image: image processing toolbox; [scikits-image.org](http://scikits-image.org).
40
40
41 * NetworkX: analysis of complex networks (in the graph theoretical sense); [networkx.lanl.gov](http://networkx.lanl.gov).
41 * NetworkX: analysis of complex networks (in the graph theoretical sense); [networkx.lanl.gov](http://networkx.lanl.gov).
42
42
43 * PyTables: management of hierarchical datasets using the industry-standard HDF5 format; [www.pytables.org](http://www.pytables.org).
43 * PyTables: management of hierarchical datasets using the industry-standard HDF5 format; [www.pytables.org](http://www.pytables.org).
44
44
45 Beyond these, for any specific problem you should look on the internet first, before starting to write code from scratch. There's a good chance that someone, somewhere, has written an open source library that you can use for part or all of your problem.
45 Beyond these, for any specific problem you should look on the internet first, before starting to write code from scratch. There's a good chance that someone, somewhere, has written an open source library that you can use for part or all of your problem.
46
46
47 ## A note about the examples below
47 ## A note about the examples below
48
48
49 In all subsequent examples, you will see blocks of input code, followed by the results of the code if the code generated output. This output may include text, graphics and other result objects. These blocks of input can be pasted into your interactive IPython session or notebook for you to execute. In the print version of this document, a thin vertical bar on the left of the blocks of input and output shows which blocks go together.
49 In all subsequent examples, you will see blocks of input code, followed by the results of the code if the code generated output. This output may include text, graphics and other result objects. These blocks of input can be pasted into your interactive IPython session or notebook for you to execute. In the print version of this document, a thin vertical bar on the left of the blocks of input and output shows which blocks go together.
50
50
51 If you are reading this text as an actual IPython notebook, you can press `Shift-Enter` or use the 'play' button on the toolbar (right-pointing triangle) to execute each block of code, known as a 'cell' in IPython:
51 If you are reading this text as an actual IPython notebook, you can press `Shift-Enter` or use the 'play' button on the toolbar (right-pointing triangle) to execute each block of code, known as a 'cell' in IPython:
52
52
53 <div class="highlight"><pre><span class="c"># This is a block of code, below you&#39;ll see its output</span>
53 <div class="highlight"><pre><span class="c"># This is a block of code, below you&#39;ll see its output</span>
54 <span class="k">print</span> <span class="s">&quot;Welcome to the world of scientific computing with Python!&quot;</span>
54 <span class="k">print</span> <span class="s">&quot;Welcome to the world of scientific computing with Python!&quot;</span>
55 </pre></div>
55 </pre></div>
56
56
57
57
58 Welcome to the world of scientific computing with Python!
58 Welcome to the world of scientific computing with Python!
59
59
60
60
61 # Motivation: the trapezoidal rule
61 # Motivation: the trapezoidal rule
62
62
63 In subsequent sections we'll provide a basic introduction to the nuts and bolts of the basic scientific python tools; but we'll first motivate it with a brief example that illustrates what you can do in a few lines with these tools. For this, we will use the simple problem of approximating a definite integral with the trapezoid rule:
63 In subsequent sections we'll provide a basic introduction to the nuts and bolts of the basic scientific python tools; but we'll first motivate it with a brief example that illustrates what you can do in a few lines with these tools. For this, we will use the simple problem of approximating a definite integral with the trapezoid rule:
64
64
65 $$
65 $$
66 \int_{a}^{b} f(x)\, dx \approx \frac{1}{2} \sum_{k=1}^{N} \left( x_{k} - x_{k-1} \right) \left( f(x_{k}) + f(x_{k-1}) \right).
66 \int_{a}^{b} f(x)\, dx \approx \frac{1}{2} \sum_{k=1}^{N} \left( x_{k} - x_{k-1} \right) \left( f(x_{k}) + f(x_{k-1}) \right).
67 $$
67 $$
68
68
69 Our task will be to compute this formula for a function such as:
69 Our task will be to compute this formula for a function such as:
70
70
71 $$
71 $$
72 f(x) = (x-3)(x-5)(x-7)+85
72 f(x) = (x-3)(x-5)(x-7)+85
73 $$
73 $$
74
74
75 integrated between $a=1$ and $b=9$.
75 integrated between $a=1$ and $b=9$.
76
76
77 First, we define the function and sample it evenly between 0 and 10 at 200 points:
77 First, we define the function and sample it evenly between 0 and 10 at 200 points:
78
78
79 <div class="highlight"><pre><span class="k">def</span> <span class="nf">f</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
79 <div class="highlight"><pre><span class="k">def</span> <span class="nf">f</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
80 <span class="k">return</span> <span class="p">(</span><span class="n">x</span><span class="o">-</span><span class="mi">3</span><span class="p">)</span><span class="o">*</span><span class="p">(</span><span class="n">x</span><span class="o">-</span><span class="mi">5</span><span class="p">)</span><span class="o">*</span><span class="p">(</span><span class="n">x</span><span class="o">-</span><span class="mi">7</span><span class="p">)</span><span class="o">+</span><span class="mi">85</span>
80 <span class="k">return</span> <span class="p">(</span><span class="n">x</span><span class="o">-</span><span class="mi">3</span><span class="p">)</span><span class="o">*</span><span class="p">(</span><span class="n">x</span><span class="o">-</span><span class="mi">5</span><span class="p">)</span><span class="o">*</span><span class="p">(</span><span class="n">x</span><span class="o">-</span><span class="mi">7</span><span class="p">)</span><span class="o">+</span><span class="mi">85</span>
81
81
82 <span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
82 <span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
83 <span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">200</span><span class="p">)</span>
83 <span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">200</span><span class="p">)</span>
84 <span class="n">y</span> <span class="o">=</span> <span class="n">f</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
84 <span class="n">y</span> <span class="o">=</span> <span class="n">f</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
85 </pre></div>
85 </pre></div>
86
86
87
87
88
88
89 We select $a$ and $b$, our integration limits, and we take only a few points in that region to illustrate the error behavior of the trapezoid approximation:
89 We select $a$ and $b$, our integration limits, and we take only a few points in that region to illustrate the error behavior of the trapezoid approximation:
90
90
91 <div class="highlight"><pre><span class="n">a</span><span class="p">,</span> <span class="n">b</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">9</span>
91 <div class="highlight"><pre><span class="n">a</span><span class="p">,</span> <span class="n">b</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">9</span>
92 <span class="n">xint</span> <span class="o">=</span> <span class="n">x</span><span class="p">[</span><span class="n">logical_and</span><span class="p">(</span><span class="n">x</span><span class="o">&gt;=</span><span class="n">a</span><span class="p">,</span> <span class="n">x</span><span class="o">&lt;=</span><span class="n">b</span><span class="p">)][::</span><span class="mi">30</span><span class="p">]</span>
92 <span class="n">xint</span> <span class="o">=</span> <span class="n">x</span><span class="p">[</span><span class="n">logical_and</span><span class="p">(</span><span class="n">x</span><span class="o">&gt;=</span><span class="n">a</span><span class="p">,</span> <span class="n">x</span><span class="o">&lt;=</span><span class="n">b</span><span class="p">)][::</span><span class="mi">30</span><span class="p">]</span>
93 <span class="n">yint</span> <span class="o">=</span> <span class="n">y</span><span class="p">[</span><span class="n">logical_and</span><span class="p">(</span><span class="n">x</span><span class="o">&gt;=</span><span class="n">a</span><span class="p">,</span> <span class="n">x</span><span class="o">&lt;=</span><span class="n">b</span><span class="p">)][::</span><span class="mi">30</span><span class="p">]</span>
93 <span class="n">yint</span> <span class="o">=</span> <span class="n">y</span><span class="p">[</span><span class="n">logical_and</span><span class="p">(</span><span class="n">x</span><span class="o">&gt;=</span><span class="n">a</span><span class="p">,</span> <span class="n">x</span><span class="o">&lt;=</span><span class="n">b</span><span class="p">)][::</span><span class="mi">30</span><span class="p">]</span>
94 </pre></div>
94 </pre></div>
95
95
96
96
97
97
98 Let's plot both the function and the area below it in the trapezoid approximation:
98 Let's plot both the function and the area below it in the trapezoid approximation:
99
99
100 <div class="highlight"><pre><span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="kn">as</span> <span class="nn">plt</span>
100 <div class="highlight"><pre><span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="kn">as</span> <span class="nn">plt</span>
101 <span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">lw</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
101 <span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">lw</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
102 <span class="n">plt</span><span class="o">.</span><span class="n">axis</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">140</span><span class="p">])</span>
102 <span class="n">plt</span><span class="o">.</span><span class="n">axis</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">140</span><span class="p">])</span>
103 <span class="n">plt</span><span class="o">.</span><span class="n">fill_between</span><span class="p">(</span><span class="n">xint</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">yint</span><span class="p">,</span> <span class="n">facecolor</span><span class="o">=</span><span class="s">&#39;gray&#39;</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.4</span><span class="p">)</span>
103 <span class="n">plt</span><span class="o">.</span><span class="n">fill_between</span><span class="p">(</span><span class="n">xint</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">yint</span><span class="p">,</span> <span class="n">facecolor</span><span class="o">=</span><span class="s">&#39;gray&#39;</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.4</span><span class="p">)</span>
104 <span class="n">plt</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="mf">0.5</span> <span class="o">*</span> <span class="p">(</span><span class="n">a</span> <span class="o">+</span> <span class="n">b</span><span class="p">),</span> <span class="mi">30</span><span class="p">,</span><span class="s">r&quot;$\int_a^b f(x)dx$&quot;</span><span class="p">,</span> <span class="n">horizontalalignment</span><span class="o">=</span><span class="s">&#39;center&#39;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">20</span><span class="p">);</span>
104 <span class="n">plt</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="mf">0.5</span> <span class="o">*</span> <span class="p">(</span><span class="n">a</span> <span class="o">+</span> <span class="n">b</span><span class="p">),</span> <span class="mi">30</span><span class="p">,</span><span class="s">r&quot;$\int_a^b f(x)dx$&quot;</span><span class="p">,</span> <span class="n">horizontalalignment</span><span class="o">=</span><span class="s">&#39;center&#39;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">20</span><span class="p">);</span>
105 </pre></div>
105 </pre></div>
106
106
107
107
108
108
109 ![](/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_00.svg)
109 ![](tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_00.svg)
110
110
111
111
112 Compute the integral both at high accuracy and with the trapezoid approximation
112 Compute the integral both at high accuracy and with the trapezoid approximation
113
113
114 <div class="highlight"><pre><span class="kn">from</span> <span class="nn">scipy.integrate</span> <span class="kn">import</span> <span class="n">quad</span><span class="p">,</span> <span class="n">trapz</span>
114 <div class="highlight"><pre><span class="kn">from</span> <span class="nn">scipy.integrate</span> <span class="kn">import</span> <span class="n">quad</span><span class="p">,</span> <span class="n">trapz</span>
115 <span class="n">integral</span><span class="p">,</span> <span class="n">error</span> <span class="o">=</span> <span class="n">quad</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">9</span><span class="p">)</span>
115 <span class="n">integral</span><span class="p">,</span> <span class="n">error</span> <span class="o">=</span> <span class="n">quad</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">9</span><span class="p">)</span>
116 <span class="n">trap_integral</span> <span class="o">=</span> <span class="n">trapz</span><span class="p">(</span><span class="n">yint</span><span class="p">,</span> <span class="n">xint</span><span class="p">)</span>
116 <span class="n">trap_integral</span> <span class="o">=</span> <span class="n">trapz</span><span class="p">(</span><span class="n">yint</span><span class="p">,</span> <span class="n">xint</span><span class="p">)</span>
117 <span class="k">print</span> <span class="s">&quot;The integral is: </span><span class="si">%g</span><span class="s"> +/- </span><span class="si">%.1e</span><span class="s">&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">integral</span><span class="p">,</span> <span class="n">error</span><span class="p">)</span>
117 <span class="k">print</span> <span class="s">&quot;The integral is: </span><span class="si">%g</span><span class="s"> +/- </span><span class="si">%.1e</span><span class="s">&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">integral</span><span class="p">,</span> <span class="n">error</span><span class="p">)</span>
118 <span class="k">print</span> <span class="s">&quot;The trapezoid approximation with&quot;</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">xint</span><span class="p">),</span> <span class="s">&quot;points is:&quot;</span><span class="p">,</span> <span class="n">trap_integral</span>
118 <span class="k">print</span> <span class="s">&quot;The trapezoid approximation with&quot;</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">xint</span><span class="p">),</span> <span class="s">&quot;points is:&quot;</span><span class="p">,</span> <span class="n">trap_integral</span>
119 <span class="k">print</span> <span class="s">&quot;The absolute error is:&quot;</span><span class="p">,</span> <span class="nb">abs</span><span class="p">(</span><span class="n">integral</span> <span class="o">-</span> <span class="n">trap_integral</span><span class="p">)</span>
119 <span class="k">print</span> <span class="s">&quot;The absolute error is:&quot;</span><span class="p">,</span> <span class="nb">abs</span><span class="p">(</span><span class="n">integral</span> <span class="o">-</span> <span class="n">trap_integral</span><span class="p">)</span>
120 </pre></div>
120 </pre></div>
121
121
122
122
123 The integral is: 680 +/- 7.5e-12
123 The integral is: 680 +/- 7.5e-12
124 The trapezoid approximation with 6 points is: 621.286411141
124 The trapezoid approximation with 6 points is: 621.286411141
125 The absolute error is: 58.7135888589
125 The absolute error is: 58.7135888589
126
126
127
127
128 This simple example showed us how, by combining the numpy, scipy and matplotlib libraries, we can provide an illustration of a standard method in elementary calculus with just a few lines of code. We will now discuss the basic usage of these tools in more detail.
128 This simple example showed us how, by combining the numpy, scipy and matplotlib libraries, we can provide an illustration of a standard method in elementary calculus with just a few lines of code. We will now discuss the basic usage of these tools in more detail.
129
129
130 # NumPy arrays: the right data structure for scientific computing
130 # NumPy arrays: the right data structure for scientific computing
131
131
132 ## Basics of Numpy arrays
132 ## Basics of Numpy arrays
133
133
134 We now turn our attention to the Numpy library, which forms the base layer for the entire 'scipy ecosystem'. Once you have installed numpy, you can import it as
134 We now turn our attention to the Numpy library, which forms the base layer for the entire 'scipy ecosystem'. Once you have installed numpy, you can import it as
135
135
136 <div class="highlight"><pre><span class="kn">import</span> <span class="nn">numpy</span>
136 <div class="highlight"><pre><span class="kn">import</span> <span class="nn">numpy</span>
137 </pre></div>
137 </pre></div>
138
138
139
139
140
140
141 though in this book we will use the common shorthand
141 though in this book we will use the common shorthand
142
142
143 <div class="highlight"><pre><span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
143 <div class="highlight"><pre><span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>
144 </pre></div>
144 </pre></div>
145
145
146
146
147
147
148 As mentioned above, the main object provided by numpy is a powerful array. We'll start by exploring how the numpy array differs from Python lists. We start by creating a simple list and an array with the same contents as the list:
148 As mentioned above, the main object provided by numpy is a powerful array. We'll start by exploring how the numpy array differs from Python lists. We start by creating a simple list and an array with the same contents as the list:
149
149
150 <div class="highlight"><pre><span class="n">lst</span> <span class="o">=</span> <span class="p">[</span><span class="mi">10</span><span class="p">,</span> <span class="mi">20</span><span class="p">,</span> <span class="mi">30</span><span class="p">,</span> <span class="mi">40</span><span class="p">]</span>
150 <div class="highlight"><pre><span class="n">lst</span> <span class="o">=</span> <span class="p">[</span><span class="mi">10</span><span class="p">,</span> <span class="mi">20</span><span class="p">,</span> <span class="mi">30</span><span class="p">,</span> <span class="mi">40</span><span class="p">]</span>
151 <span class="n">arr</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">10</span><span class="p">,</span> <span class="mi">20</span><span class="p">,</span> <span class="mi">30</span><span class="p">,</span> <span class="mi">40</span><span class="p">])</span>
151 <span class="n">arr</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">10</span><span class="p">,</span> <span class="mi">20</span><span class="p">,</span> <span class="mi">30</span><span class="p">,</span> <span class="mi">40</span><span class="p">])</span>
152 </pre></div>
152 </pre></div>
153
153
154
154
155
155
156 Elements of a one-dimensional array are accessed with the same syntax as a list:
156 Elements of a one-dimensional array are accessed with the same syntax as a list:
157
157
158 <div class="highlight"><pre><span class="n">lst</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
158 <div class="highlight"><pre><span class="n">lst</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
159 </pre></div>
159 </pre></div>
160
160
161
161
162 <pre>
162 <pre>
163 10
163 10
164 </pre>
164 </pre>
165
165
166
166
167 <div class="highlight"><pre><span class="n">arr</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
167 <div class="highlight"><pre><span class="n">arr</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
168 </pre></div>
168 </pre></div>
169
169
170
170
171 <pre>
171 <pre>
172 10
172 10
173 </pre>
173 </pre>
174
174
175
175
176 <div class="highlight"><pre><span class="n">arr</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
176 <div class="highlight"><pre><span class="n">arr</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
177 </pre></div>
177 </pre></div>
178
178
179
179
180 <pre>
180 <pre>
181 40
181 40
182 </pre>
182 </pre>
183
183
184
184
185 <div class="highlight"><pre><span class="n">arr</span><span class="p">[</span><span class="mi">2</span><span class="p">:]</span>
185 <div class="highlight"><pre><span class="n">arr</span><span class="p">[</span><span class="mi">2</span><span class="p">:]</span>
186 </pre></div>
186 </pre></div>
187
187
188
188
189 <pre>
189 <pre>
190 array([30, 40])
190 array([30, 40])
191 </pre>
191 </pre>
192
192
193
193
194 The first difference to note between lists and arrays is that arrays are *homogeneous*; i.e. all elements of an array must be of the same type. In contrast, lists can contain elements of arbitrary type. For example, we can change the last element in our list above to be a string:
194 The first difference to note between lists and arrays is that arrays are *homogeneous*; i.e. all elements of an array must be of the same type. In contrast, lists can contain elements of arbitrary type. For example, we can change the last element in our list above to be a string:
195
195
196 <div class="highlight"><pre><span class="n">lst</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="s">&#39;a string inside a list&#39;</span>
196 <div class="highlight"><pre><span class="n">lst</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="s">&#39;a string inside a list&#39;</span>
197 <span class="n">lst</span>
197 <span class="n">lst</span>
198 </pre></div>
198 </pre></div>
199
199
200
200
201 <pre>
201 <pre>
202 [10, 20, 30, 'a string inside a list']
202 [10, 20, 30, 'a string inside a list']
203 </pre>
203 </pre>
204
204
205
205
206 but the same can not be done with an array, as we get an error message:
206 but the same can not be done with an array, as we get an error message:
207
207
208 <div class="highlight"><pre><span class="n">arr</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="s">&#39;a string inside an array&#39;</span>
208 <div class="highlight"><pre><span class="n">arr</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="s">&#39;a string inside an array&#39;</span>
209 </pre></div>
209 </pre></div>
210
210
211
211
212 ---------------------------------------------------------------------------
212 ---------------------------------------------------------------------------
213 ValueError Traceback (most recent call last)
213 ValueError Traceback (most recent call last)
214 /home/fperez/teach/book-math-labtool/<ipython-input-13-29c0bfa5fa8a> in <module>()
214 /home/fperez/teach/book-math-labtool/<ipython-input-13-29c0bfa5fa8a> in <module>()
215 ----> 1 arr[-1] = 'a string inside an array'
215 ----> 1 arr[-1] = 'a string inside an array'
216
216
217 ValueError: invalid literal for long() with base 10: 'a string inside an array'
217 ValueError: invalid literal for long() with base 10: 'a string inside an array'
218
218
219
219
220 The information about the type of an array is contained in its *dtype* attribute:
220 The information about the type of an array is contained in its *dtype* attribute:
221
221
222 <div class="highlight"><pre><span class="n">arr</span><span class="o">.</span><span class="n">dtype</span>
222 <div class="highlight"><pre><span class="n">arr</span><span class="o">.</span><span class="n">dtype</span>
223 </pre></div>
223 </pre></div>
224
224
225
225
226 <pre>
226 <pre>
227 dtype('int32')
227 dtype('int32')
228 </pre>
228 </pre>
229
229
230
230
231 Once an array has been created, its dtype is fixed and it can only store elements of the same type. For this example where the dtype is integer, if we store a floating point number it will be automatically converted into an integer:
231 Once an array has been created, its dtype is fixed and it can only store elements of the same type. For this example where the dtype is integer, if we store a floating point number it will be automatically converted into an integer:
232
232
233 <div class="highlight"><pre><span class="n">arr</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="mf">1.234</span>
233 <div class="highlight"><pre><span class="n">arr</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="mf">1.234</span>
234 <span class="n">arr</span>
234 <span class="n">arr</span>
235 </pre></div>
235 </pre></div>
236
236
237
237
238 <pre>
238 <pre>
239 array([10, 20, 30, 1])
239 array([10, 20, 30, 1])
240 </pre>
240 </pre>
241
241
242
242
243 Above we created an array from an existing list; now let us see other ways in which we can create arrays, which we'll illustrate next. A common need is to have an array initialized with a constant value, and very often this value is 0 or 1 (suitable as a starting value for additive and multiplicative loops respectively); `zeros` creates arrays of all zeros, with any desired dtype:
243 Above we created an array from an existing list; now let us see other ways in which we can create arrays, which we'll illustrate next. A common need is to have an array initialized with a constant value, and very often this value is 0 or 1 (suitable as a starting value for additive and multiplicative loops respectively); `zeros` creates arrays of all zeros, with any desired dtype:
244
244
245 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="nb">float</span><span class="p">)</span>
245 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="nb">float</span><span class="p">)</span>
246 </pre></div>
246 </pre></div>
247
247
248
248
249 <pre>
249 <pre>
250 array([ 0., 0., 0., 0., 0.])
250 array([ 0., 0., 0., 0., 0.])
251 </pre>
251 </pre>
252
252
253
253
254 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="nb">int</span><span class="p">)</span>
254 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="nb">int</span><span class="p">)</span>
255 </pre></div>
255 </pre></div>
256
256
257
257
258 <pre>
258 <pre>
259 array([0, 0, 0])
259 array([0, 0, 0])
260 </pre>
260 </pre>
261
261
262
262
263 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="nb">complex</span><span class="p">)</span>
263 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="nb">complex</span><span class="p">)</span>
264 </pre></div>
264 </pre></div>
265
265
266
266
267 <pre>
267 <pre>
268 array([ 0.+0.j, 0.+0.j, 0.+0.j])
268 array([ 0.+0.j, 0.+0.j, 0.+0.j])
269 </pre>
269 </pre>
270
270
271
271
272 and similarly for `ones`:
272 and similarly for `ones`:
273
273
274 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;5 ones:&#39;</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ones</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
274 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;5 ones:&#39;</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ones</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
275 </pre></div>
275 </pre></div>
276
276
277
277
278 5 ones: [ 1. 1. 1. 1. 1.]
278 5 ones: [ 1. 1. 1. 1. 1.]
279
279
280
280
281 If we want an array initialized with an arbitrary value, we can create an empty array and then use the fill method to put the value we want into the array:
281 If we want an array initialized with an arbitrary value, we can create an empty array and then use the fill method to put the value we want into the array:
282
282
283 <div class="highlight"><pre><span class="n">a</span> <span class="o">=</span> <span class="n">empty</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span>
283 <div class="highlight"><pre><span class="n">a</span> <span class="o">=</span> <span class="n">empty</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span>
284 <span class="n">a</span><span class="o">.</span><span class="n">fill</span><span class="p">(</span><span class="mf">5.5</span><span class="p">)</span>
284 <span class="n">a</span><span class="o">.</span><span class="n">fill</span><span class="p">(</span><span class="mf">5.5</span><span class="p">)</span>
285 <span class="n">a</span>
285 <span class="n">a</span>
286 </pre></div>
286 </pre></div>
287
287
288
288
289 <pre>
289 <pre>
290 array([ 5.5, 5.5, 5.5, 5.5])
290 array([ 5.5, 5.5, 5.5, 5.5])
291 </pre>
291 </pre>
292
292
293
293
294 Numpy also offers the `arange` function, which works like the builtin `range` but returns an array instead of a list:
294 Numpy also offers the `arange` function, which works like the builtin `range` but returns an array instead of a list:
295
295
296 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
296 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
297 </pre></div>
297 </pre></div>
298
298
299
299
300 <pre>
300 <pre>
301 array([0, 1, 2, 3, 4])
301 array([0, 1, 2, 3, 4])
302 </pre>
302 </pre>
303
303
304
304
305 and the `linspace` and `logspace` functions to create linearly and logarithmically-spaced grids respectively, with a fixed number of points and including both ends of the specified interval:
305 and the `linspace` and `logspace` functions to create linearly and logarithmically-spaced grids respectively, with a fixed number of points and including both ends of the specified interval:
306
306
307 <div class="highlight"><pre><span class="k">print</span> <span class="s">&quot;A linear grid between 0 and 1:&quot;</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
307 <div class="highlight"><pre><span class="k">print</span> <span class="s">&quot;A linear grid between 0 and 1:&quot;</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
308 <span class="k">print</span> <span class="s">&quot;A logarithmic grid between 10**1 and 10**4: &quot;</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span>
308 <span class="k">print</span> <span class="s">&quot;A logarithmic grid between 10**1 and 10**4: &quot;</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span>
309 </pre></div>
309 </pre></div>
310
310
311
311
312 A linear grid between 0 and 1: [ 0. 0.25 0.5 0.75 1. ]
312 A linear grid between 0 and 1: [ 0. 0.25 0.5 0.75 1. ]
313 A logarithmic grid between 10**1 and 10**4: [ 10. 100. 1000. 10000.]
313 A logarithmic grid between 10**1 and 10**4: [ 10. 100. 1000. 10000.]
314
314
315
315
316 Finally, it is often useful to create arrays with random numbers that follow a specific distribution. The `np.random` module contains a number of functions that can be used to this effect, for example this will produce an array of 5 random samples taken from a standard normal distribution (0 mean and variance 1):
316 Finally, it is often useful to create arrays with random numbers that follow a specific distribution. The `np.random` module contains a number of functions that can be used to this effect, for example this will produce an array of 5 random samples taken from a standard normal distribution (0 mean and variance 1):
317
317
318 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
318 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span>
319 </pre></div>
319 </pre></div>
320
320
321
321
322 <pre>
322 <pre>
323 array([-0.08633343, -0.67375434, 1.00589536, 0.87081651, 1.65597822])
323 array([-0.08633343, -0.67375434, 1.00589536, 0.87081651, 1.65597822])
324 </pre>
324 </pre>
325
325
326
326
327 whereas this will also give 5 samples, but from a normal distribution with a mean of 10 and a standard deviation of 3:
327 whereas this will also give 5 samples, but from a normal distribution with a mean of 10 and a standard deviation of 3:
328
328
329 <div class="highlight"><pre><span class="n">norm10</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
329 <div class="highlight"><pre><span class="n">norm10</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
330 <span class="n">norm10</span>
330 <span class="n">norm10</span>
331 </pre></div>
331 </pre></div>
332
332
333
333
334 <pre>
334 <pre>
335 array([ 8.94879575, 5.53038269, 8.24847281, 12.14944165, 11.56209294])
335 array([ 8.94879575, 5.53038269, 8.24847281, 12.14944165, 11.56209294])
336 </pre>
336 </pre>
337
337
338
338
339 ## Indexing with other arrays
339 ## Indexing with other arrays
340
340
341 Above we saw how to index arrays with single numbers and slices, just like Python lists. But arrays allow for a more sophisticated kind of indexing which is very powerful: you can index an array with another array, and in particular with an array of boolean values. This is particularly useful to extract information from an array that matches a certain condition.
341 Above we saw how to index arrays with single numbers and slices, just like Python lists. But arrays allow for a more sophisticated kind of indexing which is very powerful: you can index an array with another array, and in particular with an array of boolean values. This is particularly useful to extract information from an array that matches a certain condition.
342
342
343 Consider for example that in the array `norm10` we want to replace all values above 9 with the value 0. We can do so by first finding the *mask* that indicates where this condition is true or false:
343 Consider for example that in the array `norm10` we want to replace all values above 9 with the value 0. We can do so by first finding the *mask* that indicates where this condition is true or false:
344
344
345 <div class="highlight"><pre><span class="n">mask</span> <span class="o">=</span> <span class="n">norm10</span> <span class="o">&gt;</span> <span class="mi">9</span>
345 <div class="highlight"><pre><span class="n">mask</span> <span class="o">=</span> <span class="n">norm10</span> <span class="o">&gt;</span> <span class="mi">9</span>
346 <span class="n">mask</span>
346 <span class="n">mask</span>
347 </pre></div>
347 </pre></div>
348
348
349
349
350 <pre>
350 <pre>
351 array([False, False, False, True, True], dtype=bool)
351 array([False, False, False, True, True], dtype=bool)
352 </pre>
352 </pre>
353
353
354
354
355 Now that we have this mask, we can use it to either read those values or to reset them to 0:
355 Now that we have this mask, we can use it to either read those values or to reset them to 0:
356
356
357 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;Values above 9:&#39;</span><span class="p">,</span> <span class="n">norm10</span><span class="p">[</span><span class="n">mask</span><span class="p">]</span>
357 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;Values above 9:&#39;</span><span class="p">,</span> <span class="n">norm10</span><span class="p">[</span><span class="n">mask</span><span class="p">]</span>
358 </pre></div>
358 </pre></div>
359
359
360
360
361 Values above 9: [ 12.14944165 11.56209294]
361 Values above 9: [ 12.14944165 11.56209294]
362
362
363
363
364 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;Resetting all values above 9 to 0...&#39;</span>
364 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;Resetting all values above 9 to 0...&#39;</span>
365 <span class="n">norm10</span><span class="p">[</span><span class="n">mask</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
365 <span class="n">norm10</span><span class="p">[</span><span class="n">mask</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
366 <span class="k">print</span> <span class="n">norm10</span>
366 <span class="k">print</span> <span class="n">norm10</span>
367 </pre></div>
367 </pre></div>
368
368
369
369
370 Resetting all values above 9 to 0...
370 Resetting all values above 9 to 0...
371 [ 8.94879575 5.53038269 8.24847281 0. 0. ]
371 [ 8.94879575 5.53038269 8.24847281 0. 0. ]
372
372
373
373
374 ## Arrays with more than one dimension
374 ## Arrays with more than one dimension
375
375
376 Up until now all our examples have used one-dimensional arrays. But Numpy can create arrays of arbitrary dimensions, and all the methods illustrated in the previous section work with more than one dimension. For example, a list of lists can be used to initialize a two dimensional array:
376 Up until now all our examples have used one-dimensional arrays. But Numpy can create arrays of arbitrary dimensions, and all the methods illustrated in the previous section work with more than one dimension. For example, a list of lists can be used to initialize a two dimensional array:
377
377
378 <div class="highlight"><pre><span class="n">lst2</span> <span class="o">=</span> <span class="p">[[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]]</span>
378 <div class="highlight"><pre><span class="n">lst2</span> <span class="o">=</span> <span class="p">[[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]]</span>
379 <span class="n">arr2</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]])</span>
379 <span class="n">arr2</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]])</span>
380 <span class="n">arr2</span>
380 <span class="n">arr2</span>
381 </pre></div>
381 </pre></div>
382
382
383
383
384 <pre>
384 <pre>
385 array([[1, 2],
385 array([[1, 2],
386 [3, 4]])
386 [3, 4]])
387 </pre>
387 </pre>
388
388
389
389
390 With two-dimensional arrays we start seeing the power of numpy: while a nested list can be indexed by repeatedly using the `[ ]` operator, multidimensional arrays support a much more natural indexing syntax with a single `[ ]` and a set of indices separated by commas:
390 With two-dimensional arrays we start seeing the power of numpy: while a nested list can be indexed by repeatedly using the `[ ]` operator, multidimensional arrays support a much more natural indexing syntax with a single `[ ]` and a set of indices separated by commas:
391
391
392 <div class="highlight"><pre><span class="k">print</span> <span class="n">lst2</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">]</span>
392 <div class="highlight"><pre><span class="k">print</span> <span class="n">lst2</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">]</span>
393 <span class="k">print</span> <span class="n">arr2</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span><span class="mi">1</span><span class="p">]</span>
393 <span class="k">print</span> <span class="n">arr2</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span><span class="mi">1</span><span class="p">]</span>
394 </pre></div>
394 </pre></div>
395
395
396
396
397 2
397 2
398 2
398 2
399
399
400
400
401 Most of the array creation functions listed above can be used with more than one dimension, for example:
401 Most of the array creation functions listed above can be used with more than one dimension, for example:
402
402
403 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">))</span>
403 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">))</span>
404 </pre></div>
404 </pre></div>
405
405
406
406
407 <pre>
407 <pre>
408 array([[ 0., 0., 0.],
408 array([[ 0., 0., 0.],
409 [ 0., 0., 0.]])
409 [ 0., 0., 0.]])
410 </pre>
410 </pre>
411
411
412
412
413 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">))</span>
413 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">))</span>
414 </pre></div>
414 </pre></div>
415
415
416
416
417 <pre>
417 <pre>
418 array([[ 11.26788826, 4.29619866, 11.09346496, 9.73861307],
418 array([[ 11.26788826, 4.29619866, 11.09346496, 9.73861307],
419 [ 10.54025996, 9.5146268 , 10.80367214, 13.62204505]])
419 [ 10.54025996, 9.5146268 , 10.80367214, 13.62204505]])
420 </pre>
420 </pre>
421
421
422
422
423 In fact, the shape of an array can be changed at any time, as long as the total number of elements is unchanged. For example, if we want a 2x4 array with numbers increasing from 0, the easiest way to create it is:
423 In fact, the shape of an array can be changed at any time, as long as the total number of elements is unchanged. For example, if we want a 2x4 array with numbers increasing from 0, the easiest way to create it is:
424
424
425 <div class="highlight"><pre><span class="n">arr</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">8</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="mi">4</span><span class="p">)</span>
425 <div class="highlight"><pre><span class="n">arr</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">8</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span><span class="mi">4</span><span class="p">)</span>
426 <span class="k">print</span> <span class="n">arr</span>
426 <span class="k">print</span> <span class="n">arr</span>
427 </pre></div>
427 </pre></div>
428
428
429
429
430 [[0 1 2 3]
430 [[0 1 2 3]
431 [4 5 6 7]]
431 [4 5 6 7]]
432
432
433
433
434 With multidimensional arrays, you can also use slices, and you can mix and match slices and single indices in the different dimensions (using the same array as above):
434 With multidimensional arrays, you can also use slices, and you can mix and match slices and single indices in the different dimensions (using the same array as above):
435
435
436 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;Slicing in the second row:&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span><span class="mi">4</span><span class="p">]</span>
436 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;Slicing in the second row:&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span><span class="mi">4</span><span class="p">]</span>
437 <span class="k">print</span> <span class="s">&#39;All rows, third column :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="p">[:,</span> <span class="mi">2</span><span class="p">]</span>
437 <span class="k">print</span> <span class="s">&#39;All rows, third column :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="p">[:,</span> <span class="mi">2</span><span class="p">]</span>
438 </pre></div>
438 </pre></div>
439
439
440
440
441 Slicing in the second row: [6 7]
441 Slicing in the second row: [6 7]
442 All rows, third column : [2 6]
442 All rows, third column : [2 6]
443
443
444
444
445 If you only provide one index, then you will get an array with one less dimension containing that row:
445 If you only provide one index, then you will get an array with one less dimension containing that row:
446
446
447 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;First row: &#39;</span><span class="p">,</span> <span class="n">arr</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
447 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;First row: &#39;</span><span class="p">,</span> <span class="n">arr</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
448 <span class="k">print</span> <span class="s">&#39;Second row: &#39;</span><span class="p">,</span> <span class="n">arr</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
448 <span class="k">print</span> <span class="s">&#39;Second row: &#39;</span><span class="p">,</span> <span class="n">arr</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
449 </pre></div>
449 </pre></div>
450
450
451
451
452 First row: [0 1 2 3]
452 First row: [0 1 2 3]
453 Second row: [4 5 6 7]
453 Second row: [4 5 6 7]
454
454
455
455
456 Now that we have seen how to create arrays with more than one dimension, it's a good idea to look at some of the most useful properties and methods that arrays have. The following provide basic information about the size, shape and data in the array:
456 Now that we have seen how to create arrays with more than one dimension, it's a good idea to look at some of the most useful properties and methods that arrays have. The following provide basic information about the size, shape and data in the array:
457
457
458 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;Data type :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">dtype</span>
458 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;Data type :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">dtype</span>
459 <span class="k">print</span> <span class="s">&#39;Total number of elements :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">size</span>
459 <span class="k">print</span> <span class="s">&#39;Total number of elements :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">size</span>
460 <span class="k">print</span> <span class="s">&#39;Number of dimensions :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">ndim</span>
460 <span class="k">print</span> <span class="s">&#39;Number of dimensions :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">ndim</span>
461 <span class="k">print</span> <span class="s">&#39;Shape (dimensionality) :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">shape</span>
461 <span class="k">print</span> <span class="s">&#39;Shape (dimensionality) :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">shape</span>
462 <span class="k">print</span> <span class="s">&#39;Memory used (in bytes) :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">nbytes</span>
462 <span class="k">print</span> <span class="s">&#39;Memory used (in bytes) :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">nbytes</span>
463 </pre></div>
463 </pre></div>
464
464
465
465
466 Data type : int32
466 Data type : int32
467 Total number of elements : 8
467 Total number of elements : 8
468 Number of dimensions : 2
468 Number of dimensions : 2
469 Shape (dimensionality) : (2, 4)
469 Shape (dimensionality) : (2, 4)
470 Memory used (in bytes) : 32
470 Memory used (in bytes) : 32
471
471
472
472
473 Arrays also have many useful methods; some especially useful ones are:
473 Arrays also have many useful methods; some especially useful ones are:
474
474
475 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;Minimum and maximum :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">min</span><span class="p">(),</span> <span class="n">arr</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
475 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;Minimum and maximum :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">min</span><span class="p">(),</span> <span class="n">arr</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
476 <span class="k">print</span> <span class="s">&#39;Sum and product of all elements :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">sum</span><span class="p">(),</span> <span class="n">arr</span><span class="o">.</span><span class="n">prod</span><span class="p">()</span>
476 <span class="k">print</span> <span class="s">&#39;Sum and product of all elements :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">sum</span><span class="p">(),</span> <span class="n">arr</span><span class="o">.</span><span class="n">prod</span><span class="p">()</span>
477 <span class="k">print</span> <span class="s">&#39;Mean and standard deviation :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">mean</span><span class="p">(),</span> <span class="n">arr</span><span class="o">.</span><span class="n">std</span><span class="p">()</span>
477 <span class="k">print</span> <span class="s">&#39;Mean and standard deviation :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">mean</span><span class="p">(),</span> <span class="n">arr</span><span class="o">.</span><span class="n">std</span><span class="p">()</span>
478 </pre></div>
478 </pre></div>
479
479
480
480
481 Minimum and maximum : 0 7
481 Minimum and maximum : 0 7
482 Sum and product of all elements : 28 0
482 Sum and product of all elements : 28 0
483 Mean and standard deviation : 3.5 2.29128784748
483 Mean and standard deviation : 3.5 2.29128784748
484
484
485
485
486 For these methods, the above operations are all computed on all the elements of the array. But for a multidimensional array, it's possible to do the computation along a single dimension, by passing the `axis` parameter; for example:
486 For these methods, the above operations are all computed on all the elements of the array. But for a multidimensional array, it's possible to do the computation along a single dimension, by passing the `axis` parameter; for example:
487
487
488 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;For the following array:</span><span class="se">\n</span><span class="s">&#39;</span><span class="p">,</span> <span class="n">arr</span>
488 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;For the following array:</span><span class="se">\n</span><span class="s">&#39;</span><span class="p">,</span> <span class="n">arr</span>
489 <span class="k">print</span> <span class="s">&#39;The sum of elements along the rows is :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
489 <span class="k">print</span> <span class="s">&#39;The sum of elements along the rows is :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
490 <span class="k">print</span> <span class="s">&#39;The sum of elements along the columns is :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
490 <span class="k">print</span> <span class="s">&#39;The sum of elements along the columns is :&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
491 </pre></div>
491 </pre></div>
492
492
493
493
494 For the following array:
494 For the following array:
495 [[0 1 2 3]
495 [[0 1 2 3]
496 [4 5 6 7]]
496 [4 5 6 7]]
497 The sum of elements along the rows is : [ 6 22]
497 The sum of elements along the rows is : [ 6 22]
498 The sum of elements along the columns is : [ 4 6 8 10]
498 The sum of elements along the columns is : [ 4 6 8 10]
499
499
500
500
501 As you can see in this example, the value of the `axis` parameter is the dimension which will be *consumed* once the operation has been carried out. This is why to sum along the rows we use `axis=1`: the second dimension (of length 4) is consumed, leaving one sum per row.
501 As you can see in this example, the value of the `axis` parameter is the dimension which will be *consumed* once the operation has been carried out. This is why to sum along the rows we use `axis=1`: the second dimension (of length 4) is consumed, leaving one sum per row.
502
502
503 This can be easily illustrated with an example that has more dimensions; we create an array with 4 dimensions and shape `(3,4,5,6)` and sum along the axis number 2 (i.e. the *third* axis, since in Python all counts are 0-based). That consumes the dimension whose length was 5, leaving us with a new array that has shape `(3,4,6)`:
503 This can be easily illustrated with an example that has more dimensions; we create an array with 4 dimensions and shape `(3,4,5,6)` and sum along the axis number 2 (i.e. the *third* axis, since in Python all counts are 0-based). That consumes the dimension whose length was 5, leaving us with a new array that has shape `(3,4,6)`:
504
504
505 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="mi">6</span><span class="p">))</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">shape</span>
505 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="mi">6</span><span class="p">))</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">shape</span>
506 </pre></div>
506 </pre></div>
507
507
508
508
509 <pre>
509 <pre>
510 (3, 4, 6)
510 (3, 4, 6)
511 </pre>
511 </pre>
512
512
513
513
514 Another widely used property of arrays is the `.T` attribute, which allows you to access the transpose of the array:
514 Another widely used property of arrays is the `.T` attribute, which allows you to access the transpose of the array:
515
515
516 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;Array:</span><span class="se">\n</span><span class="s">&#39;</span><span class="p">,</span> <span class="n">arr</span>
516 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;Array:</span><span class="se">\n</span><span class="s">&#39;</span><span class="p">,</span> <span class="n">arr</span>
517 <span class="k">print</span> <span class="s">&#39;Transpose:</span><span class="se">\n</span><span class="s">&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">T</span>
517 <span class="k">print</span> <span class="s">&#39;Transpose:</span><span class="se">\n</span><span class="s">&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="o">.</span><span class="n">T</span>
518 </pre></div>
518 </pre></div>
519
519
520
520
521 Array:
521 Array:
522 [[0 1 2 3]
522 [[0 1 2 3]
523 [4 5 6 7]]
523 [4 5 6 7]]
524 Transpose:
524 Transpose:
525 [[0 4]
525 [[0 4]
526 [1 5]
526 [1 5]
527 [2 6]
527 [2 6]
528 [3 7]]
528 [3 7]]
529
529
530
530
531 We don't have time here to look at all the methods and properties of arrays, but here's a complete list. Simply try exploring some of these in IPython to learn more, or read their description in the full Numpy documentation:
531 We don't have time here to look at all the methods and properties of arrays, but here's a complete list. Simply try exploring some of these in IPython to learn more, or read their description in the full Numpy documentation:
532
532
533 arr.T arr.copy arr.getfield arr.put arr.squeeze
533 arr.T arr.copy arr.getfield arr.put arr.squeeze
534 arr.all arr.ctypes arr.imag arr.ravel arr.std
534 arr.all arr.ctypes arr.imag arr.ravel arr.std
535 arr.any arr.cumprod arr.item arr.real arr.strides
535 arr.any arr.cumprod arr.item arr.real arr.strides
536 arr.argmax arr.cumsum arr.itemset arr.repeat arr.sum
536 arr.argmax arr.cumsum arr.itemset arr.repeat arr.sum
537 arr.argmin arr.data arr.itemsize arr.reshape arr.swapaxes
537 arr.argmin arr.data arr.itemsize arr.reshape arr.swapaxes
538 arr.argsort arr.diagonal arr.max arr.resize arr.take
538 arr.argsort arr.diagonal arr.max arr.resize arr.take
539 arr.astype arr.dot arr.mean arr.round arr.tofile
539 arr.astype arr.dot arr.mean arr.round arr.tofile
540 arr.base arr.dtype arr.min arr.searchsorted arr.tolist
540 arr.base arr.dtype arr.min arr.searchsorted arr.tolist
541 arr.byteswap arr.dump arr.nbytes arr.setasflat arr.tostring
541 arr.byteswap arr.dump arr.nbytes arr.setasflat arr.tostring
542 arr.choose arr.dumps arr.ndim arr.setfield arr.trace
542 arr.choose arr.dumps arr.ndim arr.setfield arr.trace
543 arr.clip arr.fill arr.newbyteorder arr.setflags arr.transpose
543 arr.clip arr.fill arr.newbyteorder arr.setflags arr.transpose
544 arr.compress arr.flags arr.nonzero arr.shape arr.var
544 arr.compress arr.flags arr.nonzero arr.shape arr.var
545 arr.conj arr.flat arr.prod arr.size arr.view
545 arr.conj arr.flat arr.prod arr.size arr.view
546 arr.conjugate arr.flatten arr.ptp arr.sort
546 arr.conjugate arr.flatten arr.ptp arr.sort
547
547
548 ## Operating with arrays
548 ## Operating with arrays
549
549
550 Arrays support all regular arithmetic operators, and the numpy library also contains a complete collection of basic mathematical functions that operate on arrays. It is important to remember that in general, all operations with arrays are applied *element-wise*, i.e., are applied to all the elements of the array at the same time. Consider for example:
550 Arrays support all regular arithmetic operators, and the numpy library also contains a complete collection of basic mathematical functions that operate on arrays. It is important to remember that in general, all operations with arrays are applied *element-wise*, i.e., are applied to all the elements of the array at the same time. Consider for example:
551
551
552 <div class="highlight"><pre><span class="n">arr1</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span>
552 <div class="highlight"><pre><span class="n">arr1</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span>
553 <span class="n">arr2</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span>
553 <span class="n">arr2</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">14</span><span class="p">)</span>
554 <span class="k">print</span> <span class="n">arr1</span><span class="p">,</span> <span class="s">&#39;+&#39;</span><span class="p">,</span> <span class="n">arr2</span><span class="p">,</span> <span class="s">&#39;=&#39;</span><span class="p">,</span> <span class="n">arr1</span><span class="o">+</span><span class="n">arr2</span>
554 <span class="k">print</span> <span class="n">arr1</span><span class="p">,</span> <span class="s">&#39;+&#39;</span><span class="p">,</span> <span class="n">arr2</span><span class="p">,</span> <span class="s">&#39;=&#39;</span><span class="p">,</span> <span class="n">arr1</span><span class="o">+</span><span class="n">arr2</span>
555 </pre></div>
555 </pre></div>
556
556
557
557
558 [0 1 2 3] + [10 11 12 13] = [10 12 14 16]
558 [0 1 2 3] + [10 11 12 13] = [10 12 14 16]
559
559
560
560
561 Importantly, you must remember that even the multiplication operator is by default applied element-wise, it is *not* the matrix multiplication from linear algebra (as is the case in Matlab, for example):
561 Importantly, you must remember that even the multiplication operator is by default applied element-wise, it is *not* the matrix multiplication from linear algebra (as is the case in Matlab, for example):
562
562
563 <div class="highlight"><pre><span class="k">print</span> <span class="n">arr1</span><span class="p">,</span> <span class="s">&#39;*&#39;</span><span class="p">,</span> <span class="n">arr2</span><span class="p">,</span> <span class="s">&#39;=&#39;</span><span class="p">,</span> <span class="n">arr1</span><span class="o">*</span><span class="n">arr2</span>
563 <div class="highlight"><pre><span class="k">print</span> <span class="n">arr1</span><span class="p">,</span> <span class="s">&#39;*&#39;</span><span class="p">,</span> <span class="n">arr2</span><span class="p">,</span> <span class="s">&#39;=&#39;</span><span class="p">,</span> <span class="n">arr1</span><span class="o">*</span><span class="n">arr2</span>
564 </pre></div>
564 </pre></div>
565
565
566
566
567 [0 1 2 3] * [10 11 12 13] = [ 0 11 24 39]
567 [0 1 2 3] * [10 11 12 13] = [ 0 11 24 39]
568
568
569
569
570 While this means that in principle arrays must always match in their dimensionality in order for an operation to be valid, numpy will *broadcast* dimensions when possible. For example, suppose that you want to add the number 1.5 to `arr1`; the following would be a valid way to do it:
570 While this means that in principle arrays must always match in their dimensionality in order for an operation to be valid, numpy will *broadcast* dimensions when possible. For example, suppose that you want to add the number 1.5 to `arr1`; the following would be a valid way to do it:
571
571
572 <div class="highlight"><pre><span class="n">arr1</span> <span class="o">+</span> <span class="mf">1.5</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">ones</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span>
572 <div class="highlight"><pre><span class="n">arr1</span> <span class="o">+</span> <span class="mf">1.5</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">ones</span><span class="p">(</span><span class="mi">4</span><span class="p">)</span>
573 </pre></div>
573 </pre></div>
574
574
575
575
576 <pre>
576 <pre>
577 array([ 1.5, 2.5, 3.5, 4.5])
577 array([ 1.5, 2.5, 3.5, 4.5])
578 </pre>
578 </pre>
579
579
580
580
581 But thanks to numpy's broadcasting rules, the following is equally valid:
581 But thanks to numpy's broadcasting rules, the following is equally valid:
582
582
583 <div class="highlight"><pre><span class="n">arr1</span> <span class="o">+</span> <span class="mf">1.5</span>
583 <div class="highlight"><pre><span class="n">arr1</span> <span class="o">+</span> <span class="mf">1.5</span>
584 </pre></div>
584 </pre></div>
585
585
586
586
587 <pre>
587 <pre>
588 array([ 1.5, 2.5, 3.5, 4.5])
588 array([ 1.5, 2.5, 3.5, 4.5])
589 </pre>
589 </pre>
590
590
591
591
592 In this case, numpy looked at both operands and saw that the first (`arr1`) was a one-dimensional array of length 4 and the second was a scalar, considered a zero-dimensional object. The broadcasting rules allow numpy to:
592 In this case, numpy looked at both operands and saw that the first (`arr1`) was a one-dimensional array of length 4 and the second was a scalar, considered a zero-dimensional object. The broadcasting rules allow numpy to:
593
593
594 * *create* new dimensions of length 1 (since this doesn't change the size of the array)
594 * *create* new dimensions of length 1 (since this doesn't change the size of the array)
595 * 'stretch' a dimension of length 1 that needs to be matched to a dimension of a different size.
595 * 'stretch' a dimension of length 1 that needs to be matched to a dimension of a different size.
596
596
597 So in the above example, the scalar 1.5 is effectively:
597 So in the above example, the scalar 1.5 is effectively:
598
598
599 * first 'promoted' to a 1-dimensional array of length 1
599 * first 'promoted' to a 1-dimensional array of length 1
600 * then, this array is 'stretched' to length 4 to match the dimension of `arr1`.
600 * then, this array is 'stretched' to length 4 to match the dimension of `arr1`.
601
601
602 After these two operations are complete, the addition can proceed as now both operands are one-dimensional arrays of length 4.
602 After these two operations are complete, the addition can proceed as now both operands are one-dimensional arrays of length 4.
603
603
604 This broadcasting behavior is in practice enormously powerful, especially because when numpy broadcasts to create new dimensions or to 'stretch' existing ones, it doesn't actually replicate the data. In the example above the operation is carried out *as if* the 1.5 was a 1-d array with 1.5 in all of its entries, but no actual array was ever created. This can save lots of memory in cases when the arrays in question are large and can have significant performance implications.
604 This broadcasting behavior is in practice enormously powerful, especially because when numpy broadcasts to create new dimensions or to 'stretch' existing ones, it doesn't actually replicate the data. In the example above the operation is carried out *as if* the 1.5 was a 1-d array with 1.5 in all of its entries, but no actual array was ever created. This can save lots of memory in cases when the arrays in question are large and can have significant performance implications.
605
605
606 The general rule is: when operating on two arrays, NumPy compares their shapes element-wise. It starts with the trailing dimensions, and works its way forward, creating dimensions of length 1 as needed. Two dimensions are considered compatible when
606 The general rule is: when operating on two arrays, NumPy compares their shapes element-wise. It starts with the trailing dimensions, and works its way forward, creating dimensions of length 1 as needed. Two dimensions are considered compatible when
607
607
608 * they are equal to begin with, or
608 * they are equal to begin with, or
609 * one of them is 1; in this case numpy will do the 'stretching' to make them equal.
609 * one of them is 1; in this case numpy will do the 'stretching' to make them equal.
610
610
611 If these conditions are not met, a `ValueError: operands could not be broadcast together` exception is raised, indicating that the arrays have incompatible shapes. The size of the resulting array is the maximum size along each dimension of the input arrays.
611 If these conditions are not met, a `ValueError: operands could not be broadcast together` exception is raised, indicating that the arrays have incompatible shapes. The size of the resulting array is the maximum size along each dimension of the input arrays.
612
612
613 This shows how the broadcasting rules work in several dimensions:
613 This shows how the broadcasting rules work in several dimensions:
614
614
615 <div class="highlight"><pre><span class="n">b</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">])</span>
615 <div class="highlight"><pre><span class="n">b</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">])</span>
616 <span class="k">print</span> <span class="n">arr</span><span class="p">,</span> <span class="s">&#39;</span><span class="se">\n\n</span><span class="s">+&#39;</span><span class="p">,</span> <span class="n">b</span> <span class="p">,</span> <span class="s">&#39;</span><span class="se">\n</span><span class="s">----------------</span><span class="se">\n</span><span class="s">&#39;</span><span class="p">,</span> <span class="n">arr</span> <span class="o">+</span> <span class="n">b</span>
616 <span class="k">print</span> <span class="n">arr</span><span class="p">,</span> <span class="s">&#39;</span><span class="se">\n\n</span><span class="s">+&#39;</span><span class="p">,</span> <span class="n">b</span> <span class="p">,</span> <span class="s">&#39;</span><span class="se">\n</span><span class="s">----------------</span><span class="se">\n</span><span class="s">&#39;</span><span class="p">,</span> <span class="n">arr</span> <span class="o">+</span> <span class="n">b</span>
617 </pre></div>
617 </pre></div>
618
618
619
619
620 [[0 1 2 3]
620 [[0 1 2 3]
621 [4 5 6 7]]
621 [4 5 6 7]]
622
622
623 + [2 3 4 5]
623 + [2 3 4 5]
624 ----------------
624 ----------------
625 [[ 2 4 6 8]
625 [[ 2 4 6 8]
626 [ 6 8 10 12]]
626 [ 6 8 10 12]]
627
627
628
628
629 Now, how could you use broadcasting to, say, add `[4, 6]` along the rows to `arr` above? Simply performing the direct addition will produce the error we previously mentioned:
629 Now, how could you use broadcasting to, say, add `[4, 6]` along the rows to `arr` above? Simply performing the direct addition will produce the error we previously mentioned:
630
630
631 <div class="highlight"><pre><span class="n">c</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">6</span><span class="p">])</span>
631 <div class="highlight"><pre><span class="n">c</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">6</span><span class="p">])</span>
632 <span class="n">arr</span> <span class="o">+</span> <span class="n">c</span>
632 <span class="n">arr</span> <span class="o">+</span> <span class="n">c</span>
633 </pre></div>
633 </pre></div>
634
634
635
635
636 ---------------------------------------------------------------------------
636 ---------------------------------------------------------------------------
637 ValueError Traceback (most recent call last)
637 ValueError Traceback (most recent call last)
638 /home/fperez/teach/book-math-labtool/<ipython-input-45-62aa20ac1980> in <module>()
638 /home/fperez/teach/book-math-labtool/<ipython-input-45-62aa20ac1980> in <module>()
639 1 c = np.array([4, 6])
639 1 c = np.array([4, 6])
640 ----> 2 arr + c
640 ----> 2 arr + c
641
641
642 ValueError: operands could not be broadcast together with shapes (2,4) (2)
642 ValueError: operands could not be broadcast together with shapes (2,4) (2)
643
643
644
644
645 According to the rules above, the array `c` would need to have a *trailing* dimension of 1 for the broadcasting to work. It turns out that numpy allows you to 'inject' new dimensions anywhere into an array on the fly, by indexing it with the special object `np.newaxis`:
645 According to the rules above, the array `c` would need to have a *trailing* dimension of 1 for the broadcasting to work. It turns out that numpy allows you to 'inject' new dimensions anywhere into an array on the fly, by indexing it with the special object `np.newaxis`:
646
646
647 <div class="highlight"><pre><span class="p">(</span><span class="n">c</span><span class="p">[:,</span> <span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">])</span><span class="o">.</span><span class="n">shape</span>
647 <div class="highlight"><pre><span class="p">(</span><span class="n">c</span><span class="p">[:,</span> <span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">])</span><span class="o">.</span><span class="n">shape</span>
648 </pre></div>
648 </pre></div>
649
649
650
650
651 <pre>
651 <pre>
652 (2, 1)
652 (2, 1)
653 </pre>
653 </pre>
654
654
655
655
656 This is exactly what we need, and indeed it works:
656 This is exactly what we need, and indeed it works:
657
657
658 <div class="highlight"><pre><span class="n">arr</span> <span class="o">+</span> <span class="n">c</span><span class="p">[:,</span> <span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">]</span>
658 <div class="highlight"><pre><span class="n">arr</span> <span class="o">+</span> <span class="n">c</span><span class="p">[:,</span> <span class="n">np</span><span class="o">.</span><span class="n">newaxis</span><span class="p">]</span>
659 </pre></div>
659 </pre></div>
660
660
661
661
662 <pre>
662 <pre>
663 array([[ 4, 5, 6, 7],
663 array([[ 4, 5, 6, 7],
664 [10, 11, 12, 13]])
664 [10, 11, 12, 13]])
665 </pre>
665 </pre>
666
666
667
667
668 For the full broadcasting rules, please see the official Numpy docs, which describe them in detail and with more complex examples.
668 For the full broadcasting rules, please see the official Numpy docs, which describe them in detail and with more complex examples.
669
669
670 As we mentioned before, Numpy ships with a full complement of mathematical functions that work on entire arrays, including logarithms, exponentials, trigonometric and hyperbolic trigonometric functions, etc. Furthermore, scipy ships a rich special function library in the `scipy.special` module that includes Bessel, Airy, Fresnel, Laguerre and other classical special functions. For example, sampling the sine function at 100 points between $0$ and $2\pi$ is as simple as:
670 As we mentioned before, Numpy ships with a full complement of mathematical functions that work on entire arrays, including logarithms, exponentials, trigonometric and hyperbolic trigonometric functions, etc. Furthermore, scipy ships a rich special function library in the `scipy.special` module that includes Bessel, Airy, Fresnel, Laguerre and other classical special functions. For example, sampling the sine function at 100 points between $0$ and $2\pi$ is as simple as:
671
671
672 <div class="highlight"><pre><span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">pi</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span>
672 <div class="highlight"><pre><span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">pi</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span>
673 <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
673 <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
674 </pre></div>
674 </pre></div>
675
675
676
676
677
677
678 ## Linear algebra in numpy
678 ## Linear algebra in numpy
679
679
680 Numpy ships with a basic linear algebra library, and all arrays have a `dot` method whose behavior is that of the scalar dot product when its arguments are vectors (one-dimensional arrays) and the traditional matrix multiplication when one or both of its arguments are two-dimensional arrays:
680 Numpy ships with a basic linear algebra library, and all arrays have a `dot` method whose behavior is that of the scalar dot product when its arguments are vectors (one-dimensional arrays) and the traditional matrix multiplication when one or both of its arguments are two-dimensional arrays:
681
681
682 <div class="highlight"><pre><span class="n">v1</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
682 <div class="highlight"><pre><span class="n">v1</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
683 <span class="n">v2</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span>
683 <span class="n">v2</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span>
684 <span class="k">print</span> <span class="n">v1</span><span class="p">,</span> <span class="s">&#39;.&#39;</span><span class="p">,</span> <span class="n">v2</span><span class="p">,</span> <span class="s">&#39;=&#39;</span><span class="p">,</span> <span class="n">v1</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">v2</span><span class="p">)</span>
684 <span class="k">print</span> <span class="n">v1</span><span class="p">,</span> <span class="s">&#39;.&#39;</span><span class="p">,</span> <span class="n">v2</span><span class="p">,</span> <span class="s">&#39;=&#39;</span><span class="p">,</span> <span class="n">v1</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">v2</span><span class="p">)</span>
685 </pre></div>
685 </pre></div>
686
686
687
687
688 [2 3 4] . [1 0 1] = 6
688 [2 3 4] . [1 0 1] = 6
689
689
690
690
691 Here is a regular matrix-vector multiplication, note that the array `v1` should be viewed as a *column* vector in traditional linear algebra notation; numpy makes no distinction between row and column vectors and simply verifies that the dimensions match the required rules of matrix multiplication, in this case we have a $2 \times 3$ matrix multiplied by a 3-vector, which produces a 2-vector:
691 Here is a regular matrix-vector multiplication, note that the array `v1` should be viewed as a *column* vector in traditional linear algebra notation; numpy makes no distinction between row and column vectors and simply verifies that the dimensions match the required rules of matrix multiplication, in this case we have a $2 \times 3$ matrix multiplied by a 3-vector, which produces a 2-vector:
692
692
693 <div class="highlight"><pre><span class="n">A</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">6</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
693 <div class="highlight"><pre><span class="n">A</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">6</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
694 <span class="k">print</span> <span class="n">A</span><span class="p">,</span> <span class="s">&#39;x&#39;</span><span class="p">,</span> <span class="n">v1</span><span class="p">,</span> <span class="s">&#39;=&#39;</span><span class="p">,</span> <span class="n">A</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">v1</span><span class="p">)</span>
694 <span class="k">print</span> <span class="n">A</span><span class="p">,</span> <span class="s">&#39;x&#39;</span><span class="p">,</span> <span class="n">v1</span><span class="p">,</span> <span class="s">&#39;=&#39;</span><span class="p">,</span> <span class="n">A</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">v1</span><span class="p">)</span>
695 </pre></div>
695 </pre></div>
696
696
697
697
698 [[0 1 2]
698 [[0 1 2]
699 [3 4 5]] x [2 3 4] = [11 38]
699 [3 4 5]] x [2 3 4] = [11 38]
700
700
701
701
702 For matrix-matrix multiplication, the same dimension-matching rules must be satisfied, e.g. consider the difference between $A \times A^T$:
702 For matrix-matrix multiplication, the same dimension-matching rules must be satisfied, e.g. consider the difference between $A \times A^T$:
703
703
704 <div class="highlight"><pre><span class="k">print</span> <span class="n">A</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">A</span><span class="o">.</span><span class="n">T</span><span class="p">)</span>
704 <div class="highlight"><pre><span class="k">print</span> <span class="n">A</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">A</span><span class="o">.</span><span class="n">T</span><span class="p">)</span>
705 </pre></div>
705 </pre></div>
706
706
707
707
708 [[ 5 14]
708 [[ 5 14]
709 [14 50]]
709 [14 50]]
710
710
711
711
712 and $A^T \times A$:
712 and $A^T \times A$:
713
713
714 <div class="highlight"><pre><span class="k">print</span> <span class="n">A</span><span class="o">.</span><span class="n">T</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">A</span><span class="p">)</span>
714 <div class="highlight"><pre><span class="k">print</span> <span class="n">A</span><span class="o">.</span><span class="n">T</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">A</span><span class="p">)</span>
715 </pre></div>
715 </pre></div>
716
716
717
717
718 [[ 9 12 15]
718 [[ 9 12 15]
719 [12 17 22]
719 [12 17 22]
720 [15 22 29]]
720 [15 22 29]]
721
721
722
722
723 Furthermore, the `numpy.linalg` module includes additional functionality such as determinants, matrix norms, Cholesky, eigenvalue and singular value decompositions, etc. For even more linear algebra tools, `scipy.linalg` contains the majority of the tools in the classic LAPACK libraries as well as functions to operate on sparse matrices. We refer the reader to the Numpy and Scipy documentation for additional details on these.
723 Furthermore, the `numpy.linalg` module includes additional functionality such as determinants, matrix norms, Cholesky, eigenvalue and singular value decompositions, etc. For even more linear algebra tools, `scipy.linalg` contains the majority of the tools in the classic LAPACK libraries as well as functions to operate on sparse matrices. We refer the reader to the Numpy and Scipy documentation for additional details on these.
724
724
725 ## Reading and writing arrays to disk
725 ## Reading and writing arrays to disk
726
726
727 Numpy lets you read and write arrays into files in a number of ways. In order to use these tools well, it is critical to understand the difference between a *text* and a *binary* file containing numerical data. In a text file, the number $\pi$ could be written as "3.141592653589793", for example: a string of digits that a human can read, with in this case 15 decimal digits. In contrast, that same number written to a binary file would be encoded as 8 characters (bytes) that are not readable by a human but which contain the exact same data that the variable `pi` had in the computer's memory.
727 Numpy lets you read and write arrays into files in a number of ways. In order to use these tools well, it is critical to understand the difference between a *text* and a *binary* file containing numerical data. In a text file, the number $\pi$ could be written as "3.141592653589793", for example: a string of digits that a human can read, with in this case 15 decimal digits. In contrast, that same number written to a binary file would be encoded as 8 characters (bytes) that are not readable by a human but which contain the exact same data that the variable `pi` had in the computer's memory.
728
728
729 The tradeoffs between the two modes are thus:
729 The tradeoffs between the two modes are thus:
730
730
731 * Text mode: occupies more space, precision can be lost (if not all digits are written to disk), but is readable and editable by hand with a text editor. Can *only* be used for one- and two-dimensional arrays.
731 * Text mode: occupies more space, precision can be lost (if not all digits are written to disk), but is readable and editable by hand with a text editor. Can *only* be used for one- and two-dimensional arrays.
732
732
733 * Binary mode: compact and exact representation of the data in memory, can't be read or edited by hand. Arrays of any size and dimensionality can be saved and read without loss of information.
733 * Binary mode: compact and exact representation of the data in memory, can't be read or edited by hand. Arrays of any size and dimensionality can be saved and read without loss of information.
734
734
735 First, let's see how to read and write arrays in text mode. The `np.savetxt` function saves an array to a text file, with options to control the precision, separators and even adding a header:
735 First, let's see how to read and write arrays in text mode. The `np.savetxt` function saves an array to a text file, with options to control the precision, separators and even adding a header:
736
736
737 <div class="highlight"><pre><span class="n">arr</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
737 <div class="highlight"><pre><span class="n">arr</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
738 <span class="n">np</span><span class="o">.</span><span class="n">savetxt</span><span class="p">(</span><span class="s">&#39;test.out&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="p">,</span> <span class="n">fmt</span><span class="o">=</span><span class="s">&#39;</span><span class="si">%.2e</span><span class="s">&#39;</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="s">&quot;My dataset&quot;</span><span class="p">)</span>
738 <span class="n">np</span><span class="o">.</span><span class="n">savetxt</span><span class="p">(</span><span class="s">&#39;test.out&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="p">,</span> <span class="n">fmt</span><span class="o">=</span><span class="s">&#39;</span><span class="si">%.2e</span><span class="s">&#39;</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="s">&quot;My dataset&quot;</span><span class="p">)</span>
739 <span class="o">!</span>cat test.out
739 <span class="o">!</span>cat test.out
740 </pre></div>
740 </pre></div>
741
741
742
742
743 # My dataset
743 # My dataset
744 0.00e+00 1.00e+00 2.00e+00 3.00e+00 4.00e+00
744 0.00e+00 1.00e+00 2.00e+00 3.00e+00 4.00e+00
745 5.00e+00 6.00e+00 7.00e+00 8.00e+00 9.00e+00
745 5.00e+00 6.00e+00 7.00e+00 8.00e+00 9.00e+00
746
746
747
747
748 And this same type of file can then be read with the matching `np.loadtxt` function:
748 And this same type of file can then be read with the matching `np.loadtxt` function:
749
749
750 <div class="highlight"><pre><span class="n">arr2</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">loadtxt</span><span class="p">(</span><span class="s">&#39;test.out&#39;</span><span class="p">)</span>
750 <div class="highlight"><pre><span class="n">arr2</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">loadtxt</span><span class="p">(</span><span class="s">&#39;test.out&#39;</span><span class="p">)</span>
751 <span class="k">print</span> <span class="n">arr2</span>
751 <span class="k">print</span> <span class="n">arr2</span>
752 </pre></div>
752 </pre></div>
753
753
754
754
755 [[ 0. 1. 2. 3. 4.]
755 [[ 0. 1. 2. 3. 4.]
756 [ 5. 6. 7. 8. 9.]]
756 [ 5. 6. 7. 8. 9.]]
757
757
758
758
759 For binary data, Numpy provides the `np.save` and `np.savez` routines. The first saves a single array to a file with `.npy` extension, while the latter can be used to save a *group* of arrays into a single file with `.npz` extension. The files created with these routines can then be read with the `np.load` function.
759 For binary data, Numpy provides the `np.save` and `np.savez` routines. The first saves a single array to a file with `.npy` extension, while the latter can be used to save a *group* of arrays into a single file with `.npz` extension. The files created with these routines can then be read with the `np.load` function.
760
760
761 Let us first see how to use the simpler `np.save` function to save a single array:
761 Let us first see how to use the simpler `np.save` function to save a single array:
762
762
763 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="s">&#39;test.npy&#39;</span><span class="p">,</span> <span class="n">arr2</span><span class="p">)</span>
763 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="s">&#39;test.npy&#39;</span><span class="p">,</span> <span class="n">arr2</span><span class="p">)</span>
764 <span class="c"># Now we read this back</span>
764 <span class="c"># Now we read this back</span>
765 <span class="n">arr2n</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s">&#39;test.npy&#39;</span><span class="p">)</span>
765 <span class="n">arr2n</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s">&#39;test.npy&#39;</span><span class="p">)</span>
766 <span class="c"># Let&#39;s see if any element is non-zero in the difference.</span>
766 <span class="c"># Let&#39;s see if any element is non-zero in the difference.</span>
767 <span class="c"># A value of True would be a problem.</span>
767 <span class="c"># A value of True would be a problem.</span>
768 <span class="k">print</span> <span class="s">&#39;Any differences?&#39;</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="n">arr2</span><span class="o">-</span><span class="n">arr2n</span><span class="p">)</span>
768 <span class="k">print</span> <span class="s">&#39;Any differences?&#39;</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="n">arr2</span><span class="o">-</span><span class="n">arr2n</span><span class="p">)</span>
769 </pre></div>
769 </pre></div>
770
770
771
771
772 Any differences? False
772 Any differences? False
773
773
774
774
775 Now let us see how the `np.savez` function works. You give it a filename and either a sequence of arrays or a set of keywords. In the first mode, the function will automatically name the saved arrays in the archive as `arr_0`, `arr_1`, etc:
775 Now let us see how the `np.savez` function works. You give it a filename and either a sequence of arrays or a set of keywords. In the first mode, the function will automatically name the saved arrays in the archive as `arr_0`, `arr_1`, etc:
776
776
777 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">savez</span><span class="p">(</span><span class="s">&#39;test.npz&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="p">,</span> <span class="n">arr2</span><span class="p">)</span>
777 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">savez</span><span class="p">(</span><span class="s">&#39;test.npz&#39;</span><span class="p">,</span> <span class="n">arr</span><span class="p">,</span> <span class="n">arr2</span><span class="p">)</span>
778 <span class="n">arrays</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s">&#39;test.npz&#39;</span><span class="p">)</span>
778 <span class="n">arrays</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s">&#39;test.npz&#39;</span><span class="p">)</span>
779 <span class="n">arrays</span><span class="o">.</span><span class="n">files</span>
779 <span class="n">arrays</span><span class="o">.</span><span class="n">files</span>
780 </pre></div>
780 </pre></div>
781
781
782
782
783 <pre>
783 <pre>
784 ['arr_1', 'arr_0']
784 ['arr_1', 'arr_0']
785 </pre>
785 </pre>
786
786
787
787
788 Alternatively, we can explicitly choose how to name the arrays we save:
788 Alternatively, we can explicitly choose how to name the arrays we save:
789
789
790 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">savez</span><span class="p">(</span><span class="s">&#39;test.npz&#39;</span><span class="p">,</span> <span class="n">array1</span><span class="o">=</span><span class="n">arr</span><span class="p">,</span> <span class="n">array2</span><span class="o">=</span><span class="n">arr2</span><span class="p">)</span>
790 <div class="highlight"><pre><span class="n">np</span><span class="o">.</span><span class="n">savez</span><span class="p">(</span><span class="s">&#39;test.npz&#39;</span><span class="p">,</span> <span class="n">array1</span><span class="o">=</span><span class="n">arr</span><span class="p">,</span> <span class="n">array2</span><span class="o">=</span><span class="n">arr2</span><span class="p">)</span>
791 <span class="n">arrays</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s">&#39;test.npz&#39;</span><span class="p">)</span>
791 <span class="n">arrays</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="s">&#39;test.npz&#39;</span><span class="p">)</span>
792 <span class="n">arrays</span><span class="o">.</span><span class="n">files</span>
792 <span class="n">arrays</span><span class="o">.</span><span class="n">files</span>
793 </pre></div>
793 </pre></div>
794
794
795
795
796 <pre>
796 <pre>
797 ['array2', 'array1']
797 ['array2', 'array1']
798 </pre>
798 </pre>
799
799
800
800
801 The object returned by `np.load` from an `.npz` file works like a dictionary, though you can also access its constituent files by attribute using its special `.f` field; this is best illustrated with an example with the `arrays` object from above:
801 The object returned by `np.load` from an `.npz` file works like a dictionary, though you can also access its constituent files by attribute using its special `.f` field; this is best illustrated with an example with the `arrays` object from above:
802
802
803 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;First row of first array:&#39;</span><span class="p">,</span> <span class="n">arrays</span><span class="p">[</span><span class="s">&#39;array1&#39;</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span>
803 <div class="highlight"><pre><span class="k">print</span> <span class="s">&#39;First row of first array:&#39;</span><span class="p">,</span> <span class="n">arrays</span><span class="p">[</span><span class="s">&#39;array1&#39;</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span>
804 <span class="c"># This is an equivalent way to get the same field</span>
804 <span class="c"># This is an equivalent way to get the same field</span>
805 <span class="k">print</span> <span class="s">&#39;First row of first array:&#39;</span><span class="p">,</span> <span class="n">arrays</span><span class="o">.</span><span class="n">f</span><span class="o">.</span><span class="n">array1</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
805 <span class="k">print</span> <span class="s">&#39;First row of first array:&#39;</span><span class="p">,</span> <span class="n">arrays</span><span class="o">.</span><span class="n">f</span><span class="o">.</span><span class="n">array1</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
806 </pre></div>
806 </pre></div>
807
807
808
808
809 First row of first array: [0 1 2 3 4]
809 First row of first array: [0 1 2 3 4]
810 First row of first array: [0 1 2 3 4]
810 First row of first array: [0 1 2 3 4]
811
811
812
812
813 This `.npz` format is a very convenient way to package compactly and without loss of information, into a single file, a group of related arrays that pertain to a specific problem. At some point, however, the complexity of your dataset may be such that the optimal approach is to use one of the standard formats in scientific data processing that have been designed to handle complex datasets, such as NetCDF or HDF5.
813 This `.npz` format is a very convenient way to package compactly and without loss of information, into a single file, a group of related arrays that pertain to a specific problem. At some point, however, the complexity of your dataset may be such that the optimal approach is to use one of the standard formats in scientific data processing that have been designed to handle complex datasets, such as NetCDF or HDF5.
814
814
815 Fortunately, there are tools for manipulating these formats in Python, and for storing data in other ways such as databases. A complete discussion of the possibilities is beyond the scope of this text, but of particular interest for scientific users we at least mention the following:
815 Fortunately, there are tools for manipulating these formats in Python, and for storing data in other ways such as databases. A complete discussion of the possibilities is beyond the scope of this text, but of particular interest for scientific users we at least mention the following:
816
816
817 * The `scipy.io` module contains routines to read and write Matlab files in `.mat` format and files in the NetCDF format that is widely used in certain scientific disciplines.
817 * The `scipy.io` module contains routines to read and write Matlab files in `.mat` format and files in the NetCDF format that is widely used in certain scientific disciplines.
818
818
819 * For manipulating files in the HDF5 format, there are two excellent options in Python: The PyTables project offers a high-level, object oriented approach to manipulating HDF5 datasets, while the h5py project offers a more direct mapping to the standard HDF5 library interface. Both are excellent tools; if you need to work with HDF5 datasets you should read some of their documentation and examples and decide which approach is a better match for your needs.
819 * For manipulating files in the HDF5 format, there are two excellent options in Python: The PyTables project offers a high-level, object oriented approach to manipulating HDF5 datasets, while the h5py project offers a more direct mapping to the standard HDF5 library interface. Both are excellent tools; if you need to work with HDF5 datasets you should read some of their documentation and examples and decide which approach is a better match for your needs.
820
820
821 # High quality data visualization with Matplotlib
821 # High quality data visualization with Matplotlib
822
822
823 The [matplotlib](http://matplotlib.sf.net) library is a powerful tool capable of producing complex publication-quality figures with fine layout control in two and three dimensions; here we will only provide a minimal self-contained introduction to its usage that covers the functionality needed for the rest of the book. We encourage the reader to read the tutorials included with the matplotlib documentation as well as to browse its extensive gallery of examples that include source code.
823 The [matplotlib](http://matplotlib.sf.net) library is a powerful tool capable of producing complex publication-quality figures with fine layout control in two and three dimensions; here we will only provide a minimal self-contained introduction to its usage that covers the functionality needed for the rest of the book. We encourage the reader to read the tutorials included with the matplotlib documentation as well as to browse its extensive gallery of examples that include source code.
824
824
825 Just as we typically use the shorthand `np` for Numpy, we will use `plt` for the `matplotlib.pyplot` module where the easy-to-use plotting functions reside (the library contains a rich object-oriented architecture that we don't have the space to discuss here):
825 Just as we typically use the shorthand `np` for Numpy, we will use `plt` for the `matplotlib.pyplot` module where the easy-to-use plotting functions reside (the library contains a rich object-oriented architecture that we don't have the space to discuss here):
826
826
827 <div class="highlight"><pre><span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="kn">as</span> <span class="nn">plt</span>
827 <div class="highlight"><pre><span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="kn">as</span> <span class="nn">plt</span>
828 </pre></div>
828 </pre></div>
829
829
830
830
831
831
832 The most frequently used function is simply called `plot`, here is how you can make a simple plot of $\sin(x)$ for $x \in [0, 2\pi]$ with labels and a grid (we use the semicolon in the last line to suppress the display of some information that is unnecessary right now):
832 The most frequently used function is simply called `plot`, here is how you can make a simple plot of $\sin(x)$ for $x \in [0, 2\pi]$ with labels and a grid (we use the semicolon in the last line to suppress the display of some information that is unnecessary right now):
833
833
834 <div class="highlight"><pre><span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">pi</span><span class="p">)</span>
834 <div class="highlight"><pre><span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">pi</span><span class="p">)</span>
835 <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
835 <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
836 <span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">x</span><span class="p">,</span><span class="n">y</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&#39;sin(x)&#39;</span><span class="p">)</span>
836 <span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">x</span><span class="p">,</span><span class="n">y</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&#39;sin(x)&#39;</span><span class="p">)</span>
837 <span class="n">plt</span><span class="o">.</span><span class="n">legend</span><span class="p">()</span>
837 <span class="n">plt</span><span class="o">.</span><span class="n">legend</span><span class="p">()</span>
838 <span class="n">plt</span><span class="o">.</span><span class="n">grid</span><span class="p">()</span>
838 <span class="n">plt</span><span class="o">.</span><span class="n">grid</span><span class="p">()</span>
839 <span class="n">plt</span><span class="o">.</span><span class="n">title</span><span class="p">(</span><span class="s">&#39;Harmonic&#39;</span><span class="p">)</span>
839 <span class="n">plt</span><span class="o">.</span><span class="n">title</span><span class="p">(</span><span class="s">&#39;Harmonic&#39;</span><span class="p">)</span>
840 <span class="n">plt</span><span class="o">.</span><span class="n">xlabel</span><span class="p">(</span><span class="s">&#39;x&#39;</span><span class="p">)</span>
840 <span class="n">plt</span><span class="o">.</span><span class="n">xlabel</span><span class="p">(</span><span class="s">&#39;x&#39;</span><span class="p">)</span>
841 <span class="n">plt</span><span class="o">.</span><span class="n">ylabel</span><span class="p">(</span><span class="s">&#39;y&#39;</span><span class="p">);</span>
841 <span class="n">plt</span><span class="o">.</span><span class="n">ylabel</span><span class="p">(</span><span class="s">&#39;y&#39;</span><span class="p">);</span>
842 </pre></div>
842 </pre></div>
843
843
844
844
845
845
846 ![](/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_01.svg)
846 ![](tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_01.svg)
847
847
848
848
849 You can control the style, color and other properties of the markers, for example:
849 You can control the style, color and other properties of the markers, for example:
850
850
851 <div class="highlight"><pre><span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">2</span><span class="p">);</span>
851 <div class="highlight"><pre><span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">2</span><span class="p">);</span>
852 </pre></div>
852 </pre></div>
853
853
854
854
855
855
856 ![](/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_02.svg)
856 ![](tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_02.svg)
857
857
858
858
859 <div class="highlight"><pre><span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="s">&#39;o&#39;</span><span class="p">,</span> <span class="n">markersize</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s">&#39;r&#39;</span><span class="p">);</span>
859 <div class="highlight"><pre><span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="s">&#39;o&#39;</span><span class="p">,</span> <span class="n">markersize</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s">&#39;r&#39;</span><span class="p">);</span>
860 </pre></div>
860 </pre></div>
861
861
862
862
863
863
864 ![](/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_03.svg)
864 ![](tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_03.svg)
865
865
866
866
867 We will now see how to create a few other common plot types, such as a simple error plot:
867 We will now see how to create a few other common plot types, such as a simple error plot:
868
868
869 <div class="highlight"><pre><span class="c"># example data</span>
869 <div class="highlight"><pre><span class="c"># example data</span>
870 <span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mf">0.1</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">)</span>
870 <span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mf">0.1</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">)</span>
871 <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="n">x</span><span class="p">)</span>
871 <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="n">x</span><span class="p">)</span>
872
872
873 <span class="c"># example variable error bar values</span>
873 <span class="c"># example variable error bar values</span>
874 <span class="n">yerr</span> <span class="o">=</span> <span class="mf">0.1</span> <span class="o">+</span> <span class="mf">0.2</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
874 <span class="n">yerr</span> <span class="o">=</span> <span class="mf">0.1</span> <span class="o">+</span> <span class="mf">0.2</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
875 <span class="n">xerr</span> <span class="o">=</span> <span class="mf">0.1</span> <span class="o">+</span> <span class="n">yerr</span>
875 <span class="n">xerr</span> <span class="o">=</span> <span class="mf">0.1</span> <span class="o">+</span> <span class="n">yerr</span>
876
876
877 <span class="c"># First illustrate basic pyplot interface, using defaults where possible.</span>
877 <span class="c"># First illustrate basic pyplot interface, using defaults where possible.</span>
878 <span class="n">plt</span><span class="o">.</span><span class="n">figure</span><span class="p">()</span>
878 <span class="n">plt</span><span class="o">.</span><span class="n">figure</span><span class="p">()</span>
879 <span class="n">plt</span><span class="o">.</span><span class="n">errorbar</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">xerr</span><span class="o">=</span><span class="mf">0.2</span><span class="p">,</span> <span class="n">yerr</span><span class="o">=</span><span class="mf">0.4</span><span class="p">)</span>
879 <span class="n">plt</span><span class="o">.</span><span class="n">errorbar</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">xerr</span><span class="o">=</span><span class="mf">0.2</span><span class="p">,</span> <span class="n">yerr</span><span class="o">=</span><span class="mf">0.4</span><span class="p">)</span>
880 <span class="n">plt</span><span class="o">.</span><span class="n">title</span><span class="p">(</span><span class="s">&quot;Simplest errorbars, 0.2 in x, 0.4 in y&quot;</span><span class="p">);</span>
880 <span class="n">plt</span><span class="o">.</span><span class="n">title</span><span class="p">(</span><span class="s">&quot;Simplest errorbars, 0.2 in x, 0.4 in y&quot;</span><span class="p">);</span>
881 </pre></div>
881 </pre></div>
882
882
883
883
884
884
885 ![](/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_04.svg)
885 ![](tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_04.svg)
886
886
887
887
888 A simple log plot
888 A simple log plot
889
889
890 <div class="highlight"><pre><span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="o">-</span><span class="mi">5</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
890 <div class="highlight"><pre><span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="o">-</span><span class="mi">5</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
891 <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="n">x</span><span class="o">**</span><span class="mi">2</span><span class="p">)</span>
891 <span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="n">x</span><span class="o">**</span><span class="mi">2</span><span class="p">)</span>
892 <span class="n">plt</span><span class="o">.</span><span class="n">semilogy</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">);</span>
892 <span class="n">plt</span><span class="o">.</span><span class="n">semilogy</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">);</span>
893 </pre></div>
893 </pre></div>
894
894
895
895
896
896
897 ![](/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_05.svg)
897 ![](tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_05.svg)
898
898
899
899
900 A histogram annotated with text inside the plot, using the `text` function:
900 A histogram annotated with text inside the plot, using the `text` function:
901
901
902 <div class="highlight"><pre><span class="n">mu</span><span class="p">,</span> <span class="n">sigma</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">15</span>
902 <div class="highlight"><pre><span class="n">mu</span><span class="p">,</span> <span class="n">sigma</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">15</span>
903 <span class="n">x</span> <span class="o">=</span> <span class="n">mu</span> <span class="o">+</span> <span class="n">sigma</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">10000</span><span class="p">)</span>
903 <span class="n">x</span> <span class="o">=</span> <span class="n">mu</span> <span class="o">+</span> <span class="n">sigma</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">10000</span><span class="p">)</span>
904
904
905 <span class="c"># the histogram of the data</span>
905 <span class="c"># the histogram of the data</span>
906 <span class="n">n</span><span class="p">,</span> <span class="n">bins</span><span class="p">,</span> <span class="n">patches</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">hist</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="mi">50</span><span class="p">,</span> <span class="n">normed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">facecolor</span><span class="o">=</span><span class="s">&#39;g&#39;</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.75</span><span class="p">)</span>
906 <span class="n">n</span><span class="p">,</span> <span class="n">bins</span><span class="p">,</span> <span class="n">patches</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">hist</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="mi">50</span><span class="p">,</span> <span class="n">normed</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">facecolor</span><span class="o">=</span><span class="s">&#39;g&#39;</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.75</span><span class="p">)</span>
907
907
908 <span class="n">plt</span><span class="o">.</span><span class="n">xlabel</span><span class="p">(</span><span class="s">&#39;Smarts&#39;</span><span class="p">)</span>
908 <span class="n">plt</span><span class="o">.</span><span class="n">xlabel</span><span class="p">(</span><span class="s">&#39;Smarts&#39;</span><span class="p">)</span>
909 <span class="n">plt</span><span class="o">.</span><span class="n">ylabel</span><span class="p">(</span><span class="s">&#39;Probability&#39;</span><span class="p">)</span>
909 <span class="n">plt</span><span class="o">.</span><span class="n">ylabel</span><span class="p">(</span><span class="s">&#39;Probability&#39;</span><span class="p">)</span>
910 <span class="n">plt</span><span class="o">.</span><span class="n">title</span><span class="p">(</span><span class="s">&#39;Histogram of IQ&#39;</span><span class="p">)</span>
910 <span class="n">plt</span><span class="o">.</span><span class="n">title</span><span class="p">(</span><span class="s">&#39;Histogram of IQ&#39;</span><span class="p">)</span>
911 <span class="c"># This will put a text fragment at the position given:</span>
911 <span class="c"># This will put a text fragment at the position given:</span>
912 <span class="n">plt</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="mi">55</span><span class="p">,</span> <span class="o">.</span><span class="mo">027</span><span class="p">,</span> <span class="s">r&#39;$\mu=100,\ \sigma=15$&#39;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">14</span><span class="p">)</span>
912 <span class="n">plt</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="mi">55</span><span class="p">,</span> <span class="o">.</span><span class="mo">027</span><span class="p">,</span> <span class="s">r&#39;$\mu=100,\ \sigma=15$&#39;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">14</span><span class="p">)</span>
913 <span class="n">plt</span><span class="o">.</span><span class="n">axis</span><span class="p">([</span><span class="mi">40</span><span class="p">,</span> <span class="mi">160</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mf">0.03</span><span class="p">])</span>
913 <span class="n">plt</span><span class="o">.</span><span class="n">axis</span><span class="p">([</span><span class="mi">40</span><span class="p">,</span> <span class="mi">160</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mf">0.03</span><span class="p">])</span>
914 <span class="n">plt</span><span class="o">.</span><span class="n">grid</span><span class="p">(</span><span class="bp">True</span><span class="p">)</span>
914 <span class="n">plt</span><span class="o">.</span><span class="n">grid</span><span class="p">(</span><span class="bp">True</span><span class="p">)</span>
915 </pre></div>
915 </pre></div>
916
916
917
917
918
918
919 ![](/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_06.svg)
919 ![](tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_06.svg)
920
920
921
921
922 ## Image display
922 ## Image display
923
923
924 The `imshow` command can display single or multi-channel images. A simple array of random numbers, plotted in grayscale:
924 The `imshow` command can display single or multi-channel images. A simple array of random numbers, plotted in grayscale:
925
925
926 <div class="highlight"><pre><span class="kn">from</span> <span class="nn">matplotlib</span> <span class="kn">import</span> <span class="n">cm</span>
926 <div class="highlight"><pre><span class="kn">from</span> <span class="nn">matplotlib</span> <span class="kn">import</span> <span class="n">cm</span>
927 <span class="n">plt</span><span class="o">.</span><span class="n">imshow</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">10</span><span class="p">),</span> <span class="n">cmap</span><span class="o">=</span><span class="n">cm</span><span class="o">.</span><span class="n">gray</span><span class="p">,</span> <span class="n">interpolation</span><span class="o">=</span><span class="s">&#39;nearest&#39;</span><span class="p">);</span>
927 <span class="n">plt</span><span class="o">.</span><span class="n">imshow</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">5</span><span class="p">,</span> <span class="mi">10</span><span class="p">),</span> <span class="n">cmap</span><span class="o">=</span><span class="n">cm</span><span class="o">.</span><span class="n">gray</span><span class="p">,</span> <span class="n">interpolation</span><span class="o">=</span><span class="s">&#39;nearest&#39;</span><span class="p">);</span>
928 </pre></div>
928 </pre></div>
929
929
930
930
931
931
932 ![](/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_07.svg)
932 ![](tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_07.svg)
933
933
934
934
935 A real photograph is a multichannel image, `imshow` interprets it correctly:
935 A real photograph is a multichannel image, `imshow` interprets it correctly:
936
936
937 <div class="highlight"><pre><span class="n">img</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">imread</span><span class="p">(</span><span class="s">&#39;stinkbug.png&#39;</span><span class="p">)</span>
937 <div class="highlight"><pre><span class="n">img</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">imread</span><span class="p">(</span><span class="s">&#39;stinkbug.png&#39;</span><span class="p">)</span>
938 <span class="k">print</span> <span class="s">&#39;Dimensions of the array img:&#39;</span><span class="p">,</span> <span class="n">img</span><span class="o">.</span><span class="n">shape</span>
938 <span class="k">print</span> <span class="s">&#39;Dimensions of the array img:&#39;</span><span class="p">,</span> <span class="n">img</span><span class="o">.</span><span class="n">shape</span>
939 <span class="n">plt</span><span class="o">.</span><span class="n">imshow</span><span class="p">(</span><span class="n">img</span><span class="p">);</span>
939 <span class="n">plt</span><span class="o">.</span><span class="n">imshow</span><span class="p">(</span><span class="n">img</span><span class="p">);</span>
940 </pre></div>
940 </pre></div>
941
941
942
942
943 Dimensions of the array img: (375, 500, 3)
943 Dimensions of the array img: (375, 500, 3)
944
944
945
945
946 ![](/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_08.svg)
946 ![](tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_08.svg)
947
947
948
948
949 ## Simple 3d plotting with matplotlib
949 ## Simple 3d plotting with matplotlib
950
950
951 Note that you must execute at least once in your session:
951 Note that you must execute at least once in your session:
952
952
953 <div class="highlight"><pre><span class="kn">from</span> <span class="nn">mpl_toolkits.mplot3d</span> <span class="kn">import</span> <span class="n">Axes3D</span>
953 <div class="highlight"><pre><span class="kn">from</span> <span class="nn">mpl_toolkits.mplot3d</span> <span class="kn">import</span> <span class="n">Axes3D</span>
954 </pre></div>
954 </pre></div>
955
955
956
956
957
957
958 Once this has been done, you can create 3d axes with the `projection='3d'` keyword to `add_subplot`:
958 Once this has been done, you can create 3d axes with the `projection='3d'` keyword to `add_subplot`:
959
959
960 fig = plt.figure()
960 fig = plt.figure()
961 fig.add_subplot(<other arguments here>, projection='3d')
961 fig.add_subplot(<other arguments here>, projection='3d')
962
962
963 A simple surface plot:
963 A simple surface plot:
964
964
965 <div class="highlight"><pre><span class="kn">from</span> <span class="nn">mpl_toolkits.mplot3d.axes3d</span> <span class="kn">import</span> <span class="n">Axes3D</span>
965 <div class="highlight"><pre><span class="kn">from</span> <span class="nn">mpl_toolkits.mplot3d.axes3d</span> <span class="kn">import</span> <span class="n">Axes3D</span>
966 <span class="kn">from</span> <span class="nn">matplotlib</span> <span class="kn">import</span> <span class="n">cm</span>
966 <span class="kn">from</span> <span class="nn">matplotlib</span> <span class="kn">import</span> <span class="n">cm</span>
967
967
968 <span class="n">fig</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">figure</span><span class="p">()</span>
968 <span class="n">fig</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">figure</span><span class="p">()</span>
969 <span class="n">ax</span> <span class="o">=</span> <span class="n">fig</span><span class="o">.</span><span class="n">add_subplot</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">projection</span><span class="o">=</span><span class="s">&#39;3d&#39;</span><span class="p">)</span>
969 <span class="n">ax</span> <span class="o">=</span> <span class="n">fig</span><span class="o">.</span><span class="n">add_subplot</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">projection</span><span class="o">=</span><span class="s">&#39;3d&#39;</span><span class="p">)</span>
970 <span class="n">X</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="o">-</span><span class="mi">5</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mf">0.25</span><span class="p">)</span>
970 <span class="n">X</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="o">-</span><span class="mi">5</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mf">0.25</span><span class="p">)</span>
971 <span class="n">Y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="o">-</span><span class="mi">5</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mf">0.25</span><span class="p">)</span>
971 <span class="n">Y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="o">-</span><span class="mi">5</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mf">0.25</span><span class="p">)</span>
972 <span class="n">X</span><span class="p">,</span> <span class="n">Y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">meshgrid</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">Y</span><span class="p">)</span>
972 <span class="n">X</span><span class="p">,</span> <span class="n">Y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">meshgrid</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">Y</span><span class="p">)</span>
973 <span class="n">R</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">X</span><span class="o">**</span><span class="mi">2</span> <span class="o">+</span> <span class="n">Y</span><span class="o">**</span><span class="mi">2</span><span class="p">)</span>
973 <span class="n">R</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">X</span><span class="o">**</span><span class="mi">2</span> <span class="o">+</span> <span class="n">Y</span><span class="o">**</span><span class="mi">2</span><span class="p">)</span>
974 <span class="n">Z</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">R</span><span class="p">)</span>
974 <span class="n">Z</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">R</span><span class="p">)</span>
975 <span class="n">surf</span> <span class="o">=</span> <span class="n">ax</span><span class="o">.</span><span class="n">plot_surface</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">Y</span><span class="p">,</span> <span class="n">Z</span><span class="p">,</span> <span class="n">rstride</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">cstride</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">cmap</span><span class="o">=</span><span class="n">cm</span><span class="o">.</span><span class="n">jet</span><span class="p">,</span>
975 <span class="n">surf</span> <span class="o">=</span> <span class="n">ax</span><span class="o">.</span><span class="n">plot_surface</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">Y</span><span class="p">,</span> <span class="n">Z</span><span class="p">,</span> <span class="n">rstride</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">cstride</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">cmap</span><span class="o">=</span><span class="n">cm</span><span class="o">.</span><span class="n">jet</span><span class="p">,</span>
976 <span class="n">linewidth</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">antialiased</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
976 <span class="n">linewidth</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">antialiased</span><span class="o">=</span><span class="bp">False</span><span class="p">)</span>
977 <span class="n">ax</span><span class="o">.</span><span class="n">set_zlim3d</span><span class="p">(</span><span class="o">-</span><span class="mf">1.01</span><span class="p">,</span> <span class="mf">1.01</span><span class="p">);</span>
977 <span class="n">ax</span><span class="o">.</span><span class="n">set_zlim3d</span><span class="p">(</span><span class="o">-</span><span class="mf">1.01</span><span class="p">,</span> <span class="mf">1.01</span><span class="p">);</span>
978 </pre></div>
978 </pre></div>
979
979
980
980
981
981
982 ![](/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_09.svg)
982 ![](tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_09.svg)
983
983
984
984
985 # IPython: a powerful interactive environment
985 # IPython: a powerful interactive environment
986
986
987 A key component of the everyday workflow of most scientific computing environments is a good interactive environment, that is, a system in which you can execute small amounts of code and view the results immediately, combining both printing out data and opening graphical visualizations. All modern systems for scientific computing, commercial and open source, include such functionality.
987 A key component of the everyday workflow of most scientific computing environments is a good interactive environment, that is, a system in which you can execute small amounts of code and view the results immediately, combining both printing out data and opening graphical visualizations. All modern systems for scientific computing, commercial and open source, include such functionality.
988
988
989 Out of the box, Python also offers a simple interactive shell with very limited capabilities. But just like the scientific community built Numpy to provide arrays suited for scientific work (since Python's lists aren't optimal for this task), it has also developed an interactive environment much more sophisticated than the built-in one. The [IPython project](http://ipython.org) offers a set of tools to make productive use of the Python language, all the while working interactively and with immediate feedback on your results. The basic tools that IPython provides are:
989 Out of the box, Python also offers a simple interactive shell with very limited capabilities. But just like the scientific community built Numpy to provide arrays suited for scientific work (since Python's lists aren't optimal for this task), it has also developed an interactive environment much more sophisticated than the built-in one. The [IPython project](http://ipython.org) offers a set of tools to make productive use of the Python language, all the while working interactively and with immediate feedback on your results. The basic tools that IPython provides are:
990
990
991 1. A powerful terminal shell, with many features designed to increase the fluidity and productivity of everyday scientific workflows, including:
991 1. A powerful terminal shell, with many features designed to increase the fluidity and productivity of everyday scientific workflows, including:
992
992
993 * rich introspection of all objects and variables including easy access to the source code of any function
993 * rich introspection of all objects and variables including easy access to the source code of any function
994 * powerful and extensible tab completion of variables and filenames,
994 * powerful and extensible tab completion of variables and filenames,
995 * tight integration with matplotlib, supporting interactive figures that don't block the terminal,
995 * tight integration with matplotlib, supporting interactive figures that don't block the terminal,
996 * direct access to the filesystem and underlying operating system,
996 * direct access to the filesystem and underlying operating system,
997 * an extensible system for shell-like commands called 'magics' that reduce the work needed to perform many common tasks,
997 * an extensible system for shell-like commands called 'magics' that reduce the work needed to perform many common tasks,
998 * tools for easily running, timing, profiling and debugging your codes,
998 * tools for easily running, timing, profiling and debugging your codes,
999 * syntax highlighted error messages with much more detail than the default Python ones,
999 * syntax highlighted error messages with much more detail than the default Python ones,
1000 * logging and access to all previous history of inputs, including across sessions
1000 * logging and access to all previous history of inputs, including across sessions
1001
1001
1002 2. A Qt console that provides the look and feel of a terminal, but adds support for inline figures, graphical calltips, a persistent session that can survive crashes (even segfaults) of the kernel process, and more.
1002 2. A Qt console that provides the look and feel of a terminal, but adds support for inline figures, graphical calltips, a persistent session that can survive crashes (even segfaults) of the kernel process, and more.
1003
1003
1004 3. A web-based notebook that can execute code and also contain rich text and figures, mathematical equations and arbitrary HTML. This notebook presents a document-like view with cells where code is executed but that can be edited in-place, reordered, mixed with explanatory text and figures, etc.
1004 3. A web-based notebook that can execute code and also contain rich text and figures, mathematical equations and arbitrary HTML. This notebook presents a document-like view with cells where code is executed but that can be edited in-place, reordered, mixed with explanatory text and figures, etc.
1005
1005
1006 4. A high-performance, low-latency system for parallel computing that supports the control of a cluster of IPython engines communicating over a network, with optimizations that minimize unnecessary copying of large objects (especially numpy arrays).
1006 4. A high-performance, low-latency system for parallel computing that supports the control of a cluster of IPython engines communicating over a network, with optimizations that minimize unnecessary copying of large objects (especially numpy arrays).
1007
1007
1008 We will now discuss the highlights of the tools 1-3 above so that you can make them an effective part of your workflow. The topic of parallel computing is beyond the scope of this document, but we encourage you to read the extensive [documentation](http://ipython.org/ipython-doc/rel-0.12.1/parallel/index.html) and [tutorials](http://minrk.github.com/scipy-tutorial-2011/) on this available on the IPython website.
1008 We will now discuss the highlights of the tools 1-3 above so that you can make them an effective part of your workflow. The topic of parallel computing is beyond the scope of this document, but we encourage you to read the extensive [documentation](http://ipython.org/ipython-doc/rel-0.12.1/parallel/index.html) and [tutorials](http://minrk.github.com/scipy-tutorial-2011/) on this available on the IPython website.
1009
1009
1010 ## The IPython terminal
1010 ## The IPython terminal
1011
1011
1012 You can start IPython at the terminal simply by typing:
1012 You can start IPython at the terminal simply by typing:
1013
1013
1014 $ ipython
1014 $ ipython
1015
1015
1016 which will provide you some basic information about how to get started and will then open a prompt labeled `In [1]:` for you to start typing. Here we type $2^{64}$ and Python computes the result for us in exact arithmetic, returning it as `Out[1]`:
1016 which will provide you some basic information about how to get started and will then open a prompt labeled `In [1]:` for you to start typing. Here we type $2^{64}$ and Python computes the result for us in exact arithmetic, returning it as `Out[1]`:
1017
1017
1018 $ ipython
1018 $ ipython
1019 Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
1019 Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
1020 Type "copyright", "credits" or "license" for more information.
1020 Type "copyright", "credits" or "license" for more information.
1021
1021
1022 IPython 0.13.dev -- An enhanced Interactive Python.
1022 IPython 0.13.dev -- An enhanced Interactive Python.
1023 ? -> Introduction and overview of IPython's features.
1023 ? -> Introduction and overview of IPython's features.
1024 %quickref -> Quick reference.
1024 %quickref -> Quick reference.
1025 help -> Python's own help system.
1025 help -> Python's own help system.
1026 object? -> Details about 'object', use 'object??' for extra details.
1026 object? -> Details about 'object', use 'object??' for extra details.
1027
1027
1028 In [1]: 2**64
1028 In [1]: 2**64
1029 Out[1]: 18446744073709551616L
1029 Out[1]: 18446744073709551616L
1030
1030
1031 The first thing you should know about IPython is that all your inputs and outputs are saved. There are two variables named `In` and `Out` which are filled as you work with your results. Furthermore, all outputs are also saved to auto-created variables of the form `_NN` where `NN` is the prompt number, and inputs to `_iNN`. This allows you to recover quickly the result of a prior computation by referring to its number even if you forgot to store it as a variable. For example, later on in the above session you can do:
1031 The first thing you should know about IPython is that all your inputs and outputs are saved. There are two variables named `In` and `Out` which are filled as you work with your results. Furthermore, all outputs are also saved to auto-created variables of the form `_NN` where `NN` is the prompt number, and inputs to `_iNN`. This allows you to recover quickly the result of a prior computation by referring to its number even if you forgot to store it as a variable. For example, later on in the above session you can do:
1032
1032
1033 In [6]: print _1
1033 In [6]: print _1
1034 18446744073709551616
1034 18446744073709551616
1035
1035
1036 We strongly recommend that you take a few minutes to read at least the basic introduction provided by the `?` command, and keep in mind that the `%quickref` command at all times can be used as a quick reference "cheat sheet" of the most frequently used features of IPython.
1036 We strongly recommend that you take a few minutes to read at least the basic introduction provided by the `?` command, and keep in mind that the `%quickref` command at all times can be used as a quick reference "cheat sheet" of the most frequently used features of IPython.
1037
1037
1038 At the IPython prompt, any valid Python code that you type will be executed similarly to the default Python shell (though often with more informative feedback). But since IPython is a *superset* of the default Python shell, let's have a brief look at some of its additional functionality.
1038 At the IPython prompt, any valid Python code that you type will be executed similarly to the default Python shell (though often with more informative feedback). But since IPython is a *superset* of the default Python shell, let's have a brief look at some of its additional functionality.
1039
1039
1040 **Object introspection**
1040 **Object introspection**
1041
1041
1042 A simple `?` command provides a general introduction to IPython, but as indicated in the banner above, you can use the `?` syntax to ask for details about any object. For example, if we type `_1?`, IPython will print the following details about this variable:
1042 A simple `?` command provides a general introduction to IPython, but as indicated in the banner above, you can use the `?` syntax to ask for details about any object. For example, if we type `_1?`, IPython will print the following details about this variable:
1043
1043
1044 In [14]: _1?
1044 In [14]: _1?
1045 Type: long
1045 Type: long
1046 Base Class: <type 'long'>
1046 Base Class: <type 'long'>
1047 String Form:18446744073709551616
1047 String Form:18446744073709551616
1048 Namespace: Interactive
1048 Namespace: Interactive
1049 Docstring:
1049 Docstring:
1050 long(x[, base]) -> integer
1050 long(x[, base]) -> integer
1051
1051
1052 Convert a string or number to a long integer, if possible. A floating
1052 Convert a string or number to a long integer, if possible. A floating
1053
1053
1054 [etc... snipped for brevity]
1054 [etc... snipped for brevity]
1055
1055
1056 If you add a second `?` and for any object `x` type `x??`, IPython will try to provide an even more detailed analysis of the object, including its syntax-highlighted source code when it can be found. It's possible that `x??` returns the same information as `x?`, but in many cases `x??` will indeed provide additional details.
1056 If you add a second `?` and for any object `x` type `x??`, IPython will try to provide an even more detailed analysis of the object, including its syntax-highlighted source code when it can be found. It's possible that `x??` returns the same information as `x?`, but in many cases `x??` will indeed provide additional details.
1057
1057
1058 Finally, the `?` syntax is also useful to search *namespaces* with wildcards. Suppose you are wondering if there is any function in Numpy that may do text-related things; with `np.*txt*?`, IPython will print all the names in the `np` namespace (our Numpy shorthand) that have 'txt' anywhere in their name:
1058 Finally, the `?` syntax is also useful to search *namespaces* with wildcards. Suppose you are wondering if there is any function in Numpy that may do text-related things; with `np.*txt*?`, IPython will print all the names in the `np` namespace (our Numpy shorthand) that have 'txt' anywhere in their name:
1059
1059
1060 In [17]: np.*txt*?
1060 In [17]: np.*txt*?
1061 np.genfromtxt
1061 np.genfromtxt
1062 np.loadtxt
1062 np.loadtxt
1063 np.mafromtxt
1063 np.mafromtxt
1064 np.ndfromtxt
1064 np.ndfromtxt
1065 np.recfromtxt
1065 np.recfromtxt
1066 np.savetxt
1066 np.savetxt
1067
1067
1068 **Tab completion**
1068 **Tab completion**
1069
1069
1070 IPython makes the tab key work extra hard for you as a way to rapidly inspect objects and libraries. Whenever you have typed something at the prompt, by hitting the `<tab>` key IPython will try to complete the rest of the line. For this, IPython will analyze the text you had so far and try to search for Python data or files that may match the context you have already provided.
1070 IPython makes the tab key work extra hard for you as a way to rapidly inspect objects and libraries. Whenever you have typed something at the prompt, by hitting the `<tab>` key IPython will try to complete the rest of the line. For this, IPython will analyze the text you had so far and try to search for Python data or files that may match the context you have already provided.
1071
1071
1072 For example, if you type `np.load` and hit the `<tab>` key, you'll see:
1072 For example, if you type `np.load` and hit the `<tab>` key, you'll see:
1073
1073
1074 In [21]: np.load<TAB HERE>
1074 In [21]: np.load<TAB HERE>
1075 np.load np.loads np.loadtxt
1075 np.load np.loads np.loadtxt
1076
1076
1077 so you can quickly find all the load-related functionality in numpy. Tab completion works even for function arguments, for example consider this function definition:
1077 so you can quickly find all the load-related functionality in numpy. Tab completion works even for function arguments, for example consider this function definition:
1078
1078
1079 In [20]: def f(x, frobinate=False):
1079 In [20]: def f(x, frobinate=False):
1080 ....: if frobinate:
1080 ....: if frobinate:
1081 ....: return x**2
1081 ....: return x**2
1082 ....:
1082 ....:
1083
1083
1084 If you now use the `<tab>` key after having typed 'fro' you'll get all valid Python completions, but those marked with `=` at the end are known to be keywords of your function:
1084 If you now use the `<tab>` key after having typed 'fro' you'll get all valid Python completions, but those marked with `=` at the end are known to be keywords of your function:
1085
1085
1086 In [21]: f(2, fro<TAB HERE>
1086 In [21]: f(2, fro<TAB HERE>
1087 frobinate= frombuffer fromfunction frompyfunc fromstring
1087 frobinate= frombuffer fromfunction frompyfunc fromstring
1088 from fromfile fromiter fromregex frozenset
1088 from fromfile fromiter fromregex frozenset
1089
1089
1090 at this point you can add the `b` letter and hit `<tab>` once more, and IPython will finish the line for you:
1090 at this point you can add the `b` letter and hit `<tab>` once more, and IPython will finish the line for you:
1091
1091
1092 In [21]: f(2, frobinate=
1092 In [21]: f(2, frobinate=
1093
1093
1094 As a beginner, simply get into the habit of using `<tab>` after most objects; it should quickly become second nature as you will see how it helps keep a fluid workflow and discover useful information. Later on you can also customize this behavior by writing your own completion code, if you so desire.
1094 As a beginner, simply get into the habit of using `<tab>` after most objects; it should quickly become second nature as you will see how it helps keep a fluid workflow and discover useful information. Later on you can also customize this behavior by writing your own completion code, if you so desire.
1095
1095
1096 **Matplotlib integration**
1096 **Matplotlib integration**
1097
1097
1098 One of the most useful features of IPython for scientists is its tight integration with matplotlib: at the terminal IPython lets you open matplotlib figures without blocking your typing (which is what happens if you try to do the same thing at the default Python shell), and in the Qt console and notebook you can even view your figures embedded in your workspace next to the code that created them.
1098 One of the most useful features of IPython for scientists is its tight integration with matplotlib: at the terminal IPython lets you open matplotlib figures without blocking your typing (which is what happens if you try to do the same thing at the default Python shell), and in the Qt console and notebook you can even view your figures embedded in your workspace next to the code that created them.
1099
1099
1100 The matplotlib support can be either activated when you start IPython by passing the `--pylab` flag, or at any point later in your session by using the `%pylab` command. If you start IPython with `--pylab`, you'll see something like this (note the extra message about pylab):
1100 The matplotlib support can be either activated when you start IPython by passing the `--pylab` flag, or at any point later in your session by using the `%pylab` command. If you start IPython with `--pylab`, you'll see something like this (note the extra message about pylab):
1101
1101
1102 $ ipython --pylab
1102 $ ipython --pylab
1103 Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
1103 Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
1104 Type "copyright", "credits" or "license" for more information.
1104 Type "copyright", "credits" or "license" for more information.
1105
1105
1106 IPython 0.13.dev -- An enhanced Interactive Python.
1106 IPython 0.13.dev -- An enhanced Interactive Python.
1107 ? -> Introduction and overview of IPython's features.
1107 ? -> Introduction and overview of IPython's features.
1108 %quickref -> Quick reference.
1108 %quickref -> Quick reference.
1109 help -> Python's own help system.
1109 help -> Python's own help system.
1110 object? -> Details about 'object', use 'object??' for extra details.
1110 object? -> Details about 'object', use 'object??' for extra details.
1111
1111
1112 Welcome to pylab, a matplotlib-based Python environment [backend: Qt4Agg].
1112 Welcome to pylab, a matplotlib-based Python environment [backend: Qt4Agg].
1113 For more information, type 'help(pylab)'.
1113 For more information, type 'help(pylab)'.
1114
1114
1115 In [1]:
1115 In [1]:
1116
1116
1117 Furthermore, IPython will import `numpy` with the `np` shorthand, `matplotlib.pyplot` as `plt`, and it will also load all of the numpy and pyplot top-level names so that you can directly type something like:
1117 Furthermore, IPython will import `numpy` with the `np` shorthand, `matplotlib.pyplot` as `plt`, and it will also load all of the numpy and pyplot top-level names so that you can directly type something like:
1118
1118
1119 In [1]: x = linspace(0, 2*pi, 200)
1119 In [1]: x = linspace(0, 2*pi, 200)
1120
1120
1121 In [2]: plot(x, sin(x))
1121 In [2]: plot(x, sin(x))
1122 Out[2]: [<matplotlib.lines.Line2D at 0x9e7c16c>]
1122 Out[2]: [<matplotlib.lines.Line2D at 0x9e7c16c>]
1123
1123
1124 instead of having to prefix each call with its full signature (as we have been doing in the examples thus far):
1124 instead of having to prefix each call with its full signature (as we have been doing in the examples thus far):
1125
1125
1126 In [3]: x = np.linspace(0, 2*np.pi, 200)
1126 In [3]: x = np.linspace(0, 2*np.pi, 200)
1127
1127
1128 In [4]: plt.plot(x, np.sin(x))
1128 In [4]: plt.plot(x, np.sin(x))
1129 Out[4]: [<matplotlib.lines.Line2D at 0x9e900ac>]
1129 Out[4]: [<matplotlib.lines.Line2D at 0x9e900ac>]
1130
1130
1131 This shorthand notation can be a huge time-saver when working interactively (it's a few characters but you are likely to type them hundreds of times in a session). But we should note that as you develop persistent scripts and notebooks meant for reuse, it's best to get in the habit of using the longer notation (known as *fully qualified names*), as it's clearer where things come from and it makes for more robust, readable and maintainable code in the long run.
1131 This shorthand notation can be a huge time-saver when working interactively (it's a few characters but you are likely to type them hundreds of times in a session). But we should note that as you develop persistent scripts and notebooks meant for reuse, it's best to get in the habit of using the longer notation (known as *fully qualified names*), as it's clearer where things come from and it makes for more robust, readable and maintainable code in the long run.
1132
1132
1133 **Access to the operating system and files**
1133 **Access to the operating system and files**
1134
1134
1135 In IPython, you can type `ls` to see your files or `cd` to change directories, just like you would at a regular system prompt:
1135 In IPython, you can type `ls` to see your files or `cd` to change directories, just like you would at a regular system prompt:
1136
1136
1137 In [2]: cd tests
1137 In [2]: cd tests
1138 /home/fperez/ipython/nbconvert/tests
1138 /home/fperez/ipython/nbconvert/tests
1139
1139
1140 In [3]: ls test.*
1140 In [3]: ls test.*
1141 test.aux test.html test.ipynb test.log test.out test.pdf test.rst test.tex
1141 test.aux test.html test.ipynb test.log test.out test.pdf test.rst test.tex
1142
1142
1143 Furthermore, if you use the `!` at the beginning of a line, any commands you pass afterwards go directly to the operating system:
1143 Furthermore, if you use the `!` at the beginning of a line, any commands you pass afterwards go directly to the operating system:
1144
1144
1145 In [4]: !echo "Hello IPython"
1145 In [4]: !echo "Hello IPython"
1146 Hello IPython
1146 Hello IPython
1147
1147
1148 IPython offers a useful twist in this feature: it will substitute in the command the value of any *Python* variable you may have if you prepend it with a `$` sign:
1148 IPython offers a useful twist in this feature: it will substitute in the command the value of any *Python* variable you may have if you prepend it with a `$` sign:
1149
1149
1150 In [5]: message = 'IPython interpolates from Python to the shell'
1150 In [5]: message = 'IPython interpolates from Python to the shell'
1151
1151
1152 In [6]: !echo $message
1152 In [6]: !echo $message
1153 IPython interpolates from Python to the shell
1153 IPython interpolates from Python to the shell
1154
1154
1155 This feature can be extremely useful, as it lets you combine the power and clarity of Python for complex logic with the immediacy and familiarity of many shell commands. Additionally, if you start the line with *two* `!` signs (i.e. `!!`), the output of the command will be automatically captured as a list of lines, e.g.:
1155 This feature can be extremely useful, as it lets you combine the power and clarity of Python for complex logic with the immediacy and familiarity of many shell commands. Additionally, if you start the line with *two* `!` signs (i.e. `!!`), the output of the command will be automatically captured as a list of lines, e.g.:
1156
1156
1157 In [10]: !!ls test.*
1157 In [10]: !!ls test.*
1158 Out[10]:
1158 Out[10]:
1159 ['test.aux',
1159 ['test.aux',
1160 'test.html',
1160 'test.html',
1161 'test.ipynb',
1161 'test.ipynb',
1162 'test.log',
1162 'test.log',
1163 'test.out',
1163 'test.out',
1164 'test.pdf',
1164 'test.pdf',
1165 'test.rst',
1165 'test.rst',
1166 'test.tex']
1166 'test.tex']
1167
1167
1168 As explained above, you can now use this as the variable `_10`. If you directly want to capture the output of a system command to a Python variable, you can use the syntax `=!`:
1168 As explained above, you can now use this as the variable `_10`. If you directly want to capture the output of a system command to a Python variable, you can use the syntax `=!`:
1169
1169
1170 In [11]: testfiles =! ls test.*
1170 In [11]: testfiles =! ls test.*
1171
1171
1172 In [12]: print testfiles
1172 In [12]: print testfiles
1173 ['test.aux', 'test.html', 'test.ipynb', 'test.log', 'test.out', 'test.pdf', 'test.rst', 'test.tex']
1173 ['test.aux', 'test.html', 'test.ipynb', 'test.log', 'test.out', 'test.pdf', 'test.rst', 'test.tex']
1174
1174
1175 Finally, the special `%alias` command lets you define names that are shorthands for system commands, so that you can type them without having to prefix them via `!` explicitly (for example, `ls` is an alias that has been predefined for you at startup).
1175 Finally, the special `%alias` command lets you define names that are shorthands for system commands, so that you can type them without having to prefix them via `!` explicitly (for example, `ls` is an alias that has been predefined for you at startup).
1176
1176
1177 **Magic commands**
1177 **Magic commands**
1178
1178
1179 IPython has a system for special commands, called 'magics', that let you control IPython itself and perform many common tasks with a more shell-like syntax: it uses spaces for delimiting arguments, flags can be set with dashes and all arguments are treated as strings, so no additional quoting is required. This kind of syntax is invalid in the Python language but very convenient for interactive typing (fewer parentheses, commas and quoting everywhere); IPython distinguishes the two by detecting lines that start with the `%` character.
1179 IPython has a system for special commands, called 'magics', that let you control IPython itself and perform many common tasks with a more shell-like syntax: it uses spaces for delimiting arguments, flags can be set with dashes and all arguments are treated as strings, so no additional quoting is required. This kind of syntax is invalid in the Python language but very convenient for interactive typing (fewer parentheses, commas and quoting everywhere); IPython distinguishes the two by detecting lines that start with the `%` character.
1180
1180
1181 You can learn more about the magic system by simply typing `%magic` at the prompt, which will give you a short description plus the documentation on *all* available magics. If you want to see only a listing of existing magics, you can use `%lsmagic`:
1181 You can learn more about the magic system by simply typing `%magic` at the prompt, which will give you a short description plus the documentation on *all* available magics. If you want to see only a listing of existing magics, you can use `%lsmagic`:
1182
1182
1183 In [4]: lsmagic
1183 In [4]: lsmagic
1184 Available magic functions:
1184 Available magic functions:
1185 %alias %autocall %autoindent %automagic %bookmark %c %cd %colors %config %cpaste
1185 %alias %autocall %autoindent %automagic %bookmark %c %cd %colors %config %cpaste
1186 %debug %dhist %dirs %doctest_mode %ds %ed %edit %env %gui %hist %history
1186 %debug %dhist %dirs %doctest_mode %ds %ed %edit %env %gui %hist %history
1187 %install_default_config %install_ext %install_profiles %load_ext %loadpy %logoff %logon
1187 %install_default_config %install_ext %install_profiles %load_ext %loadpy %logoff %logon
1188 %logstart %logstate %logstop %lsmagic %macro %magic %notebook %page %paste %pastebin
1188 %logstart %logstate %logstop %lsmagic %macro %magic %notebook %page %paste %pastebin
1189 %pd %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %pop %popd %pprint %precision %profile
1189 %pd %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %pop %popd %pprint %precision %profile
1190 %prun %psearch %psource %pushd %pwd %pycat %pylab %quickref %recall %rehashx
1190 %prun %psearch %psource %pushd %pwd %pycat %pylab %quickref %recall %rehashx
1191 %reload_ext %rep %rerun %reset %reset_selective %run %save %sc %stop %store %sx %tb
1191 %reload_ext %rep %rerun %reset %reset_selective %run %save %sc %stop %store %sx %tb
1192 %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode
1192 %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode
1193
1193
1194 Automagic is ON, % prefix NOT needed for magic functions.
1194 Automagic is ON, % prefix NOT needed for magic functions.
1195
1195
1196 Note how the example above omitted the explicit `%` marker and simply uses `lsmagic`. As long as the 'automagic' feature is on (which it is by default), you can omit the `%` marker as long as there is no ambiguity with a Python variable of the same name.
1196 Note how the example above omitted the explicit `%` marker and simply uses `lsmagic`. As long as the 'automagic' feature is on (which it is by default), you can omit the `%` marker as long as there is no ambiguity with a Python variable of the same name.
1197
1197
1198 **Running your code**
1198 **Running your code**
1199
1199
1200 While it's easy to type a few lines of code in IPython, for any long-lived work you should keep your code in Python scripts (or in IPython notebooks, see below). Consider that you have a script, in this case trivially simple for the sake of brevity, named `simple.py`:
1200 While it's easy to type a few lines of code in IPython, for any long-lived work you should keep your code in Python scripts (or in IPython notebooks, see below). Consider that you have a script, in this case trivially simple for the sake of brevity, named `simple.py`:
1201
1201
1202 In [12]: !cat simple.py
1202 In [12]: !cat simple.py
1203 import numpy as np
1203 import numpy as np
1204
1204
1205 x = np.random.normal(size=100)
1205 x = np.random.normal(size=100)
1206
1206
1207 print 'First elment of x:', x[0]
1207 print 'First elment of x:', x[0]
1208
1208
1209 The typical workflow with IPython is to use the `%run` magic to execute your script (you can omit the .py extension if you want). When you run it, the script will execute just as if it had been run at the system prompt with `python simple.py` (though since modules don't get re-executed on new imports by Python, all system initialization is essentially free, which can have a significant run time impact in some cases):
1209 The typical workflow with IPython is to use the `%run` magic to execute your script (you can omit the .py extension if you want). When you run it, the script will execute just as if it had been run at the system prompt with `python simple.py` (though since modules don't get re-executed on new imports by Python, all system initialization is essentially free, which can have a significant run time impact in some cases):
1210
1210
1211 In [13]: run simple
1211 In [13]: run simple
1212 First elment of x: -1.55872256289
1212 First elment of x: -1.55872256289
1213
1213
1214 Once it completes, all variables defined in it become available for you to use interactively:
1214 Once it completes, all variables defined in it become available for you to use interactively:
1215
1215
1216 In [14]: x.shape
1216 In [14]: x.shape
1217 Out[14]: (100,)
1217 Out[14]: (100,)
1218
1218
1219 This allows you to plot data, try out ideas, etc, in a `%run`/interact/edit cycle that can be very productive. As you start understanding your problem better you can refine your script further, incrementally improving it based on the work you do at the IPython prompt. At any point you can use the `%hist` magic to print out your history without prompts, so that you can copy useful fragments back into the script.
1219 This allows you to plot data, try out ideas, etc, in a `%run`/interact/edit cycle that can be very productive. As you start understanding your problem better you can refine your script further, incrementally improving it based on the work you do at the IPython prompt. At any point you can use the `%hist` magic to print out your history without prompts, so that you can copy useful fragments back into the script.
1220
1220
1221 By default, `%run` executes scripts in a completely empty namespace, to better mimic how they would execute at the system prompt with plain Python. But if you use the `-i` flag, the script will also see your interactively defined variables. This lets you edit in a script larger amounts of code that still behave as if you had typed them at the IPython prompt.
1221 By default, `%run` executes scripts in a completely empty namespace, to better mimic how they would execute at the system prompt with plain Python. But if you use the `-i` flag, the script will also see your interactively defined variables. This lets you edit in a script larger amounts of code that still behave as if you had typed them at the IPython prompt.
1222
1222
1223 You can also get a summary of the time taken by your script with the `-t` flag; consider a different script `randsvd.py` that takes a bit longer to run:
1223 You can also get a summary of the time taken by your script with the `-t` flag; consider a different script `randsvd.py` that takes a bit longer to run:
1224
1224
1225 In [21]: run -t randsvd.py
1225 In [21]: run -t randsvd.py
1226
1226
1227 IPython CPU timings (estimated):
1227 IPython CPU timings (estimated):
1228 User : 0.38 s.
1228 User : 0.38 s.
1229 System : 0.04 s.
1229 System : 0.04 s.
1230 Wall time: 0.34 s.
1230 Wall time: 0.34 s.
1231
1231
1232 `User` is the time spent by the computer executing your code, while `System` is the time the operating system had to work on your behalf, doing things like memory allocation that are needed by your code but that you didn't explicitly program and that happen inside the kernel. The `Wall time` is the time on a 'clock on the wall' between the start and end of your program.
1232 `User` is the time spent by the computer executing your code, while `System` is the time the operating system had to work on your behalf, doing things like memory allocation that are needed by your code but that you didn't explicitly program and that happen inside the kernel. The `Wall time` is the time on a 'clock on the wall' between the start and end of your program.
1233
1233
1234 If `Wall > User+System`, your code is most likely waiting idle for certain periods. That could be waiting for data to arrive from a remote source or perhaps because the operating system has to swap large amounts of virtual memory. If you know that your code doesn't explicitly wait for remote data to arrive, you should investigate further to identify possible ways of improving the performance profile.
1234 If `Wall > User+System`, your code is most likely waiting idle for certain periods. That could be waiting for data to arrive from a remote source or perhaps because the operating system has to swap large amounts of virtual memory. If you know that your code doesn't explicitly wait for remote data to arrive, you should investigate further to identify possible ways of improving the performance profile.
1235
1235
1236 If you only want to time how long a single statement takes, you don't need to put it into a script as you can use the `%timeit` magic, which uses Python's `timeit` module to very carefully measure timing data; `timeit` can measure even short statements that execute extremely fast:
1236 If you only want to time how long a single statement takes, you don't need to put it into a script as you can use the `%timeit` magic, which uses Python's `timeit` module to very carefully measure timing data; `timeit` can measure even short statements that execute extremely fast:
1237
1237
1238 In [27]: %timeit a=1
1238 In [27]: %timeit a=1
1239 10000000 loops, best of 3: 23 ns per loop
1239 10000000 loops, best of 3: 23 ns per loop
1240
1240
1241 and for code that runs longer, it automatically adjusts so the overall measurement doesn't take too long:
1241 and for code that runs longer, it automatically adjusts so the overall measurement doesn't take too long:
1242
1242
1243 In [28]: %timeit np.linalg.svd(x)
1243 In [28]: %timeit np.linalg.svd(x)
1244 1 loops, best of 3: 310 ms per loop
1244 1 loops, best of 3: 310 ms per loop
1245
1245
1246 The `%run` magic still has more options for debugging and profiling data; you should read its documentation for many useful details (as always, just type `%run?`).
1246 The `%run` magic still has more options for debugging and profiling data; you should read its documentation for many useful details (as always, just type `%run?`).
1247
1247
1248 ## The graphical Qt console
1248 ## The graphical Qt console
1249
1249
1250 If you type at the system prompt (see the IPython website for installation details, as this requires some additional libraries):
1250 If you type at the system prompt (see the IPython website for installation details, as this requires some additional libraries):
1251
1251
1252 $ ipython qtconsole
1252 $ ipython qtconsole
1253
1253
1254 instead of opening in a terminal as before, IPython will start a graphical console that at first sight appears just like a terminal, but which is in fact much more capable than a text-only terminal. This is a specialized terminal designed for interactive scientific work, and it supports full multi-line editing with color highlighting and graphical calltips for functions, it can keep multiple IPython sessions open simultaneously in tabs, and when scripts run it can display the figures inline directly in the work area.
1254 instead of opening in a terminal as before, IPython will start a graphical console that at first sight appears just like a terminal, but which is in fact much more capable than a text-only terminal. This is a specialized terminal designed for interactive scientific work, and it supports full multi-line editing with color highlighting and graphical calltips for functions, it can keep multiple IPython sessions open simultaneously in tabs, and when scripts run it can display the figures inline directly in the work area.
1255
1255
1256 <center><img src="ipython_qtconsole2.png" width=400px></center>
1256 <center><img src="ipython_qtconsole2.png" width=400px></center>
1257
1257
1258 % This cell is for the pdflatex output only
1258 % This cell is for the pdflatex output only
1259 \begin{figure}[htbp]
1259 \begin{figure}[htbp]
1260 \centering
1260 \centering
1261 \includegraphics[width=3in]{ipython_qtconsole2.png}
1261 \includegraphics[width=3in]{ipython_qtconsole2.png}
1262 \caption{The IPython Qt console: a lightweight terminal for scientific exploration, with code, results and graphics in a single environment.}
1262 \caption{The IPython Qt console: a lightweight terminal for scientific exploration, with code, results and graphics in a single environment.}
1263 \end{figure}
1263 \end{figure}
1264
1264
1265 The Qt console accepts the same `--pylab` startup flags as the terminal, but you can additionally supply the value `--pylab inline`, which enables the support for inline graphics shown in the figure. This is ideal for keeping all the code and figures in the same session, given that the console can save the output of your entire session to HTML or PDF.
1265 The Qt console accepts the same `--pylab` startup flags as the terminal, but you can additionally supply the value `--pylab inline`, which enables the support for inline graphics shown in the figure. This is ideal for keeping all the code and figures in the same session, given that the console can save the output of your entire session to HTML or PDF.
1266
1266
1267 Since the Qt console makes it far more convenient than the terminal to edit blocks of code with multiple lines, in this environment it's worth knowing about the `%loadpy` magic function. `%loadpy` takes a path to a local file or remote URL, fetches its contents, and puts it in the work area for you to further edit and execute. It can be an extremely fast and convenient way of loading code from local disk or remote examples from sites such as the [Matplotlib gallery](http://matplotlib.sourceforge.net/gallery.html).
1267 Since the Qt console makes it far more convenient than the terminal to edit blocks of code with multiple lines, in this environment it's worth knowing about the `%loadpy` magic function. `%loadpy` takes a path to a local file or remote URL, fetches its contents, and puts it in the work area for you to further edit and execute. It can be an extremely fast and convenient way of loading code from local disk or remote examples from sites such as the [Matplotlib gallery](http://matplotlib.sourceforge.net/gallery.html).
1268
1268
1269 Other than its enhanced capabilities for code and graphics, all of the features of IPython we've explained before remain functional in this graphical console.
1269 Other than its enhanced capabilities for code and graphics, all of the features of IPython we've explained before remain functional in this graphical console.
1270
1270
1271 ## The IPython Notebook
1271 ## The IPython Notebook
1272
1272
1273 The third way to interact with IPython, in addition to the terminal and graphical Qt console, is a powerful web interface called the "IPython Notebook". If you run at the system console (you can omit the `pylab` flags if you don't need plotting support):
1273 The third way to interact with IPython, in addition to the terminal and graphical Qt console, is a powerful web interface called the "IPython Notebook". If you run at the system console (you can omit the `pylab` flags if you don't need plotting support):
1274
1274
1275 $ ipython notebook --pylab inline
1275 $ ipython notebook --pylab inline
1276
1276
1277 IPython will start a process that runs a web server in your local machine and to which a web browser can connect. The Notebook is a workspace that lets you execute code in blocks called 'cells' and displays any results and figures, but which can also contain arbitrary text (including LaTeX-formatted mathematical expressions) and any rich media that a modern web browser is capable of displaying.
1277 IPython will start a process that runs a web server in your local machine and to which a web browser can connect. The Notebook is a workspace that lets you execute code in blocks called 'cells' and displays any results and figures, but which can also contain arbitrary text (including LaTeX-formatted mathematical expressions) and any rich media that a modern web browser is capable of displaying.
1278
1278
1279 <center><img src="ipython-notebook-specgram-2.png" width=400px></center>
1279 <center><img src="ipython-notebook-specgram-2.png" width=400px></center>
1280
1280
1281 % This cell is for the pdflatex output only
1281 % This cell is for the pdflatex output only
1282 \begin{figure}[htbp]
1282 \begin{figure}[htbp]
1283 \centering
1283 \centering
1284 \includegraphics[width=3in]{ipython-notebook-specgram-2.png}
1284 \includegraphics[width=3in]{ipython-notebook-specgram-2.png}
1285 \caption{The IPython Notebook: text, equations, code, results, graphics and other multimedia in an open format for scientific exploration and collaboration}
1285 \caption{The IPython Notebook: text, equations, code, results, graphics and other multimedia in an open format for scientific exploration and collaboration}
1286 \end{figure}
1286 \end{figure}
1287
1287
1288 In fact, this document was written as a Notebook, and only exported to LaTeX for printing. Inside of each cell, all the features of IPython that we have discussed before remain functional, since ultimately this web client is communicating with the same IPython code that runs in the terminal. But this interface is a much richer and more powerful environment for maintaining long-term "live and executable" scientific documents.
1288 In fact, this document was written as a Notebook, and only exported to LaTeX for printing. Inside of each cell, all the features of IPython that we have discussed before remain functional, since ultimately this web client is communicating with the same IPython code that runs in the terminal. But this interface is a much richer and more powerful environment for maintaining long-term "live and executable" scientific documents.
1289
1289
1290 Notebook environments have existed in commercial systems like Mathematica(TM) and Maple(TM) for a long time; in the open source world the [Sage](http://sagemath.org) project blazed this particular trail starting in 2006, and now we bring all the features that have made IPython such a widely used tool to a Notebook model.
1290 Notebook environments have existed in commercial systems like Mathematica(TM) and Maple(TM) for a long time; in the open source world the [Sage](http://sagemath.org) project blazed this particular trail starting in 2006, and now we bring all the features that have made IPython such a widely used tool to a Notebook model.
1291
1291
1292 Since the Notebook runs as a web application, it is possible to configure it for remote access, letting you run your computations on a persistent server close to your data, which you can then access remotely from any browser-equipped computer. We encourage you to read the extensive documentation provided by the IPython project for details on how to do this and many more features of the notebook.
1292 Since the Notebook runs as a web application, it is possible to configure it for remote access, letting you run your computations on a persistent server close to your data, which you can then access remotely from any browser-equipped computer. We encourage you to read the extensive documentation provided by the IPython project for details on how to do this and many more features of the notebook.
1293
1293
1294 Finally, as we said earlier, IPython also has a high-level and easy to use set of libraries for parallel computing, that let you control (interactively if desired) not just one IPython but an entire cluster of 'IPython engines'. Unfortunately a detailed discussion of these tools is beyond the scope of this text, but should you need to parallelize your analysis codes, a quick read of the tutorials and examples provided at the IPython site may prove fruitful.
1294 Finally, as we said earlier, IPython also has a high-level and easy to use set of libraries for parallel computing, that let you control (interactively if desired) not just one IPython but an entire cluster of 'IPython engines'. Unfortunately a detailed discussion of these tools is beyond the scope of this text, but should you need to parallelize your analysis codes, a quick read of the tutorials and examples provided at the IPython site may prove fruitful.
@@ -1,1181 +1,1181 b''
1 ## An Introduction to the Scientific Python Ecosystem
1 ## An Introduction to the Scientific Python Ecosystem
2
2
3 # While the Python language is an excellent tool for general-purpose programming, with a highly readable syntax, rich and powerful data types (strings, lists, sets, dictionaries, arbitrary length integers, etc) and a very comprehensive standard library, it was not designed specifically for mathematical and scientific computing. Neither the language nor its standard library have facilities for the efficient representation of multidimensional datasets, tools for linear algebra and general matrix manipulations (an essential building block of virtually all technical computing), nor any data visualization facilities.
3 # While the Python language is an excellent tool for general-purpose programming, with a highly readable syntax, rich and powerful data types (strings, lists, sets, dictionaries, arbitrary length integers, etc) and a very comprehensive standard library, it was not designed specifically for mathematical and scientific computing. Neither the language nor its standard library have facilities for the efficient representation of multidimensional datasets, tools for linear algebra and general matrix manipulations (an essential building block of virtually all technical computing), nor any data visualization facilities.
4 #
4 #
5 # In particular, Python lists are very flexible containers that can be nested arbitrarily deep and which can hold any Python object in them, but they are poorly suited to represent efficiently common mathematical constructs like vectors and matrices. In contrast, much of our modern heritage of scientific computing has been built on top of libraries written in the Fortran language, which has native support for vectors and matrices as well as a library of mathematical functions that can efficiently operate on entire arrays at once.
5 # In particular, Python lists are very flexible containers that can be nested arbitrarily deep and which can hold any Python object in them, but they are poorly suited to represent efficiently common mathematical constructs like vectors and matrices. In contrast, much of our modern heritage of scientific computing has been built on top of libraries written in the Fortran language, which has native support for vectors and matrices as well as a library of mathematical functions that can efficiently operate on entire arrays at once.
6
6
7 ### Scientific Python: a collaboration of projects built by scientists
7 ### Scientific Python: a collaboration of projects built by scientists
8
8
9 # The scientific community has developed a set of related Python libraries that provide powerful array facilities, linear algebra, numerical algorithms, data visualization and more. In this appendix, we will briefly outline the tools most frequently used for this purpose, that make "Scientific Python" something far more powerful than the Python language alone.
9 # The scientific community has developed a set of related Python libraries that provide powerful array facilities, linear algebra, numerical algorithms, data visualization and more. In this appendix, we will briefly outline the tools most frequently used for this purpose, that make "Scientific Python" something far more powerful than the Python language alone.
10 #
10 #
11 # For reasons of space, we can only describe in some detail the central Numpy library, but below we provide links to the websites of each project where you can read their documentation in more detail.
11 # For reasons of space, we can only describe in some detail the central Numpy library, but below we provide links to the websites of each project where you can read their documentation in more detail.
12 #
12 #
13 # First, let's look at an overview of the basic tools that most scientists use in daily research with Python. The core of this ecosystem is composed of:
13 # First, let's look at an overview of the basic tools that most scientists use in daily research with Python. The core of this ecosystem is composed of:
14 #
14 #
15 # * Numpy: the basic library that most others depend on, it provides a powerful array type that can represent multidimensional datasets of many different kinds and that supports arithmetic operations. Numpy also provides a library of common mathematical functions, basic linear algebra, random number generation and Fast Fourier Transforms. Numpy can be found at [numpy.scipy.org](http://numpy.scipy.org)
15 # * Numpy: the basic library that most others depend on, it provides a powerful array type that can represent multidimensional datasets of many different kinds and that supports arithmetic operations. Numpy also provides a library of common mathematical functions, basic linear algebra, random number generation and Fast Fourier Transforms. Numpy can be found at [numpy.scipy.org](http://numpy.scipy.org)
16 #
16 #
17 # * Scipy: a large collection of numerical algorithms that operate on numpy arrays and provide facilities for many common tasks in scientific computing, including dense and sparse linear algebra support, optimization, special functions, statistics, n-dimensional image processing, signal processing and more. Scipy can be found at [scipy.org](http://scipy.org).
17 # * Scipy: a large collection of numerical algorithms that operate on numpy arrays and provide facilities for many common tasks in scientific computing, including dense and sparse linear algebra support, optimization, special functions, statistics, n-dimensional image processing, signal processing and more. Scipy can be found at [scipy.org](http://scipy.org).
18 #
18 #
19 # * Matplotlib: a data visualization library with a strong focus on producing high-quality output, it supports a variety of common scientific plot types in two and three dimensions, with precise control over the final output and format for publication-quality results. Matplotlib can also be controlled interactively allowing graphical manipulation of your data (zooming, panning, etc) and can be used with most modern user interface toolkits. It can be found at [matplotlib.sf.net](http://matplotlib.sf.net).
19 # * Matplotlib: a data visualization library with a strong focus on producing high-quality output, it supports a variety of common scientific plot types in two and three dimensions, with precise control over the final output and format for publication-quality results. Matplotlib can also be controlled interactively allowing graphical manipulation of your data (zooming, panning, etc) and can be used with most modern user interface toolkits. It can be found at [matplotlib.sf.net](http://matplotlib.sf.net).
20 #
20 #
21 # * IPython: while not strictly scientific in nature, IPython is the interactive environment in which many scientists spend their time. IPython provides a powerful Python shell that integrates tightly with Matplotlib and with easy access to the files and operating system, and which can execute in a terminal or in a graphical Qt console. IPython also has a web-based notebook interface that can combine code with text, mathematical expressions, figures and multimedia. It can be found at [ipython.org](http://ipython.org).
21 # * IPython: while not strictly scientific in nature, IPython is the interactive environment in which many scientists spend their time. IPython provides a powerful Python shell that integrates tightly with Matplotlib and with easy access to the files and operating system, and which can execute in a terminal or in a graphical Qt console. IPython also has a web-based notebook interface that can combine code with text, mathematical expressions, figures and multimedia. It can be found at [ipython.org](http://ipython.org).
22 #
22 #
23 # While each of these tools can be installed separately, in our opinion the most convenient way today of accessing them (especially on Windows and Mac computers) is to install the [Free Edition of the Enthought Python Distribution](http://www.enthought.com/products/epd_free.php), which contains all of the above. Other free alternatives on Windows (but not on Macs) are [Python(x,y)](http://code.google.com/p/pythonxy) and [Christoph Gohlke's packages page](http://www.lfd.uci.edu/~gohlke/pythonlibs).
23 # While each of these tools can be installed separately, in our opinion the most convenient way today of accessing them (especially on Windows and Mac computers) is to install the [Free Edition of the Enthought Python Distribution](http://www.enthought.com/products/epd_free.php), which contains all of the above. Other free alternatives on Windows (but not on Macs) are [Python(x,y)](http://code.google.com/p/pythonxy) and [Christoph Gohlke's packages page](http://www.lfd.uci.edu/~gohlke/pythonlibs).
24 #
24 #
25 # These four 'core' libraries are in practice complemented by a number of other tools for more specialized work. We will briefly list here the ones that we think are the most commonly needed:
25 # These four 'core' libraries are in practice complemented by a number of other tools for more specialized work. We will briefly list here the ones that we think are the most commonly needed:
26 #
26 #
27 # * Sympy: a symbolic manipulation tool that turns a Python session into a computer algebra system. It integrates with the IPython notebook, rendering results in properly typeset mathematical notation. [sympy.org](http://sympy.org).
27 # * Sympy: a symbolic manipulation tool that turns a Python session into a computer algebra system. It integrates with the IPython notebook, rendering results in properly typeset mathematical notation. [sympy.org](http://sympy.org).
28 #
28 #
29 # * Mayavi: sophisticated 3d data visualization; [code.enthought.com/projects/mayavi](http://code.enthought.com/projects/mayavi).
29 # * Mayavi: sophisticated 3d data visualization; [code.enthought.com/projects/mayavi](http://code.enthought.com/projects/mayavi).
30 #
30 #
31 # * Cython: a bridge language between Python and C, useful both to optimize performance bottlenecks in Python and to access C libraries directly; [cython.org](http://cython.org).
31 # * Cython: a bridge language between Python and C, useful both to optimize performance bottlenecks in Python and to access C libraries directly; [cython.org](http://cython.org).
32 #
32 #
33 # * Pandas: high-performance data structures and data analysis tools, with powerful data alignment and structural manipulation capabilities; [pandas.pydata.org](http://pandas.pydata.org).
33 # * Pandas: high-performance data structures and data analysis tools, with powerful data alignment and structural manipulation capabilities; [pandas.pydata.org](http://pandas.pydata.org).
34 #
34 #
35 # * Statsmodels: statistical data exploration and model estimation; [statsmodels.sourceforge.net](http://statsmodels.sourceforge.net).
35 # * Statsmodels: statistical data exploration and model estimation; [statsmodels.sourceforge.net](http://statsmodels.sourceforge.net).
36 #
36 #
37 # * Scikit-learn: general purpose machine learning algorithms with a common interface; [scikit-learn.org](http://scikit-learn.org).
37 # * Scikit-learn: general purpose machine learning algorithms with a common interface; [scikit-learn.org](http://scikit-learn.org).
38 #
38 #
39 # * Scikits-image: image processing toolbox; [scikits-image.org](http://scikits-image.org).
39 # * Scikits-image: image processing toolbox; [scikits-image.org](http://scikits-image.org).
40 #
40 #
41 # * NetworkX: analysis of complex networks (in the graph theoretical sense); [networkx.lanl.gov](http://networkx.lanl.gov).
41 # * NetworkX: analysis of complex networks (in the graph theoretical sense); [networkx.lanl.gov](http://networkx.lanl.gov).
42 #
42 #
43 # * PyTables: management of hierarchical datasets using the industry-standard HDF5 format; [www.pytables.org](http://www.pytables.org).
43 # * PyTables: management of hierarchical datasets using the industry-standard HDF5 format; [www.pytables.org](http://www.pytables.org).
44 #
44 #
45 # Beyond these, for any specific problem you should look on the internet first, before starting to write code from scratch. There's a good chance that someone, somewhere, has written an open source library that you can use for part or all of your problem.
45 # Beyond these, for any specific problem you should look on the internet first, before starting to write code from scratch. There's a good chance that someone, somewhere, has written an open source library that you can use for part or all of your problem.
46
46
47 ### A note about the examples below
47 ### A note about the examples below
48
48
49 # In all subsequent examples, you will see blocks of input code, followed by the results of the code if the code generated output. This output may include text, graphics and other result objects. These blocks of input can be pasted into your interactive IPython session or notebook for you to execute. In the print version of this document, a thin vertical bar on the left of the blocks of input and output shows which blocks go together.
49 # In all subsequent examples, you will see blocks of input code, followed by the results of the code if the code generated output. This output may include text, graphics and other result objects. These blocks of input can be pasted into your interactive IPython session or notebook for you to execute. In the print version of this document, a thin vertical bar on the left of the blocks of input and output shows which blocks go together.
50 #
50 #
51 # If you are reading this text as an actual IPython notebook, you can press `Shift-Enter` or use the 'play' button on the toolbar (right-pointing triangle) to execute each block of code, known as a 'cell' in IPython:
51 # If you are reading this text as an actual IPython notebook, you can press `Shift-Enter` or use the 'play' button on the toolbar (right-pointing triangle) to execute each block of code, known as a 'cell' in IPython:
52
52
53 # In[71]:
53 # In[71]:
54 # This is a block of code, below you'll see its output
54 # This is a block of code, below you'll see its output
55 print "Welcome to the world of scientific computing with Python!"
55 print "Welcome to the world of scientific computing with Python!"
56
56
57 # Out[71]:
57 # Out[71]:
58 # Welcome to the world of scientific computing with Python!
58 # Welcome to the world of scientific computing with Python!
59 #
59 #
60 ## Motivation: the trapezoidal rule
60 ## Motivation: the trapezoidal rule
61
61
62 # In subsequent sections we'll provide a basic introduction to the nuts and bolts of the basic scientific python tools; but we'll first motivate it with a brief example that illustrates what you can do in a few lines with these tools. For this, we will use the simple problem of approximating a definite integral with the trapezoid rule:
62 # In subsequent sections we'll provide a basic introduction to the nuts and bolts of the basic scientific python tools; but we'll first motivate it with a brief example that illustrates what you can do in a few lines with these tools. For this, we will use the simple problem of approximating a definite integral with the trapezoid rule:
63 #
63 #
64 # $$
64 # $$
65 # \int_{a}^{b} f(x)\, dx \approx \frac{1}{2} \sum_{k=1}^{N} \left( x_{k} - x_{k-1} \right) \left( f(x_{k}) + f(x_{k-1}) \right).
65 # \int_{a}^{b} f(x)\, dx \approx \frac{1}{2} \sum_{k=1}^{N} \left( x_{k} - x_{k-1} \right) \left( f(x_{k}) + f(x_{k-1}) \right).
66 # $$
66 # $$
67 #
67 #
68 # Our task will be to compute this formula for a function such as:
68 # Our task will be to compute this formula for a function such as:
69 #
69 #
70 # $$
70 # $$
71 # f(x) = (x-3)(x-5)(x-7)+85
71 # f(x) = (x-3)(x-5)(x-7)+85
72 # $$
72 # $$
73 #
73 #
74 # integrated between $a=1$ and $b=9$.
74 # integrated between $a=1$ and $b=9$.
75 #
75 #
76 # First, we define the function and sample it evenly between 0 and 10 at 200 points:
76 # First, we define the function and sample it evenly between 0 and 10 at 200 points:
77
77
78 # In[1]:
78 # In[1]:
79 def f(x):
79 def f(x):
80 return (x-3)*(x-5)*(x-7)+85
80 return (x-3)*(x-5)*(x-7)+85
81
81
82 import numpy as np
82 import numpy as np
83 x = np.linspace(0, 10, 200)
83 x = np.linspace(0, 10, 200)
84 y = f(x)
84 y = f(x)
85
85
86 # We select $a$ and $b$, our integration limits, and we take only a few points in that region to illustrate the error behavior of the trapezoid approximation:
86 # We select $a$ and $b$, our integration limits, and we take only a few points in that region to illustrate the error behavior of the trapezoid approximation:
87
87
88 # In[2]:
88 # In[2]:
89 a, b = 1, 9
89 a, b = 1, 9
90 xint = x[logical_and(x>=a, x<=b)][::30]
90 xint = x[logical_and(x>=a, x<=b)][::30]
91 yint = y[logical_and(x>=a, x<=b)][::30]
91 yint = y[logical_and(x>=a, x<=b)][::30]
92
92
93 # Let's plot both the function and the area below it in the trapezoid approximation:
93 # Let's plot both the function and the area below it in the trapezoid approximation:
94
94
95 # In[3]:
95 # In[3]:
96 import matplotlib.pyplot as plt
96 import matplotlib.pyplot as plt
97 plt.plot(x, y, lw=2)
97 plt.plot(x, y, lw=2)
98 plt.axis([0, 10, 0, 140])
98 plt.axis([0, 10, 0, 140])
99 plt.fill_between(xint, 0, yint, facecolor='gray', alpha=0.4)
99 plt.fill_between(xint, 0, yint, facecolor='gray', alpha=0.4)
100 plt.text(0.5 * (a + b), 30,r"$\int_a^b f(x)dx$", horizontalalignment='center', fontsize=20);
100 plt.text(0.5 * (a + b), 30,r"$\int_a^b f(x)dx$", horizontalalignment='center', fontsize=20);
101
101
102 # Out[3]:
102 # Out[3]:
103 # image file: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_00.svg
103 # image file: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_00.svg
104
104
105 # Compute the integral both at high accuracy and with the trapezoid approximation
105 # Compute the integral both at high accuracy and with the trapezoid approximation
106
106
107 # In[4]:
107 # In[4]:
108 from scipy.integrate import quad, trapz
108 from scipy.integrate import quad, trapz
109 integral, error = quad(f, 1, 9)
109 integral, error = quad(f, 1, 9)
110 trap_integral = trapz(yint, xint)
110 trap_integral = trapz(yint, xint)
111 print "The integral is: %g +/- %.1e" % (integral, error)
111 print "The integral is: %g +/- %.1e" % (integral, error)
112 print "The trapezoid approximation with", len(xint), "points is:", trap_integral
112 print "The trapezoid approximation with", len(xint), "points is:", trap_integral
113 print "The absolute error is:", abs(integral - trap_integral)
113 print "The absolute error is:", abs(integral - trap_integral)
114
114
115 # Out[4]:
115 # Out[4]:
116 # The integral is: 680 +/- 7.5e-12
116 # The integral is: 680 +/- 7.5e-12
117 # The trapezoid approximation with 6 points is: 621.286411141
117 # The trapezoid approximation with 6 points is: 621.286411141
118 # The absolute error is: 58.7135888589
118 # The absolute error is: 58.7135888589
119 #
119 #
120 # This simple example showed us how, combining the numpy, scipy and matplotlib libraries we can provide an illustration of a standard method in elementary calculus with just a few lines of code. We will now discuss with more detail the basic usage of these tools.
120 # This simple example showed us how, combining the numpy, scipy and matplotlib libraries we can provide an illustration of a standard method in elementary calculus with just a few lines of code. We will now discuss with more detail the basic usage of these tools.
121
121
122 ## NumPy arrays: the right data structure for scientific computing
122 ## NumPy arrays: the right data structure for scientific computing
123
123
124 ### Basics of Numpy arrays
124 ### Basics of Numpy arrays
125
125
126 # We now turn our attention to the Numpy library, which forms the base layer for the entire 'scipy ecosystem'. Once you have installed numpy, you can import it as
126 # We now turn our attention to the Numpy library, which forms the base layer for the entire 'scipy ecosystem'. Once you have installed numpy, you can import it as
127
127
128 # In[5]:
128 # In[5]:
129 import numpy
129 import numpy
130
130
131 # though in this book we will use the common shorthand
131 # though in this book we will use the common shorthand
132
132
133 # In[6]:
133 # In[6]:
134 import numpy as np
134 import numpy as np
135
135
136 # As mentioned above, the main object provided by numpy is a powerful array. We'll start by exploring how the numpy array differs from Python lists. We start by creating a simple list and an array with the same contents of the list:
136 # As mentioned above, the main object provided by numpy is a powerful array. We'll start by exploring how the numpy array differs from Python lists. We start by creating a simple list and an array with the same contents of the list:
137
137
138 # In[7]:
138 # In[7]:
139 lst = [10, 20, 30, 40]
139 lst = [10, 20, 30, 40]
140 arr = np.array([10, 20, 30, 40])
140 arr = np.array([10, 20, 30, 40])
141
141
142 # Elements of a one-dimensional array are accessed with the same syntax as a list:
142 # Elements of a one-dimensional array are accessed with the same syntax as a list:
143
143
144 # In[8]:
144 # In[8]:
145 lst[0]
145 lst[0]
146
146
147 # Out[8]:
147 # Out[8]:
148 # 10
148 # 10
149
149
150
150
151 # In[9]:
151 # In[9]:
152 arr[0]
152 arr[0]
153
153
154 # Out[9]:
154 # Out[9]:
155 # 10
155 # 10
156
156
157
157
158 # In[10]:
158 # In[10]:
159 arr[-1]
159 arr[-1]
160
160
161 # Out[10]:
161 # Out[10]:
162 # 40
162 # 40
163
163
164
164
165 # In[11]:
165 # In[11]:
166 arr[2:]
166 arr[2:]
167
167
168 # Out[11]:
168 # Out[11]:
169 # array([30, 40])
169 # array([30, 40])
170
170
171
171
172 # The first difference to note between lists and arrays is that arrays are *homogeneous*; i.e. all elements of an array must be of the same type. In contrast, lists can contain elements of arbitrary type. For example, we can change the last element in our list above to be a string:
172 # The first difference to note between lists and arrays is that arrays are *homogeneous*; i.e. all elements of an array must be of the same type. In contrast, lists can contain elements of arbitrary type. For example, we can change the last element in our list above to be a string:
173
173
174 # In[12]:
174 # In[12]:
175 lst[-1] = 'a string inside a list'
175 lst[-1] = 'a string inside a list'
176 lst
176 lst
177
177
178 # Out[12]:
178 # Out[12]:
179 # [10, 20, 30, 'a string inside a list']
179 # [10, 20, 30, 'a string inside a list']
180
180
181
181
182 # but the same can not be done with an array, as we get an error message:
182 # but the same can not be done with an array, as we get an error message:
183
183
184 # In[13]:
184 # In[13]:
185 arr[-1] = 'a string inside an array'
185 arr[-1] = 'a string inside an array'
186
186
187 # Out[13]:
187 # Out[13]:
188 ---------------------------------------------------------------------------
188 ---------------------------------------------------------------------------
189 ValueError Traceback (most recent call last)
189 ValueError Traceback (most recent call last)
190 /home/fperez/teach/book-math-labtool/<ipython-input-13-29c0bfa5fa8a> in <module>()
190 /home/fperez/teach/book-math-labtool/<ipython-input-13-29c0bfa5fa8a> in <module>()
191 ----> 1 arr[-1] = 'a string inside an array'
191 ----> 1 arr[-1] = 'a string inside an array'
192
192
193 ValueError: invalid literal for long() with base 10: 'a string inside an array'
193 ValueError: invalid literal for long() with base 10: 'a string inside an array'
194
194
195 # The information about the type of an array is contained in its *dtype* attribute:
195 # The information about the type of an array is contained in its *dtype* attribute:
196
196
197 # In[14]:
197 # In[14]:
198 arr.dtype
198 arr.dtype
199
199
200 # Out[14]:
200 # Out[14]:
201 # dtype('int32')
201 # dtype('int32')
202
202
203
203
204 # Once an array has been created, its dtype is fixed and it can only store elements of the same type. For this example where the dtype is integer, if we store a floating point number it will be automatically converted into an integer:
204 # Once an array has been created, its dtype is fixed and it can only store elements of the same type. For this example where the dtype is integer, if we store a floating point number it will be automatically converted into an integer:
205
205
206 # In[15]:
206 # In[15]:
207 arr[-1] = 1.234
207 arr[-1] = 1.234
208 arr
208 arr
209
209
210 # Out[15]:
210 # Out[15]:
211 # array([10, 20, 30, 1])
211 # array([10, 20, 30, 1])
212
212
213
213
214 # Above we created an array from an existing list; let us now see other ways in which we can create arrays, which we'll illustrate next. A common need is to have an array initialized with a constant value, and very often this value is 0 or 1 (suitable as starting value for additive and multiplicative loops respectively); `zeros` creates arrays of all zeros, with any desired dtype:
214 # Above we created an array from an existing list; let us now see other ways in which we can create arrays, which we'll illustrate next. A common need is to have an array initialized with a constant value, and very often this value is 0 or 1 (suitable as starting value for additive and multiplicative loops respectively); `zeros` creates arrays of all zeros, with any desired dtype:
215
215
216 # In[16]:
216 # In[16]:
217 np.zeros(5, float)
217 np.zeros(5, float)
218
218
219 # Out[16]:
219 # Out[16]:
220 # array([ 0., 0., 0., 0., 0.])
220 # array([ 0., 0., 0., 0., 0.])
221
221
222
222
223 # In[17]:
223 # In[17]:
224 np.zeros(3, int)
224 np.zeros(3, int)
225
225
226 # Out[17]:
226 # Out[17]:
227 # array([0, 0, 0])
227 # array([0, 0, 0])
228
228
229
229
230 # In[18]:
230 # In[18]:
231 np.zeros(3, complex)
231 np.zeros(3, complex)
232
232
233 # Out[18]:
233 # Out[18]:
234 # array([ 0.+0.j, 0.+0.j, 0.+0.j])
234 # array([ 0.+0.j, 0.+0.j, 0.+0.j])
235
235
236
236
237 # and similarly for `ones`:
237 # and similarly for `ones`:
238
238
239 # In[19]:
239 # In[19]:
240 print '5 ones:', np.ones(5)
240 print '5 ones:', np.ones(5)
241
241
242 # Out[19]:
242 # Out[19]:
243 # 5 ones: [ 1. 1. 1. 1. 1.]
243 # 5 ones: [ 1. 1. 1. 1. 1.]
244 #
244 #
245 # If we want an array initialized with an arbitrary value, we can create an empty array and then use the fill method to put the value we want into the array:
245 # If we want an array initialized with an arbitrary value, we can create an empty array and then use the fill method to put the value we want into the array:
246
246
247 # In[20]:
247 # In[20]:
248 a = empty(4)
248 a = empty(4)
249 a.fill(5.5)
249 a.fill(5.5)
250 a
250 a
251
251
252 # Out[20]:
252 # Out[20]:
253 # array([ 5.5, 5.5, 5.5, 5.5])
253 # array([ 5.5, 5.5, 5.5, 5.5])
254
254
255
255
256 # Numpy also offers the `arange` function, which works like the builtin `range` but returns an array instead of a list:
256 # Numpy also offers the `arange` function, which works like the builtin `range` but returns an array instead of a list:
257
257
258 # In[21]:
258 # In[21]:
259 np.arange(5)
259 np.arange(5)
260
260
261 # Out[21]:
261 # Out[21]:
262 # array([0, 1, 2, 3, 4])
262 # array([0, 1, 2, 3, 4])
263
263
264
264
265 # and the `linspace` and `logspace` functions to create linearly and logarithmically-spaced grids respectively, with a fixed number of points and including both ends of the specified interval:
265 # and the `linspace` and `logspace` functions to create linearly and logarithmically-spaced grids respectively, with a fixed number of points and including both ends of the specified interval:
266
266
267 # In[22]:
267 # In[22]:
268 print "A linear grid between 0 and 1:", np.linspace(0, 1, 5)
268 print "A linear grid between 0 and 1:", np.linspace(0, 1, 5)
269 print "A logarithmic grid between 10**1 and 10**4: ", np.logspace(1, 4, 4)
269 print "A logarithmic grid between 10**1 and 10**4: ", np.logspace(1, 4, 4)
270
270
271 # Out[22]:
271 # Out[22]:
272 # A linear grid between 0 and 1: [ 0. 0.25 0.5 0.75 1. ]
272 # A linear grid between 0 and 1: [ 0. 0.25 0.5 0.75 1. ]
273 # A logarithmic grid between 10**1 and 10**4: [ 10. 100. 1000. 10000.]
273 # A logarithmic grid between 10**1 and 10**4: [ 10. 100. 1000. 10000.]
274 #
274 #
275 # Finally, it is often useful to create arrays with random numbers that follow a specific distribution. The `np.random` module contains a number of functions that can be used to this effect, for example this will produce an array of 5 random samples taken from a standard normal distribution (0 mean and variance 1):
275 # Finally, it is often useful to create arrays with random numbers that follow a specific distribution. The `np.random` module contains a number of functions that can be used to this effect, for example this will produce an array of 5 random samples taken from a standard normal distribution (0 mean and variance 1):
276
276
277 # In[23]:
277 # In[23]:
278 np.random.randn(5)
278 np.random.randn(5)
279
279
280 # Out[23]:
280 # Out[23]:
281 # array([-0.08633343, -0.67375434, 1.00589536, 0.87081651, 1.65597822])
281 # array([-0.08633343, -0.67375434, 1.00589536, 0.87081651, 1.65597822])
282
282
283
283
284 # whereas this will also give 5 samples, but from a normal distribution with a mean of 10 and a standard deviation of 3:
284 # whereas this will also give 5 samples, but from a normal distribution with a mean of 10 and a standard deviation of 3:
285
285
286 # In[24]:
286 # In[24]:
287 norm10 = np.random.normal(10, 3, 5)
287 norm10 = np.random.normal(10, 3, 5)
288 norm10
288 norm10
289
289
290 # Out[24]:
290 # Out[24]:
291 # array([ 8.94879575, 5.53038269, 8.24847281, 12.14944165, 11.56209294])
291 # array([ 8.94879575, 5.53038269, 8.24847281, 12.14944165, 11.56209294])
292
292
293
293
294 ### Indexing with other arrays
294 ### Indexing with other arrays
295
295
296 # Above we saw how to index arrays with single numbers and slices, just like Python lists. But arrays allow for a more sophisticated kind of indexing which is very powerful: you can index an array with another array, and in particular with an array of boolean values. This is particularly useful to extract information from an array that matches a certain condition.
296 # Above we saw how to index arrays with single numbers and slices, just like Python lists. But arrays allow for a more sophisticated kind of indexing which is very powerful: you can index an array with another array, and in particular with an array of boolean values. This is particularly useful to extract information from an array that matches a certain condition.
297 #
297 #
298 # Consider for example that in the array `norm10` we want to replace all values above 9 with the value 0. We can do so by first finding the *mask* that indicates where this condition is true or false:
298 # Consider for example that in the array `norm10` we want to replace all values above 9 with the value 0. We can do so by first finding the *mask* that indicates where this condition is true or false:
299
299
300 # In[25]:
300 # In[25]:
301 mask = norm10 > 9
301 mask = norm10 > 9
302 mask
302 mask
303
303
304 # Out[25]:
304 # Out[25]:
305 # array([False, False, False, True, True], dtype=bool)
305 # array([False, False, False, True, True], dtype=bool)
306
306
307
307
308 # Now that we have this mask, we can use it to either read those values or to reset them to 0:
308 # Now that we have this mask, we can use it to either read those values or to reset them to 0:
309
309
310 # In[26]:
310 # In[26]:
311 print 'Values above 9:', norm10[mask]
311 print 'Values above 9:', norm10[mask]
312
312
313 # Out[26]:
313 # Out[26]:
314 # Values above 9: [ 12.14944165 11.56209294]
314 # Values above 9: [ 12.14944165 11.56209294]
315 #
315 #
316 # In[27]:
316 # In[27]:
317 print 'Resetting all values above 9 to 0...'
317 print 'Resetting all values above 9 to 0...'
318 norm10[mask] = 0
318 norm10[mask] = 0
319 print norm10
319 print norm10
320
320
321 # Out[27]:
321 # Out[27]:
322 # Resetting all values above 9 to 0...
322 # Resetting all values above 9 to 0...
323 # [ 8.94879575 5.53038269 8.24847281 0. 0. ]
323 # [ 8.94879575 5.53038269 8.24847281 0. 0. ]
324 #
324 #
325 ### Arrays with more than one dimension
325 ### Arrays with more than one dimension
326
326
327 # Up until now all our examples have used one-dimensional arrays. But Numpy can create arrays of arbitrary dimensions, and all the methods illustrated in the previous section work with more than one dimension. For example, a list of lists can be used to initialize a two dimensional array:
327 # Up until now all our examples have used one-dimensional arrays. But Numpy can create arrays of arbitrary dimensions, and all the methods illustrated in the previous section work with more than one dimension. For example, a list of lists can be used to initialize a two dimensional array:
328
328
329 # In[28]:
329 # In[28]:
330 lst2 = [[1, 2], [3, 4]]
330 lst2 = [[1, 2], [3, 4]]
331 arr2 = np.array([[1, 2], [3, 4]])
331 arr2 = np.array([[1, 2], [3, 4]])
332 arr2
332 arr2
333
333
334 # Out[28]:
334 # Out[28]:
335 # array([[1, 2],
335 # array([[1, 2],
336 # [3, 4]])
336 # [3, 4]])
337
337
338
338
339 # With two-dimensional arrays we start seeing the power of numpy: while a nested list can be indexed using repeatedly the `[ ]` operator, multidimensional arrays support a much more natural indexing syntax with a single `[ ]` and a set of indices separated by commas:
339 # With two-dimensional arrays we start seeing the power of numpy: while a nested list can be indexed using repeatedly the `[ ]` operator, multidimensional arrays support a much more natural indexing syntax with a single `[ ]` and a set of indices separated by commas:
340
340
341 # In[29]:
341 # In[29]:
342 print lst2[0][1]
342 print lst2[0][1]
343 print arr2[0,1]
343 print arr2[0,1]
344
344
345 # Out[29]:
345 # Out[29]:
346 # 2
346 # 2
347 # 2
347 # 2
348 #
348 #
349 # Most of the array creation functions listed above can be used with more than one dimension, for example:
349 # Most of the array creation functions listed above can be used with more than one dimension, for example:
350
350
351 # In[30]:
351 # In[30]:
352 np.zeros((2,3))
352 np.zeros((2,3))
353
353
354 # Out[30]:
354 # Out[30]:
355 # array([[ 0., 0., 0.],
355 # array([[ 0., 0., 0.],
356 # [ 0., 0., 0.]])
356 # [ 0., 0., 0.]])
357
357
358
358
359 # In[31]:
359 # In[31]:
360 np.random.normal(10, 3, (2, 4))
360 np.random.normal(10, 3, (2, 4))
361
361
362 # Out[31]:
362 # Out[31]:
363 # array([[ 11.26788826, 4.29619866, 11.09346496, 9.73861307],
363 # array([[ 11.26788826, 4.29619866, 11.09346496, 9.73861307],
364 # [ 10.54025996, 9.5146268 , 10.80367214, 13.62204505]])
364 # [ 10.54025996, 9.5146268 , 10.80367214, 13.62204505]])
365
365
366
366
367 # In fact, the shape of an array can be changed at any time, as long as the total number of elements is unchanged. For example, if we want a 2x4 array with numbers increasing from 0, the easiest way to create it is:
367 # In fact, the shape of an array can be changed at any time, as long as the total number of elements is unchanged. For example, if we want a 2x4 array with numbers increasing from 0, the easiest way to create it is:
368
368
369 # In[32]:
369 # In[32]:
370 arr = np.arange(8).reshape(2,4)
370 arr = np.arange(8).reshape(2,4)
371 print arr
371 print arr
372
372
373 # Out[32]:
373 # Out[32]:
374 # [[0 1 2 3]
374 # [[0 1 2 3]
375 # [4 5 6 7]]
375 # [4 5 6 7]]
376 #
376 #
377 # With multidimensional arrays, you can also use slices, and you can mix and match slices and single indices in the different dimensions (using the same array as above):
377 # With multidimensional arrays, you can also use slices, and you can mix and match slices and single indices in the different dimensions (using the same array as above):
378
378
379 # In[33]:
379 # In[33]:
380 print 'Slicing in the second row:', arr[1, 2:4]
380 print 'Slicing in the second row:', arr[1, 2:4]
381 print 'All rows, third column :', arr[:, 2]
381 print 'All rows, third column :', arr[:, 2]
382
382
383 # Out[33]:
383 # Out[33]:
384 # Slicing in the second row: [6 7]
384 # Slicing in the second row: [6 7]
385 # All rows, third column : [2 6]
385 # All rows, third column : [2 6]
386 #
386 #
387 # If you only provide one index, then you will get an array with one less dimension containing that row:
387 # If you only provide one index, then you will get an array with one less dimension containing that row:
388
388
389 # In[34]:
389 # In[34]:
390 print 'First row: ', arr[0]
390 print 'First row: ', arr[0]
391 print 'Second row: ', arr[1]
391 print 'Second row: ', arr[1]
392
392
393 # Out[34]:
393 # Out[34]:
394 # First row: [0 1 2 3]
394 # First row: [0 1 2 3]
395 # Second row: [4 5 6 7]
395 # Second row: [4 5 6 7]
396 #
396 #
397 # Now that we have seen how to create arrays with more than one dimension, it's a good idea to look at some of the most useful properties and methods that arrays have. The following provide basic information about the size, shape and data in the array:
397 # Now that we have seen how to create arrays with more than one dimension, it's a good idea to look at some of the most useful properties and methods that arrays have. The following provide basic information about the size, shape and data in the array:
398
398
399 # In[35]:
399 # In[35]:
400 print 'Data type :', arr.dtype
400 print 'Data type :', arr.dtype
401 print 'Total number of elements :', arr.size
401 print 'Total number of elements :', arr.size
402 print 'Number of dimensions :', arr.ndim
402 print 'Number of dimensions :', arr.ndim
403 print 'Shape (dimensionality) :', arr.shape
403 print 'Shape (dimensionality) :', arr.shape
404 print 'Memory used (in bytes) :', arr.nbytes
404 print 'Memory used (in bytes) :', arr.nbytes
405
405
406 # Out[35]:
406 # Out[35]:
407 # Data type : int32
407 # Data type : int32
408 # Total number of elements : 8
408 # Total number of elements : 8
409 # Number of dimensions : 2
409 # Number of dimensions : 2
410 # Shape (dimensionality) : (2, 4)
410 # Shape (dimensionality) : (2, 4)
411 # Memory used (in bytes) : 32
411 # Memory used (in bytes) : 32
412 #
412 #
413 # Arrays also have many useful methods, some especially useful ones are:
413 # Arrays also have many useful methods, some especially useful ones are:
414
414
415 # In[36]:
415 # In[36]:
416 print 'Minimum and maximum :', arr.min(), arr.max()
416 print 'Minimum and maximum :', arr.min(), arr.max()
417 print 'Sum and product of all elements :', arr.sum(), arr.prod()
417 print 'Sum and product of all elements :', arr.sum(), arr.prod()
418 print 'Mean and standard deviation :', arr.mean(), arr.std()
418 print 'Mean and standard deviation :', arr.mean(), arr.std()
419
419
420 # Out[36]:
420 # Out[36]:
421 # Minimum and maximum : 0 7
421 # Minimum and maximum : 0 7
422 # Sum and product of all elements : 28 0
422 # Sum and product of all elements : 28 0
423 # Mean and standard deviation : 3.5 2.29128784748
423 # Mean and standard deviation : 3.5 2.29128784748
424 #
424 #
425 # For these methods, the above operations are all computed on all the elements of the array. But for a multidimensional array, it's possible to do the computation along a single dimension, by passing the `axis` parameter; for example:
425 # For these methods, the above operations are all computed on all the elements of the array. But for a multidimensional array, it's possible to do the computation along a single dimension, by passing the `axis` parameter; for example:
426
426
427 # In[37]:
427 # In[37]:
428 print 'For the following array:\n', arr
428 print 'For the following array:\n', arr
429 print 'The sum of elements along the rows is :', arr.sum(axis=1)
429 print 'The sum of elements along the rows is :', arr.sum(axis=1)
430 print 'The sum of elements along the columns is :', arr.sum(axis=0)
430 print 'The sum of elements along the columns is :', arr.sum(axis=0)
431
431
432 # Out[37]:
432 # Out[37]:
433 # For the following array:
433 # For the following array:
434 # [[0 1 2 3]
434 # [[0 1 2 3]
435 # [4 5 6 7]]
435 # [4 5 6 7]]
436 # The sum of elements along the rows is : [ 6 22]
436 # The sum of elements along the rows is : [ 6 22]
437 # The sum of elements along the columns is : [ 4 6 8 10]
437 # The sum of elements along the columns is : [ 4 6 8 10]
438 #
438 #
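439 # As you can see in this example, the value of the `axis` parameter is the dimension which will be *consumed* once the operation has been carried out. This is why summing along the rows uses `axis=1` (the dimension of length 4 is consumed, leaving 2 values), while summing along the columns uses `axis=0` (the dimension of length 2 is consumed, leaving 4 values).
439 # As you can see in this example, the value of the `axis` parameter is the dimension which will be *consumed* once the operation has been carried out. This is why summing along the rows uses `axis=1` (the dimension of length 4 is consumed, leaving 2 values), while summing along the columns uses `axis=0` (the dimension of length 2 is consumed, leaving 4 values).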
440 #
440 #
441 # This can be easily illustrated with an example that has more dimensions; we create an array with 4 dimensions and shape `(3,4,5,6)` and sum along the axis number 2 (i.e. the *third* axis, since in Python all counts are 0-based). That consumes the dimension whose length was 5, leaving us with a new array that has shape `(3,4,6)`:
441 # This can be easily illustrated with an example that has more dimensions; we create an array with 4 dimensions and shape `(3,4,5,6)` and sum along the axis number 2 (i.e. the *third* axis, since in Python all counts are 0-based). That consumes the dimension whose length was 5, leaving us with a new array that has shape `(3,4,6)`:
442
442
443 # In[38]:
443 # In[38]:
444 np.zeros((3,4,5,6)).sum(2).shape
444 np.zeros((3,4,5,6)).sum(2).shape
445
445
446 # Out[38]:
446 # Out[38]:
447 # (3, 4, 6)
447 # (3, 4, 6)
448
448
449
449
450 # Another widely used property of arrays is the `.T` attribute, which allows you to access the transpose of the array:
450 # Another widely used property of arrays is the `.T` attribute, which allows you to access the transpose of the array:
451
451
452 # In[39]:
452 # In[39]:
453 print 'Array:\n', arr
453 print 'Array:\n', arr
454 print 'Transpose:\n', arr.T
454 print 'Transpose:\n', arr.T
455
455
456 # Out[39]:
456 # Out[39]:
457 # Array:
457 # Array:
458 # [[0 1 2 3]
458 # [[0 1 2 3]
459 # [4 5 6 7]]
459 # [4 5 6 7]]
460 # Transpose:
460 # Transpose:
461 # [[0 4]
461 # [[0 4]
462 # [1 5]
462 # [1 5]
463 # [2 6]
463 # [2 6]
464 # [3 7]]
464 # [3 7]]
465 #
465 #
466 # We don't have time here to look at all the methods and properties of arrays, so here's a complete list. Simply try exploring some of these in IPython to learn more, or read their description in the full Numpy documentation:
466 # We don't have time here to look at all the methods and properties of arrays, so here's a complete list. Simply try exploring some of these in IPython to learn more, or read their description in the full Numpy documentation:
467 #
467 #
468 # arr.T arr.copy arr.getfield arr.put arr.squeeze
468 # arr.T arr.copy arr.getfield arr.put arr.squeeze
469 # arr.all arr.ctypes arr.imag arr.ravel arr.std
469 # arr.all arr.ctypes arr.imag arr.ravel arr.std
470 # arr.any arr.cumprod arr.item arr.real arr.strides
470 # arr.any arr.cumprod arr.item arr.real arr.strides
471 # arr.argmax arr.cumsum arr.itemset arr.repeat arr.sum
471 # arr.argmax arr.cumsum arr.itemset arr.repeat arr.sum
472 # arr.argmin arr.data arr.itemsize arr.reshape arr.swapaxes
472 # arr.argmin arr.data arr.itemsize arr.reshape arr.swapaxes
473 # arr.argsort arr.diagonal arr.max arr.resize arr.take
473 # arr.argsort arr.diagonal arr.max arr.resize arr.take
474 # arr.astype arr.dot arr.mean arr.round arr.tofile
474 # arr.astype arr.dot arr.mean arr.round arr.tofile
475 # arr.base arr.dtype arr.min arr.searchsorted arr.tolist
475 # arr.base arr.dtype arr.min arr.searchsorted arr.tolist
476 # arr.byteswap arr.dump arr.nbytes arr.setasflat arr.tostring
476 # arr.byteswap arr.dump arr.nbytes arr.setasflat arr.tostring
477 # arr.choose arr.dumps arr.ndim arr.setfield arr.trace
477 # arr.choose arr.dumps arr.ndim arr.setfield arr.trace
478 # arr.clip arr.fill arr.newbyteorder arr.setflags arr.transpose
478 # arr.clip arr.fill arr.newbyteorder arr.setflags arr.transpose
479 # arr.compress arr.flags arr.nonzero arr.shape arr.var
479 # arr.compress arr.flags arr.nonzero arr.shape arr.var
480 # arr.conj arr.flat arr.prod arr.size arr.view
480 # arr.conj arr.flat arr.prod arr.size arr.view
481 # arr.conjugate arr.flatten arr.ptp arr.sort
481 # arr.conjugate arr.flatten arr.ptp arr.sort
482
482
483 ### Operating with arrays
483 ### Operating with arrays
484
484
485 # Arrays support all regular arithmetic operators, and the numpy library also contains a complete collection of basic mathematical functions that operate on arrays. It is important to remember that in general, all operations with arrays are applied *element-wise*, i.e., are applied to all the elements of the array at the same time. Consider for example:
485 # Arrays support all regular arithmetic operators, and the numpy library also contains a complete collection of basic mathematical functions that operate on arrays. It is important to remember that in general, all operations with arrays are applied *element-wise*, i.e., are applied to all the elements of the array at the same time. Consider for example:
486
486
487 # In[40]:
487 # In[40]:
488 arr1 = np.arange(4)
488 arr1 = np.arange(4)
489 arr2 = np.arange(10, 14)
489 arr2 = np.arange(10, 14)
490 print arr1, '+', arr2, '=', arr1+arr2
490 print arr1, '+', arr2, '=', arr1+arr2
491
491
492 # Out[40]:
492 # Out[40]:
493 # [0 1 2 3] + [10 11 12 13] = [10 12 14 16]
493 # [0 1 2 3] + [10 11 12 13] = [10 12 14 16]
494 #
494 #
495 # Importantly, you must remember that even the multiplication operator is by default applied element-wise, it is *not* the matrix multiplication from linear algebra (as is the case in Matlab, for example):
495 # Importantly, you must remember that even the multiplication operator is by default applied element-wise, it is *not* the matrix multiplication from linear algebra (as is the case in Matlab, for example):
496
496
497 # In[41]:
497 # In[41]:
498 print arr1, '*', arr2, '=', arr1*arr2
498 print arr1, '*', arr2, '=', arr1*arr2
499
499
500 # Out[41]:
500 # Out[41]:
501 # [0 1 2 3] * [10 11 12 13] = [ 0 11 24 39]
501 # [0 1 2 3] * [10 11 12 13] = [ 0 11 24 39]
502 #
502 #
503 # While this means that in principle arrays must always match in their dimensionality in order for an operation to be valid, numpy will *broadcast* dimensions when possible. For example, suppose that you want to add the number 1.5 to `arr1`; the following would be a valid way to do it:
503 # While this means that in principle arrays must always match in their dimensionality in order for an operation to be valid, numpy will *broadcast* dimensions when possible. For example, suppose that you want to add the number 1.5 to `arr1`; the following would be a valid way to do it:
504
504
505 # In[42]:
505 # In[42]:
506 arr1 + 1.5*np.ones(4)
506 arr1 + 1.5*np.ones(4)
507
507
508 # Out[42]:
508 # Out[42]:
509 # array([ 1.5, 2.5, 3.5, 4.5])
509 # array([ 1.5, 2.5, 3.5, 4.5])
510
510
511
511
512 # But thanks to numpy's broadcasting rules, the following is equally valid:
512 # But thanks to numpy's broadcasting rules, the following is equally valid:
513
513
514 # In[43]:
514 # In[43]:
515 arr1 + 1.5
515 arr1 + 1.5
516
516
517 # Out[43]:
517 # Out[43]:
518 # array([ 1.5, 2.5, 3.5, 4.5])
518 # array([ 1.5, 2.5, 3.5, 4.5])
519
519
520
520
521 # In this case, numpy looked at both operands and saw that the first (`arr1`) was a one-dimensional array of length 4 and the second was a scalar, considered a zero-dimensional object. The broadcasting rules allow numpy to:
521 # In this case, numpy looked at both operands and saw that the first (`arr1`) was a one-dimensional array of length 4 and the second was a scalar, considered a zero-dimensional object. The broadcasting rules allow numpy to:
522 #
522 #
523 # * *create* new dimensions of length 1 (since this doesn't change the size of the array)
523 # * *create* new dimensions of length 1 (since this doesn't change the size of the array)
524 # * 'stretch' a dimension of length 1 that needs to be matched to a dimension of a different size.
524 # * 'stretch' a dimension of length 1 that needs to be matched to a dimension of a different size.
525 #
525 #
526 # So in the above example, the scalar 1.5 is effectively:
526 # So in the above example, the scalar 1.5 is effectively:
527 #
527 #
528 # * first 'promoted' to a 1-dimensional array of length 1
528 # * first 'promoted' to a 1-dimensional array of length 1
529 # * then, this array is 'stretched' to length 4 to match the dimension of `arr1`.
529 # * then, this array is 'stretched' to length 4 to match the dimension of `arr1`.
530 #
530 #
531 # After these two operations are complete, the addition can proceed as now both operands are one-dimensional arrays of length 4.
531 # After these two operations are complete, the addition can proceed as now both operands are one-dimensional arrays of length 4.
532 #
532 #
533 # This broadcasting behavior is in practice enormously powerful, especially because when numpy broadcasts to create new dimensions or to 'stretch' existing ones, it doesn't actually replicate the data. In the example above the operation is carried out *as if* the 1.5 was a 1-d array with 1.5 in all of its entries, but no actual array was ever created. This can save lots of memory in cases when the arrays in question are large and can have significant performance implications.
533 # This broadcasting behavior is in practice enormously powerful, especially because when numpy broadcasts to create new dimensions or to 'stretch' existing ones, it doesn't actually replicate the data. In the example above the operation is carried out *as if* the 1.5 was a 1-d array with 1.5 in all of its entries, but no actual array was ever created. This can save lots of memory in cases when the arrays in question are large and can have significant performance implications.
534 #
534 #
535 # The general rule is: when operating on two arrays, NumPy compares their shapes element-wise. It starts with the trailing dimensions, and works its way forward, creating dimensions of length 1 as needed. Two dimensions are considered compatible when
535 # The general rule is: when operating on two arrays, NumPy compares their shapes element-wise. It starts with the trailing dimensions, and works its way forward, creating dimensions of length 1 as needed. Two dimensions are considered compatible when
536 #
536 #
537 # * they are equal to begin with, or
537 # * they are equal to begin with, or
538 # * one of them is 1; in this case numpy will do the 'stretching' to make them equal.
538 # * one of them is 1; in this case numpy will do the 'stretching' to make them equal.
539 #
539 #
540 # If these conditions are not met, a `ValueError: operands could not be broadcast together` exception is thrown, indicating that the arrays have incompatible shapes. The size of the resulting array is the maximum size along each dimension of the input arrays.
540 # If these conditions are not met, a `ValueError: operands could not be broadcast together` exception is thrown, indicating that the arrays have incompatible shapes. The size of the resulting array is the maximum size along each dimension of the input arrays.
541
541
542 # This shows how the broadcasting rules work in several dimensions:
542 # This shows how the broadcasting rules work in several dimensions:
543
543
544 # In[44]:
544 # In[44]:
545 b = np.array([2, 3, 4, 5])
545 b = np.array([2, 3, 4, 5])
546 print arr, '\n\n+', b , '\n----------------\n', arr + b
546 print arr, '\n\n+', b , '\n----------------\n', arr + b
547
547
548 # Out[44]:
548 # Out[44]:
549 # [[0 1 2 3]
549 # [[0 1 2 3]
550 # [4 5 6 7]]
550 # [4 5 6 7]]
551 #
551 #
552 # + [2 3 4 5]
552 # + [2 3 4 5]
553 # ----------------
553 # ----------------
554 # [[ 2 4 6 8]
554 # [[ 2 4 6 8]
555 # [ 6 8 10 12]]
555 # [ 6 8 10 12]]
556 #
556 #
557 # Now, how could you use broadcasting to say add `[4, 6]` along the rows to `arr` above? Simply performing the direct addition will produce the error we previously mentioned:
557 # Now, how could you use broadcasting to say add `[4, 6]` along the rows to `arr` above? Simply performing the direct addition will produce the error we previously mentioned:
558
558
559 # In[45]:
559 # In[45]:
560 c = np.array([4, 6])
560 c = np.array([4, 6])
561 arr + c
561 arr + c
562
562
563 # Out[45]:
563 # Out[45]:
564 ---------------------------------------------------------------------------
564 ---------------------------------------------------------------------------
565 ValueError Traceback (most recent call last)
565 ValueError Traceback (most recent call last)
566 /home/fperez/teach/book-math-labtool/<ipython-input-45-62aa20ac1980> in <module>()
566 /home/fperez/teach/book-math-labtool/<ipython-input-45-62aa20ac1980> in <module>()
567 1 c = np.array([4, 6])
567 1 c = np.array([4, 6])
568 ----> 2 arr + c
568 ----> 2 arr + c
569
569
570 ValueError: operands could not be broadcast together with shapes (2,4) (2)
570 ValueError: operands could not be broadcast together with shapes (2,4) (2)
571
571
572 # According to the rules above, the array `c` would need to have a *trailing* dimension of 1 for the broadcasting to work. It turns out that numpy allows you to 'inject' new dimensions anywhere into an array on the fly, by indexing it with the special object `np.newaxis`:
572 # According to the rules above, the array `c` would need to have a *trailing* dimension of 1 for the broadcasting to work. It turns out that numpy allows you to 'inject' new dimensions anywhere into an array on the fly, by indexing it with the special object `np.newaxis`:
573
573
574 # In[46]:
574 # In[46]:
575 (c[:, np.newaxis]).shape
575 (c[:, np.newaxis]).shape
576
576
577 # Out[46]:
577 # Out[46]:
578 # (2, 1)
578 # (2, 1)
579
579
580
580
581 # This is exactly what we need, and indeed it works:
581 # This is exactly what we need, and indeed it works:
582
582
583 # In[47]:
583 # In[47]:
584 arr + c[:, np.newaxis]
584 arr + c[:, np.newaxis]
585
585
586 # Out[47]:
586 # Out[47]:
587 # array([[ 4, 5, 6, 7],
587 # array([[ 4, 5, 6, 7],
588 # [10, 11, 12, 13]])
588 # [10, 11, 12, 13]])
589
589
590
590
591 # For the full broadcasting rules, please see the official Numpy docs, which describe them in detail and with more complex examples.
591 # For the full broadcasting rules, please see the official Numpy docs, which describe them in detail and with more complex examples.
592
592
593 # As we mentioned before, Numpy ships with a full complement of mathematical functions that work on entire arrays, including logarithms, exponentials, trigonometric and hyperbolic trigonometric functions, etc. Furthermore, scipy ships a rich special function library in the `scipy.special` module that includes Bessel, Airy, Fresnel, Laguerre and other classical special functions. For example, sampling the sine function at 100 points between $0$ and $2\pi$ is as simple as:
593 # As we mentioned before, Numpy ships with a full complement of mathematical functions that work on entire arrays, including logarithms, exponentials, trigonometric and hyperbolic trigonometric functions, etc. Furthermore, scipy ships a rich special function library in the `scipy.special` module that includes Bessel, Airy, Fresnel, Laguerre and other classical special functions. For example, sampling the sine function at 100 points between $0$ and $2\pi$ is as simple as:
594
594
595 # In[48]:
595 # In[48]:
596 x = np.linspace(0, 2*np.pi, 100)
596 x = np.linspace(0, 2*np.pi, 100)
597 y = np.sin(x)
597 y = np.sin(x)
598
598
599 ### Linear algebra in numpy
599 ### Linear algebra in numpy
600
600
601 # Numpy ships with a basic linear algebra library, and all arrays have a `dot` method whose behavior is that of the scalar dot product when its arguments are vectors (one-dimensional arrays) and the traditional matrix multiplication when one or both of its arguments are two-dimensional arrays:
601 # Numpy ships with a basic linear algebra library, and all arrays have a `dot` method whose behavior is that of the scalar dot product when its arguments are vectors (one-dimensional arrays) and the traditional matrix multiplication when one or both of its arguments are two-dimensional arrays:
602
602
603 # In[49]:
603 # In[49]:
604 v1 = np.array([2, 3, 4])
604 v1 = np.array([2, 3, 4])
605 v2 = np.array([1, 0, 1])
605 v2 = np.array([1, 0, 1])
606 print v1, '.', v2, '=', v1.dot(v2)
606 print v1, '.', v2, '=', v1.dot(v2)
607
607
608 # Out[49]:
608 # Out[49]:
609 # [2 3 4] . [1 0 1] = 6
609 # [2 3 4] . [1 0 1] = 6
610 #
610 #
611 # Here is a regular matrix-vector multiplication, note that the array `v1` should be viewed as a *column* vector in traditional linear algebra notation; numpy makes no distinction between row and column vectors and simply verifies that the dimensions match the required rules of matrix multiplication, in this case we have a $2 \times 3$ matrix multiplied by a 3-vector, which produces a 2-vector:
611 # Here is a regular matrix-vector multiplication, note that the array `v1` should be viewed as a *column* vector in traditional linear algebra notation; numpy makes no distinction between row and column vectors and simply verifies that the dimensions match the required rules of matrix multiplication, in this case we have a $2 \times 3$ matrix multiplied by a 3-vector, which produces a 2-vector:
612
612
613 # In[50]:
613 # In[50]:
614 A = np.arange(6).reshape(2, 3)
614 A = np.arange(6).reshape(2, 3)
615 print A, 'x', v1, '=', A.dot(v1)
615 print A, 'x', v1, '=', A.dot(v1)
616
616
617 # Out[50]:
617 # Out[50]:
618 # [[0 1 2]
618 # [[0 1 2]
619 # [3 4 5]] x [2 3 4] = [11 38]
619 # [3 4 5]] x [2 3 4] = [11 38]
620 #
620 #
621 # For matrix-matrix multiplication, the same dimension-matching rules must be satisfied, e.g. consider the difference between $A \times A^T$:
621 # For matrix-matrix multiplication, the same dimension-matching rules must be satisfied, e.g. consider the difference between $A \times A^T$:
622
622
623 # In[51]:
623 # In[51]:
624 print A.dot(A.T)
624 print A.dot(A.T)
625
625
626 # Out[51]:
626 # Out[51]:
627 # [[ 5 14]
627 # [[ 5 14]
628 # [14 50]]
628 # [14 50]]
629 #
629 #
630 # and $A^T \times A$:
630 # and $A^T \times A$:
631
631
632 # In[52]:
632 # In[52]:
633 print A.T.dot(A)
633 print A.T.dot(A)
634
634
635 # Out[52]:
635 # Out[52]:
636 # [[ 9 12 15]
636 # [[ 9 12 15]
637 # [12 17 22]
637 # [12 17 22]
638 # [15 22 29]]
638 # [15 22 29]]
639 #
639 #
640 # Furthermore, the `numpy.linalg` module includes additional functionality such as determinants, matrix norms, Cholesky, eigenvalue and singular value decompositions, etc. For even more linear algebra tools, `scipy.linalg` contains the majority of the tools in the classic LAPACK libraries as well as functions to operate on sparse matrices. We refer the reader to the Numpy and Scipy documentations for additional details on these.
640 # Furthermore, the `numpy.linalg` module includes additional functionality such as determinants, matrix norms, Cholesky, eigenvalue and singular value decompositions, etc. For even more linear algebra tools, `scipy.linalg` contains the majority of the tools in the classic LAPACK libraries as well as functions to operate on sparse matrices. We refer the reader to the Numpy and Scipy documentations for additional details on these.
641
641
642 ### Reading and writing arrays to disk
642 ### Reading and writing arrays to disk
643
643
644 # Numpy lets you read and write arrays into files in a number of ways. In order to use these tools well, it is critical to understand the difference between a *text* and a *binary* file containing numerical data. In a text file, the number $\pi$ could be written as "3.141592653589793", for example: a string of digits that a human can read, with in this case 15 decimal digits. In contrast, that same number written to a binary file would be encoded as 8 characters (bytes) that are not readable by a human but which contain the exact same data that the variable `pi` had in the computer's memory.
644 # Numpy lets you read and write arrays into files in a number of ways. In order to use these tools well, it is critical to understand the difference between a *text* and a *binary* file containing numerical data. In a text file, the number $\pi$ could be written as "3.141592653589793", for example: a string of digits that a human can read, with in this case 15 decimal digits. In contrast, that same number written to a binary file would be encoded as 8 characters (bytes) that are not readable by a human but which contain the exact same data that the variable `pi` had in the computer's memory.
645 #
645 #
646 # The tradeoffs between the two modes are thus:
646 # The tradeoffs between the two modes are thus:
647 #
647 #
648 # * Text mode: occupies more space, precision can be lost (if not all digits are written to disk), but is readable and editable by hand with a text editor. Can *only* be used for one- and two-dimensional arrays.
648 # * Text mode: occupies more space, precision can be lost (if not all digits are written to disk), but is readable and editable by hand with a text editor. Can *only* be used for one- and two-dimensional arrays.
649 #
649 #
650 # * Binary mode: compact and exact representation of the data in memory, can't be read or edited by hand. Arrays of any size and dimensionality can be saved and read without loss of information.
650 # * Binary mode: compact and exact representation of the data in memory, can't be read or edited by hand. Arrays of any size and dimensionality can be saved and read without loss of information.
651 #
651 #
652 # First, let's see how to read and write arrays in text mode. The `np.savetxt` function saves an array to a text file, with options to control the precision, separators and even adding a header:
652 # First, let's see how to read and write arrays in text mode. The `np.savetxt` function saves an array to a text file, with options to control the precision, separators and even adding a header:
653
653
654 # In[53]:
654 # In[53]:
655 arr = np.arange(10).reshape(2, 5)
655 arr = np.arange(10).reshape(2, 5)
656 np.savetxt('test.out', arr, fmt='%.2e', header="My dataset")
656 np.savetxt('test.out', arr, fmt='%.2e', header="My dataset")
657 !cat test.out
657 !cat test.out
658
658
659 # Out[53]:
659 # Out[53]:
660 # # My dataset
660 # # My dataset
661 # 0.00e+00 1.00e+00 2.00e+00 3.00e+00 4.00e+00
661 # 0.00e+00 1.00e+00 2.00e+00 3.00e+00 4.00e+00
662 # 5.00e+00 6.00e+00 7.00e+00 8.00e+00 9.00e+00
662 # 5.00e+00 6.00e+00 7.00e+00 8.00e+00 9.00e+00
663 #
663 #
664 # And this same type of file can then be read with the matching `np.loadtxt` function:
664 # And this same type of file can then be read with the matching `np.loadtxt` function:
665
665
666 # In[54]:
666 # In[54]:
667 arr2 = np.loadtxt('test.out')
667 arr2 = np.loadtxt('test.out')
668 print arr2
668 print arr2
669
669
670 # Out[54]:
670 # Out[54]:
671 # [[ 0. 1. 2. 3. 4.]
671 # [[ 0. 1. 2. 3. 4.]
672 # [ 5. 6. 7. 8. 9.]]
672 # [ 5. 6. 7. 8. 9.]]
673 #
673 #
674 # For binary data, Numpy provides the `np.save` and `np.savez` routines. The first saves a single array to a file with `.npy` extension, while the latter can be used to save a *group* of arrays into a single file with `.npz` extension. The files created with these routines can then be read with the `np.load` function.
674 # For binary data, Numpy provides the `np.save` and `np.savez` routines. The first saves a single array to a file with `.npy` extension, while the latter can be used to save a *group* of arrays into a single file with `.npz` extension. The files created with these routines can then be read with the `np.load` function.
675 #
675 #
676 # Let us first see how to use the simpler `np.save` function to save a single array:
676 # Let us first see how to use the simpler `np.save` function to save a single array:
677
677
678 # In[55]:
678 # In[55]:
679 np.save('test.npy', arr2)
679 np.save('test.npy', arr2)
680 # Now we read this back
680 # Now we read this back
681 arr2n = np.load('test.npy')
681 arr2n = np.load('test.npy')
682 # Let's see if any element is non-zero in the difference.
682 # Let's see if any element is non-zero in the difference.
683 # A value of True would be a problem.
683 # A value of True would be a problem.
684 print 'Any differences?', np.any(arr2-arr2n)
684 print 'Any differences?', np.any(arr2-arr2n)
685
685
686 # Out[55]:
686 # Out[55]:
687 # Any differences? False
687 # Any differences? False
688 #
688 #
689 # Now let us see how the `np.savez` function works. You give it a filename and either a sequence of arrays or a set of keywords. In the first mode, the function will automatically name the saved arrays in the archive as `arr_0`, `arr_1`, etc:
689 # Now let us see how the `np.savez` function works. You give it a filename and either a sequence of arrays or a set of keywords. In the first mode, the function will automatically name the saved arrays in the archive as `arr_0`, `arr_1`, etc:
690
690
691 # In[56]:
691 # In[56]:
692 np.savez('test.npz', arr, arr2)
692 np.savez('test.npz', arr, arr2)
693 arrays = np.load('test.npz')
693 arrays = np.load('test.npz')
694 arrays.files
694 arrays.files
695
695
696 # Out[56]:
696 # Out[56]:
697 # ['arr_1', 'arr_0']
697 # ['arr_1', 'arr_0']
698
698
699
699
700 # Alternatively, we can explicitly choose how to name the arrays we save:
700 # Alternatively, we can explicitly choose how to name the arrays we save:
701
701
702 # In[57]:
702 # In[57]:
703 np.savez('test.npz', array1=arr, array2=arr2)
703 np.savez('test.npz', array1=arr, array2=arr2)
704 arrays = np.load('test.npz')
704 arrays = np.load('test.npz')
705 arrays.files
705 arrays.files
706
706
707 # Out[57]:
707 # Out[57]:
708 # ['array2', 'array1']
708 # ['array2', 'array1']
709
709
710
710
711 # The object returned by `np.load` from an `.npz` file works like a dictionary, though you can also access its constituent files by attribute using its special `.f` field; this is best illustrated with an example with the `arrays` object from above:
711 # The object returned by `np.load` from an `.npz` file works like a dictionary, though you can also access its constituent files by attribute using its special `.f` field; this is best illustrated with an example with the `arrays` object from above:
712
712
713 # In[58]:
713 # In[58]:
714 print 'First row of first array:', arrays['array1'][0]
714 print 'First row of first array:', arrays['array1'][0]
715 # This is an equivalent way to get the same field
715 # This is an equivalent way to get the same field
716 print 'First row of first array:', arrays.f.array1[0]
716 print 'First row of first array:', arrays.f.array1[0]
717
717
718 # Out[58]:
718 # Out[58]:
719 # First row of first array: [0 1 2 3 4]
719 # First row of first array: [0 1 2 3 4]
720 # First row of first array: [0 1 2 3 4]
720 # First row of first array: [0 1 2 3 4]
721 #
721 #
722 # This `.npz` format is a very convenient way to package compactly and without loss of information, into a single file, a group of related arrays that pertain to a specific problem. At some point, however, the complexity of your dataset may be such that the optimal approach is to use one of the standard formats in scientific data processing that have been designed to handle complex datasets, such as NetCDF or HDF5.
722 # This `.npz` format is a very convenient way to package compactly and without loss of information, into a single file, a group of related arrays that pertain to a specific problem. At some point, however, the complexity of your dataset may be such that the optimal approach is to use one of the standard formats in scientific data processing that have been designed to handle complex datasets, such as NetCDF or HDF5.
723 #
723 #
724 # Fortunately, there are tools for manipulating these formats in Python, and for storing data in other ways such as databases. A complete discussion of the possibilities is beyond the scope of this discussion, but of particular interest for scientific users we at least mention the following:
724 # Fortunately, there are tools for manipulating these formats in Python, and for storing data in other ways such as databases. A complete discussion of the possibilities is beyond the scope of this discussion, but of particular interest for scientific users we at least mention the following:
725 #
725 #
726 # * The `scipy.io` module contains routines to read and write Matlab files in `.mat` format and files in the NetCDF format that is widely used in certain scientific disciplines.
726 # * The `scipy.io` module contains routines to read and write Matlab files in `.mat` format and files in the NetCDF format that is widely used in certain scientific disciplines.
727 #
727 #
728 # * For manipulating files in the HDF5 format, there are two excellent options in Python: The PyTables project offers a high-level, object oriented approach to manipulating HDF5 datasets, while the h5py project offers a more direct mapping to the standard HDF5 library interface. Both are excellent tools; if you need to work with HDF5 datasets you should read some of their documentation and examples and decide which approach is a better match for your needs.
728 # * For manipulating files in the HDF5 format, there are two excellent options in Python: The PyTables project offers a high-level, object oriented approach to manipulating HDF5 datasets, while the h5py project offers a more direct mapping to the standard HDF5 library interface. Both are excellent tools; if you need to work with HDF5 datasets you should read some of their documentation and examples and decide which approach is a better match for your needs.
729
729
730 ## High quality data visualization with Matplotlib
730 ## High quality data visualization with Matplotlib
731
731
732 # The [matplotlib](http://matplotlib.sf.net) library is a powerful tool capable of producing complex publication-quality figures with fine layout control in two and three dimensions; here we will only provide a minimal self-contained introduction to its usage that covers the functionality needed for the rest of the book. We encourage the reader to read the tutorials included with the matplotlib documentation as well as to browse its extensive gallery of examples that include source code.
732 # The [matplotlib](http://matplotlib.sf.net) library is a powerful tool capable of producing complex publication-quality figures with fine layout control in two and three dimensions; here we will only provide a minimal self-contained introduction to its usage that covers the functionality needed for the rest of the book. We encourage the reader to read the tutorials included with the matplotlib documentation as well as to browse its extensive gallery of examples that include source code.
733 #
733 #
734 # Just as we typically use the shorthand `np` for Numpy, we will use `plt` for the `matplotlib.pyplot` module where the easy-to-use plotting functions reside (the library contains a rich object-oriented architecture that we don't have the space to discuss here):
734 # Just as we typically use the shorthand `np` for Numpy, we will use `plt` for the `matplotlib.pyplot` module where the easy-to-use plotting functions reside (the library contains a rich object-oriented architecture that we don't have the space to discuss here):
735
735
736 # In[59]:
736 # In[59]:
737 import matplotlib.pyplot as plt
737 import matplotlib.pyplot as plt
738
738
739 # The most frequently used function is simply called `plot`, here is how you can make a simple plot of $\sin(x)$ for $x \in [0, 2\pi]$ with labels and a grid (we use the semicolon in the last line to suppress the display of some information that is unnecessary right now):
739 # The most frequently used function is simply called `plot`, here is how you can make a simple plot of $\sin(x)$ for $x \in [0, 2\pi]$ with labels and a grid (we use the semicolon in the last line to suppress the display of some information that is unnecessary right now):
740
740
741 # In[60]:
741 # In[60]:
742 x = np.linspace(0, 2*np.pi)
742 x = np.linspace(0, 2*np.pi)
743 y = np.sin(x)
743 y = np.sin(x)
744 plt.plot(x,y, label='sin(x)')
744 plt.plot(x,y, label='sin(x)')
745 plt.legend()
745 plt.legend()
746 plt.grid()
746 plt.grid()
747 plt.title('Harmonic')
747 plt.title('Harmonic')
748 plt.xlabel('x')
748 plt.xlabel('x')
749 plt.ylabel('y');
749 plt.ylabel('y');
750
750
751 # Out[60]:
751 # Out[60]:
752 # image file: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_01.svg
752 # image file: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_01.svg
753
753
754 # You can control the style, color and other properties of the markers, for example:
754 # You can control the style, color and other properties of the markers, for example:
755
755
756 # In[61]:
756 # In[61]:
757 plt.plot(x, y, linewidth=2);
757 plt.plot(x, y, linewidth=2);
758
758
759 # Out[61]:
759 # Out[61]:
760 # image file: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_02.svg
760 # image file: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_02.svg
761
761
762 # In[62]:
762 # In[62]:
763 plt.plot(x, y, 'o', markersize=5, color='r');
763 plt.plot(x, y, 'o', markersize=5, color='r');
764
764
765 # Out[62]:
765 # Out[62]:
766 # image file: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_03.svg
766 # image file: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_03.svg
767
767
768 # We will now see how to create a few other common plot types, such as a simple error plot:
768 # We will now see how to create a few other common plot types, such as a simple error plot:
769
769
770 # In[63]:
770 # In[63]:
771 # example data
771 # example data
772 x = np.arange(0.1, 4, 0.5)
772 x = np.arange(0.1, 4, 0.5)
773 y = np.exp(-x)
773 y = np.exp(-x)
774
774
775 # example variable error bar values
775 # example variable error bar values
776 yerr = 0.1 + 0.2*np.sqrt(x)
776 yerr = 0.1 + 0.2*np.sqrt(x)
777 xerr = 0.1 + yerr
777 xerr = 0.1 + yerr
778
778
779 # First illustrate basic pyplot interface, using defaults where possible.
779 # First illustrate basic pyplot interface, using defaults where possible.
780 plt.figure()
780 plt.figure()
781 plt.errorbar(x, y, xerr=0.2, yerr=0.4)
781 plt.errorbar(x, y, xerr=0.2, yerr=0.4)
782 plt.title("Simplest errorbars, 0.2 in x, 0.4 in y");
782 plt.title("Simplest errorbars, 0.2 in x, 0.4 in y");
783
783
784 # Out[63]:
784 # Out[63]:
785 # image file: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_04.svg
785 # image file: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_04.svg
786
786
787 # A simple log plot
787 # A simple log plot
788
788
789 # In[64]:
789 # In[64]:
790 x = np.linspace(-5, 5)
790 x = np.linspace(-5, 5)
791 y = np.exp(-x**2)
791 y = np.exp(-x**2)
792 plt.semilogy(x, y);
792 plt.semilogy(x, y);
793
793
794 # Out[64]:
794 # Out[64]:
795 # image file: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_05.svg
795 # image file: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_05.svg
796
796
797 # A histogram annotated with text inside the plot, using the `text` function:
797 # A histogram annotated with text inside the plot, using the `text` function:
798
798
799 # In[65]:
799 # In[65]:
800 mu, sigma = 100, 15
800 mu, sigma = 100, 15
801 x = mu + sigma * np.random.randn(10000)
801 x = mu + sigma * np.random.randn(10000)
802
802
803 # the histogram of the data
803 # the histogram of the data
804 n, bins, patches = plt.hist(x, 50, normed=1, facecolor='g', alpha=0.75)
804 n, bins, patches = plt.hist(x, 50, normed=1, facecolor='g', alpha=0.75)
805
805
806 plt.xlabel('Smarts')
806 plt.xlabel('Smarts')
807 plt.ylabel('Probability')
807 plt.ylabel('Probability')
808 plt.title('Histogram of IQ')
808 plt.title('Histogram of IQ')
809 # This will put a text fragment at the position given:
809 # This will put a text fragment at the position given:
810 plt.text(55, .027, r'$\mu=100,\ \sigma=15$', fontsize=14)
810 plt.text(55, .027, r'$\mu=100,\ \sigma=15$', fontsize=14)
811 plt.axis([40, 160, 0, 0.03])
811 plt.axis([40, 160, 0, 0.03])
812 plt.grid(True)
812 plt.grid(True)
813
813
814 # Out[65]:
814 # Out[65]:
815 # image file: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_06.svg
815 # image file: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_06.svg
816
816
817 ### Image display
817 ### Image display
818
818
819 # The `imshow` command can display single or multi-channel images. A simple array of random numbers, plotted in grayscale:
819 # The `imshow` command can display single or multi-channel images. A simple array of random numbers, plotted in grayscale:
820
820
821 # In[66]:
821 # In[66]:
822 from matplotlib import cm
822 from matplotlib import cm
823 plt.imshow(np.random.rand(5, 10), cmap=cm.gray, interpolation='nearest');
823 plt.imshow(np.random.rand(5, 10), cmap=cm.gray, interpolation='nearest');
824
824
825 # Out[66]:
825 # Out[66]:
826 # image file: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_07.svg
826 # image file: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_07.svg
827
827
828 # A real photograph is a multichannel image, `imshow` interprets it correctly:
828 # A real photograph is a multichannel image, `imshow` interprets it correctly:
829
829
830 # In[67]:
830 # In[67]:
831 img = plt.imread('stinkbug.png')
831 img = plt.imread('stinkbug.png')
832 print 'Dimensions of the array img:', img.shape
832 print 'Dimensions of the array img:', img.shape
833 plt.imshow(img);
833 plt.imshow(img);
834
834
835 # Out[67]:
835 # Out[67]:
836 # Dimensions of the array img: (375, 500, 3)
836 # Dimensions of the array img: (375, 500, 3)
837 #
837 #
838 # image file: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_08.svg
838 # image file: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_08.svg
839
839
840 ### Simple 3d plotting with matplotlib
840 ### Simple 3d plotting with matplotlib
841
841
842 # Note that you must execute at least once in your session:
842 # Note that you must execute at least once in your session:
843
843
844 # In[68]:
844 # In[68]:
845 from mpl_toolkits.mplot3d import Axes3D
845 from mpl_toolkits.mplot3d import Axes3D
846
846
847 # Once this has been done, you can create 3d axes with the `projection='3d'` keyword to `add_subplot`:
847 # Once this has been done, you can create 3d axes with the `projection='3d'` keyword to `add_subplot`:
848 #
848 #
849 # fig = plt.figure()
849 # fig = plt.figure()
850 # fig.add_subplot(<other arguments here>, projection='3d')
850 # fig.add_subplot(<other arguments here>, projection='3d')
851
851
852 # A simple surface plot:
852 # A simple surface plot:
853
853
854 # In[72]:
854 # In[72]:
855 from mpl_toolkits.mplot3d.axes3d import Axes3D
855 from mpl_toolkits.mplot3d.axes3d import Axes3D
856 from matplotlib import cm
856 from matplotlib import cm
857
857
858 fig = plt.figure()
858 fig = plt.figure()
859 ax = fig.add_subplot(1, 1, 1, projection='3d')
859 ax = fig.add_subplot(1, 1, 1, projection='3d')
860 X = np.arange(-5, 5, 0.25)
860 X = np.arange(-5, 5, 0.25)
861 Y = np.arange(-5, 5, 0.25)
861 Y = np.arange(-5, 5, 0.25)
862 X, Y = np.meshgrid(X, Y)
862 X, Y = np.meshgrid(X, Y)
863 R = np.sqrt(X**2 + Y**2)
863 R = np.sqrt(X**2 + Y**2)
864 Z = np.sin(R)
864 Z = np.sin(R)
865 surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet,
865 surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet,
866 linewidth=0, antialiased=False)
866 linewidth=0, antialiased=False)
867 ax.set_zlim3d(-1.01, 1.01);
867 ax.set_zlim3d(-1.01, 1.01);
868
868
869 # Out[72]:
869 # Out[72]:
870 # image file: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_09.svg
870 # image file: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_09.svg
871
871
872 ## IPython: a powerful interactive environment
872 ## IPython: a powerful interactive environment
873
873
874 # A key component of the everyday workflow of most scientific computing environments is a good interactive environment, that is, a system in which you can execute small amounts of code and view the results immediately, combining both printing out data and opening graphical visualizations. All modern systems for scientific computing, commercial and open source, include such functionality.
874 # A key component of the everyday workflow of most scientific computing environments is a good interactive environment, that is, a system in which you can execute small amounts of code and view the results immediately, combining both printing out data and opening graphical visualizations. All modern systems for scientific computing, commercial and open source, include such functionality.
875 #
875 #
876 # Out of the box, Python also offers a simple interactive shell with very limited capabilities. But just like the scientific community built Numpy to provide arrays suited for scientific work (since Python's lists aren't optimal for this task), it has also developed an interactive environment much more sophisticated than the built-in one. The [IPython project](http://ipython.org) offers a set of tools to make productive use of the Python language, all the while working interactively and with immediate feedback on your results. The basic tools that IPython provides are:
876 # Out of the box, Python also offers a simple interactive shell with very limited capabilities. But just like the scientific community built Numpy to provide arrays suited for scientific work (since Python's lists aren't optimal for this task), it has also developed an interactive environment much more sophisticated than the built-in one. The [IPython project](http://ipython.org) offers a set of tools to make productive use of the Python language, all the while working interactively and with immediate feedback on your results. The basic tools that IPython provides are:
877 #
877 #
878 # 1. A powerful terminal shell, with many features designed to increase the fluidity and productivity of everyday scientific workflows, including:
878 # 1. A powerful terminal shell, with many features designed to increase the fluidity and productivity of everyday scientific workflows, including:
879 #
879 #
880 # * rich introspection of all objects and variables including easy access to the source code of any function
880 # * rich introspection of all objects and variables including easy access to the source code of any function
881 # * powerful and extensible tab completion of variables and filenames,
881 # * powerful and extensible tab completion of variables and filenames,
882 # * tight integration with matplotlib, supporting interactive figures that don't block the terminal,
882 # * tight integration with matplotlib, supporting interactive figures that don't block the terminal,
883 # * direct access to the filesystem and underlying operating system,
883 # * direct access to the filesystem and underlying operating system,
884 # * an extensible system for shell-like commands called 'magics' that reduce the work needed to perform many common tasks,
884 # * an extensible system for shell-like commands called 'magics' that reduce the work needed to perform many common tasks,
885 # * tools for easily running, timing, profiling and debugging your codes,
885 # * tools for easily running, timing, profiling and debugging your codes,
886 # * syntax highlighted error messages with much more detail than the default Python ones,
886 # * syntax highlighted error messages with much more detail than the default Python ones,
887 # * logging and access to all previous history of inputs, including across sessions
887 # * logging and access to all previous history of inputs, including across sessions
888 #
888 #
889 # 2. A Qt console that provides the look and feel of a terminal, but adds support for inline figures, graphical calltips, a persistent session that can survive crashes (even segfaults) of the kernel process, and more.
889 # 2. A Qt console that provides the look and feel of a terminal, but adds support for inline figures, graphical calltips, a persistent session that can survive crashes (even segfaults) of the kernel process, and more.
890 #
890 #
891 # 3. A web-based notebook that can execute code and also contain rich text and figures, mathematical equations and arbitrary HTML. This notebook presents a document-like view with cells where code is executed but that can be edited in-place, reordered, mixed with explanatory text and figures, etc.
891 # 3. A web-based notebook that can execute code and also contain rich text and figures, mathematical equations and arbitrary HTML. This notebook presents a document-like view with cells where code is executed but that can be edited in-place, reordered, mixed with explanatory text and figures, etc.
892 #
892 #
893 # 4. A high-performance, low-latency system for parallel computing that supports the control of a cluster of IPython engines communicating over a network, with optimizations that minimize unnecessary copying of large objects (especially numpy arrays).
893 # 4. A high-performance, low-latency system for parallel computing that supports the control of a cluster of IPython engines communicating over a network, with optimizations that minimize unnecessary copying of large objects (especially numpy arrays).
894 #
894 #
895 # We will now discuss the highlights of the tools 1-3 above so that you can make them an effective part of your workflow. The topic of parallel computing is beyond the scope of this document, but we encourage you to read the extensive [documentation](http://ipython.org/ipython-doc/rel-0.12.1/parallel/index.html) and [tutorials](http://minrk.github.com/scipy-tutorial-2011/) on this available on the IPython website.
895 # We will now discuss the highlights of the tools 1-3 above so that you can make them an effective part of your workflow. The topic of parallel computing is beyond the scope of this document, but we encourage you to read the extensive [documentation](http://ipython.org/ipython-doc/rel-0.12.1/parallel/index.html) and [tutorials](http://minrk.github.com/scipy-tutorial-2011/) on this available on the IPython website.
896
896
897 ### The IPython terminal
897 ### The IPython terminal
898
898
899 # You can start IPython at the terminal simply by typing:
899 # You can start IPython at the terminal simply by typing:
900 #
900 #
901 # $ ipython
901 # $ ipython
902 #
902 #
903 # which will provide you some basic information about how to get started and will then open a prompt labeled `In [1]:` for you to start typing. Here we type $2^{64}$ and Python computes the result for us in exact arithmetic, returning it as `Out[1]`:
903 # which will provide you some basic information about how to get started and will then open a prompt labeled `In [1]:` for you to start typing. Here we type $2^{64}$ and Python computes the result for us in exact arithmetic, returning it as `Out[1]`:
904 #
904 #
905 # $ ipython
905 # $ ipython
906 # Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
906 # Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
907 # Type "copyright", "credits" or "license" for more information.
907 # Type "copyright", "credits" or "license" for more information.
908 #
908 #
909 # IPython 0.13.dev -- An enhanced Interactive Python.
909 # IPython 0.13.dev -- An enhanced Interactive Python.
910 # ? -> Introduction and overview of IPython's features.
910 # ? -> Introduction and overview of IPython's features.
911 # %quickref -> Quick reference.
911 # %quickref -> Quick reference.
912 # help -> Python's own help system.
912 # help -> Python's own help system.
913 # object? -> Details about 'object', use 'object??' for extra details.
913 # object? -> Details about 'object', use 'object??' for extra details.
914 #
914 #
915 # In [1]: 2**64
915 # In [1]: 2**64
916 # Out[1]: 18446744073709551616L
916 # Out[1]: 18446744073709551616L
917 #
917 #
918 # The first thing you should know about IPython is that all your inputs and outputs are saved. There are two variables named `In` and `Out` which are filled as you work with your results. Furthermore, all outputs are also saved to auto-created variables of the form `_NN` where `NN` is the prompt number, and inputs to `_iNN`. This allows you to recover quickly the result of a prior computation by referring to its number even if you forgot to store it as a variable. For example, later on in the above session you can do:
918 # The first thing you should know about IPython is that all your inputs and outputs are saved. There are two variables named `In` and `Out` which are filled as you work with your results. Furthermore, all outputs are also saved to auto-created variables of the form `_NN` where `NN` is the prompt number, and inputs to `_iNN`. This allows you to recover quickly the result of a prior computation by referring to its number even if you forgot to store it as a variable. For example, later on in the above session you can do:
919 #
919 #
920 # In [6]: print _1
920 # In [6]: print _1
921 # 18446744073709551616
921 # 18446744073709551616
922
922
923 # We strongly recommend that you take a few minutes to read at least the basic introduction provided by the `?` command, and keep in mind that the `%quickref` command at all times can be used as a quick reference "cheat sheet" of the most frequently used features of IPython.
923 # We strongly recommend that you take a few minutes to read at least the basic introduction provided by the `?` command, and keep in mind that the `%quickref` command at all times can be used as a quick reference "cheat sheet" of the most frequently used features of IPython.
924 #
924 #
925 # At the IPython prompt, any valid Python code that you type will be executed similarly to the default Python shell (though often with more informative feedback). But since IPython is a *superset* of the default Python shell, let's have a brief look at some of its additional functionality.
925 # At the IPython prompt, any valid Python code that you type will be executed similarly to the default Python shell (though often with more informative feedback). But since IPython is a *superset* of the default Python shell, let's have a brief look at some of its additional functionality.
926
926
927 # **Object introspection**
927 # **Object introspection**
928 #
928 #
929 # A simple `?` command provides a general introduction to IPython, but as indicated in the banner above, you can use the `?` syntax to ask for details about any object. For example, if we type `_1?`, IPython will print the following details about this variable:
929 # A simple `?` command provides a general introduction to IPython, but as indicated in the banner above, you can use the `?` syntax to ask for details about any object. For example, if we type `_1?`, IPython will print the following details about this variable:
930 #
930 #
931 # In [14]: _1?
931 # In [14]: _1?
932 # Type: long
932 # Type: long
933 # Base Class: <type 'long'>
933 # Base Class: <type 'long'>
934 # String Form:18446744073709551616
934 # String Form:18446744073709551616
935 # Namespace: Interactive
935 # Namespace: Interactive
936 # Docstring:
936 # Docstring:
937 # long(x[, base]) -> integer
937 # long(x[, base]) -> integer
938 #
938 #
939 # Convert a string or number to a long integer, if possible. A floating
939 # Convert a string or number to a long integer, if possible. A floating
940 #
940 #
941 # [etc... snipped for brevity]
941 # [etc... snipped for brevity]
942 #
942 #
943 # If you add a second `?` and for any object `x` type `x??`, IPython will try to provide an even more detailed analysis of the object, including its syntax-highlighted source code when it can be found. It's possible that `x??` returns the same information as `x?`, but in many cases `x??` will indeed provide additional details.
943 # If you add a second `?` and for any object `x` type `x??`, IPython will try to provide an even more detailed analysis of the object, including its syntax-highlighted source code when it can be found. It's possible that `x??` returns the same information as `x?`, but in many cases `x??` will indeed provide additional details.
944 #
944 #
945 # Finally, the `?` syntax is also useful to search *namespaces* with wildcards. Suppose you are wondering if there is any function in Numpy that may do text-related things; with `np.*txt*?`, IPython will print all the names in the `np` namespace (our Numpy shorthand) that have 'txt' anywhere in their name:
945 # Finally, the `?` syntax is also useful to search *namespaces* with wildcards. Suppose you are wondering if there is any function in Numpy that may do text-related things; with `np.*txt*?`, IPython will print all the names in the `np` namespace (our Numpy shorthand) that have 'txt' anywhere in their name:
946 #
946 #
947 # In [17]: np.*txt*?
947 # In [17]: np.*txt*?
948 # np.genfromtxt
948 # np.genfromtxt
949 # np.loadtxt
949 # np.loadtxt
950 # np.mafromtxt
950 # np.mafromtxt
951 # np.ndfromtxt
951 # np.ndfromtxt
952 # np.recfromtxt
952 # np.recfromtxt
953 # np.savetxt
953 # np.savetxt
954
954
955 # **Tab completion**
955 # **Tab completion**
956 #
956 #
957 # IPython makes the tab key work extra hard for you as a way to rapidly inspect objects and libraries. Whenever you have typed something at the prompt, by hitting the `<tab>` key IPython will try to complete the rest of the line. For this, IPython will analyze the text you had so far and try to search for Python data or files that may match the context you have already provided.
957 # IPython makes the tab key work extra hard for you as a way to rapidly inspect objects and libraries. Whenever you have typed something at the prompt, by hitting the `<tab>` key IPython will try to complete the rest of the line. For this, IPython will analyze the text you had so far and try to search for Python data or files that may match the context you have already provided.
958 #
958 #
959 # For example, if you type `np.load` and hit the <tab> key, you'll see:
959 # For example, if you type `np.load` and hit the <tab> key, you'll see:
960 #
960 #
961 # In [21]: np.load<TAB HERE>
961 # In [21]: np.load<TAB HERE>
962 # np.load np.loads np.loadtxt
962 # np.load np.loads np.loadtxt
963 #
963 #
964 # so you can quickly find all the load-related functionality in numpy. Tab completion works even for function arguments, for example consider this function definition:
964 # so you can quickly find all the load-related functionality in numpy. Tab completion works even for function arguments, for example consider this function definition:
965 #
965 #
966 # In [20]: def f(x, frobinate=False):
966 # In [20]: def f(x, frobinate=False):
967 # ....: if frobinate:
967 # ....: if frobinate:
968 # ....: return x**2
968 # ....: return x**2
969 # ....:
969 # ....:
970 #
970 #
971 # If you now use the `<tab>` key after having typed 'fro' you'll get all valid Python completions, but those marked with `=` at the end are known to be keywords of your function:
971 # If you now use the `<tab>` key after having typed 'fro' you'll get all valid Python completions, but those marked with `=` at the end are known to be keywords of your function:
972 #
972 #
973 # In [21]: f(2, fro<TAB HERE>
973 # In [21]: f(2, fro<TAB HERE>
974 # frobinate= frombuffer fromfunction frompyfunc fromstring
974 # frobinate= frombuffer fromfunction frompyfunc fromstring
975 # from fromfile fromiter fromregex frozenset
975 # from fromfile fromiter fromregex frozenset
976 #
976 #
977 # at this point you can add the `b` letter and hit `<tab>` once more, and IPython will finish the line for you:
977 # at this point you can add the `b` letter and hit `<tab>` once more, and IPython will finish the line for you:
978 #
978 #
979 # In [21]: f(2, frobinate=
979 # In [21]: f(2, frobinate=
980 #
980 #
981 # As a beginner, simply get into the habit of using `<tab>` after most objects; it should quickly become second nature as you will see how it helps keep a fluid workflow and discover useful information. Later on you can also customize this behavior by writing your own completion code, if you so desire.
981 # As a beginner, simply get into the habit of using `<tab>` after most objects; it should quickly become second nature as you will see how it helps keep a fluid workflow and discover useful information. Later on you can also customize this behavior by writing your own completion code, if you so desire.
982
982
983 # **Matplotlib integration**
983 # **Matplotlib integration**
984 #
984 #
985 # One of the most useful features of IPython for scientists is its tight integration with matplotlib: at the terminal IPython lets you open matplotlib figures without blocking your typing (which is what happens if you try to do the same thing at the default Python shell), and in the Qt console and notebook you can even view your figures embedded in your workspace next to the code that created them.
985 # One of the most useful features of IPython for scientists is its tight integration with matplotlib: at the terminal IPython lets you open matplotlib figures without blocking your typing (which is what happens if you try to do the same thing at the default Python shell), and in the Qt console and notebook you can even view your figures embedded in your workspace next to the code that created them.
986 #
986 #
987 # The matplotlib support can be either activated when you start IPython by passing the `--pylab` flag, or at any point later in your session by using the `%pylab` command. If you start IPython with `--pylab`, you'll see something like this (note the extra message about pylab):
987 # The matplotlib support can be either activated when you start IPython by passing the `--pylab` flag, or at any point later in your session by using the `%pylab` command. If you start IPython with `--pylab`, you'll see something like this (note the extra message about pylab):
988 #
988 #
989 # $ ipython --pylab
989 # $ ipython --pylab
990 # Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
990 # Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
991 # Type "copyright", "credits" or "license" for more information.
991 # Type "copyright", "credits" or "license" for more information.
992 #
992 #
993 # IPython 0.13.dev -- An enhanced Interactive Python.
993 # IPython 0.13.dev -- An enhanced Interactive Python.
994 # ? -> Introduction and overview of IPython's features.
994 # ? -> Introduction and overview of IPython's features.
995 # %quickref -> Quick reference.
995 # %quickref -> Quick reference.
996 # help -> Python's own help system.
996 # help -> Python's own help system.
997 # object? -> Details about 'object', use 'object??' for extra details.
997 # object? -> Details about 'object', use 'object??' for extra details.
998 #
998 #
999 # Welcome to pylab, a matplotlib-based Python environment [backend: Qt4Agg].
999 # Welcome to pylab, a matplotlib-based Python environment [backend: Qt4Agg].
1000 # For more information, type 'help(pylab)'.
1000 # For more information, type 'help(pylab)'.
1001 #
1001 #
1002 # In [1]:
1002 # In [1]:
1003 #
1003 #
1004 # Furthermore, IPython will import `numpy` with the `np` shorthand, `matplotlib.pyplot` as `plt`, and it will also load all of the numpy and pyplot top-level names so that you can directly type something like:
1004 # Furthermore, IPython will import `numpy` with the `np` shorthand, `matplotlib.pyplot` as `plt`, and it will also load all of the numpy and pyplot top-level names so that you can directly type something like:
1005 #
1005 #
1006 # In [1]: x = linspace(0, 2*pi, 200)
1006 # In [1]: x = linspace(0, 2*pi, 200)
1007 #
1007 #
1008 # In [2]: plot(x, sin(x))
1008 # In [2]: plot(x, sin(x))
1009 # Out[2]: [<matplotlib.lines.Line2D at 0x9e7c16c>]
1009 # Out[2]: [<matplotlib.lines.Line2D at 0x9e7c16c>]
1010 #
1010 #
1011 # instead of having to prefix each call with its full signature (as we have been doing in the examples thus far):
1011 # instead of having to prefix each call with its full signature (as we have been doing in the examples thus far):
1012 #
1012 #
1013 # In [3]: x = np.linspace(0, 2*np.pi, 200)
1013 # In [3]: x = np.linspace(0, 2*np.pi, 200)
1014 #
1014 #
1015 # In [4]: plt.plot(x, np.sin(x))
1015 # In [4]: plt.plot(x, np.sin(x))
1016 # Out[4]: [<matplotlib.lines.Line2D at 0x9e900ac>]
1016 # Out[4]: [<matplotlib.lines.Line2D at 0x9e900ac>]
1017 #
1017 #
1018 # This shorthand notation can be a huge time-saver when working interactively (it's a few characters but you are likely to type them hundreds of times in a session). But we should note that as you develop persistent scripts and notebooks meant for reuse, it's best to get in the habit of using the longer notation (known as *fully qualified names*), as it's clearer where things come from and it makes for more robust, readable and maintainable code in the long run.
1018 # This shorthand notation can be a huge time-saver when working interactively (it's a few characters but you are likely to type them hundreds of times in a session). But we should note that as you develop persistent scripts and notebooks meant for reuse, it's best to get in the habit of using the longer notation (known as *fully qualified names*), as it's clearer where things come from and it makes for more robust, readable and maintainable code in the long run.
1019
1019
1020 # **Access to the operating system and files**
1020 # **Access to the operating system and files**
1021 #
1021 #
1022 # In IPython, you can type `ls` to see your files or `cd` to change directories, just like you would at a regular system prompt:
1022 # In IPython, you can type `ls` to see your files or `cd` to change directories, just like you would at a regular system prompt:
1023 #
1023 #
1024 # In [2]: cd tests
1024 # In [2]: cd tests
1025 # /home/fperez/ipython/nbconvert/tests
1025 # /home/fperez/ipython/nbconvert/tests
1026 #
1026 #
1027 # In [3]: ls test.*
1027 # In [3]: ls test.*
1028 # test.aux test.html test.ipynb test.log test.out test.pdf test.rst test.tex
1028 # test.aux test.html test.ipynb test.log test.out test.pdf test.rst test.tex
1029 #
1029 #
1030 # Furthermore, if you use the `!` at the beginning of a line, any commands you pass afterwards go directly to the operating system:
1030 # Furthermore, if you use the `!` at the beginning of a line, any commands you pass afterwards go directly to the operating system:
1031 #
1031 #
1032 # In [4]: !echo "Hello IPython"
1032 # In [4]: !echo "Hello IPython"
1033 # Hello IPython
1033 # Hello IPython
1034 #
1034 #
1035 # IPython offers a useful twist in this feature: it will substitute in the command the value of any *Python* variable you may have if you prepend it with a `$` sign:
1035 # IPython offers a useful twist in this feature: it will substitute in the command the value of any *Python* variable you may have if you prepend it with a `$` sign:
1036 #
1036 #
1037 # In [5]: message = 'IPython interpolates from Python to the shell'
1037 # In [5]: message = 'IPython interpolates from Python to the shell'
1038 #
1038 #
1039 # In [6]: !echo $message
1039 # In [6]: !echo $message
1040 # IPython interpolates from Python to the shell
1040 # IPython interpolates from Python to the shell
1041 #
1041 #
1042 # This feature can be extremely useful, as it lets you combine the power and clarity of Python for complex logic with the immediacy and familiarity of many shell commands. Additionally, if you start the line with *two* `!` signs (`!!`), the output of the command will be automatically captured as a list of lines, e.g.:
1042 # This feature can be extremely useful, as it lets you combine the power and clarity of Python for complex logic with the immediacy and familiarity of many shell commands. Additionally, if you start the line with *two* `!` signs (`!!`), the output of the command will be automatically captured as a list of lines, e.g.:
1043 #
1043 #
1044 # In [10]: !!ls test.*
1044 # In [10]: !!ls test.*
1045 # Out[10]:
1045 # Out[10]:
1046 # ['test.aux',
1046 # ['test.aux',
1047 # 'test.html',
1047 # 'test.html',
1048 # 'test.ipynb',
1048 # 'test.ipynb',
1049 # 'test.log',
1049 # 'test.log',
1050 # 'test.out',
1050 # 'test.out',
1051 # 'test.pdf',
1051 # 'test.pdf',
1052 # 'test.rst',
1052 # 'test.rst',
1053 # 'test.tex']
1053 # 'test.tex']
1054 #
1054 #
1055 # As explained above, you can now use this as the variable `_10`. If you directly want to capture the output of a system command to a Python variable, you can use the syntax `=!`:
1055 # As explained above, you can now use this as the variable `_10`. If you directly want to capture the output of a system command to a Python variable, you can use the syntax `=!`:
1056 #
1056 #
1057 # In [11]: testfiles =! ls test.*
1057 # In [11]: testfiles =! ls test.*
1058 #
1058 #
1059 # In [12]: print testfiles
1059 # In [12]: print testfiles
1060 # ['test.aux', 'test.html', 'test.ipynb', 'test.log', 'test.out', 'test.pdf', 'test.rst', 'test.tex']
1060 # ['test.aux', 'test.html', 'test.ipynb', 'test.log', 'test.out', 'test.pdf', 'test.rst', 'test.tex']
1061 #
1061 #
1062 # Finally, the special `%alias` command lets you define names that are shorthands for system commands, so that you can type them without having to prefix them via `!` explicitly (for example, `ls` is an alias that has been predefined for you at startup).
1062 # Finally, the special `%alias` command lets you define names that are shorthands for system commands, so that you can type them without having to prefix them via `!` explicitly (for example, `ls` is an alias that has been predefined for you at startup).
1063
1063
1064 # **Magic commands**
1064 # **Magic commands**
1065 #
1065 #
1066 # IPython has a system for special commands, called 'magics', that let you control IPython itself and perform many common tasks with a more shell-like syntax: it uses spaces for delimiting arguments, flags can be set with dashes and all arguments are treated as strings, so no additional quoting is required. This kind of syntax is invalid in the Python language but very convenient for interactive typing (fewer parentheses, commas and quoting everywhere); IPython distinguishes the two by detecting lines that start with the `%` character.
1066 # IPython has a system for special commands, called 'magics', that let you control IPython itself and perform many common tasks with a more shell-like syntax: it uses spaces for delimiting arguments, flags can be set with dashes and all arguments are treated as strings, so no additional quoting is required. This kind of syntax is invalid in the Python language but very convenient for interactive typing (fewer parentheses, commas and quoting everywhere); IPython distinguishes the two by detecting lines that start with the `%` character.
1067 #
1067 #
1068 # You can learn more about the magic system by simply typing `%magic` at the prompt, which will give you a short description plus the documentation on *all* available magics. If you want to see only a listing of existing magics, you can use `%lsmagic`:
1068 # You can learn more about the magic system by simply typing `%magic` at the prompt, which will give you a short description plus the documentation on *all* available magics. If you want to see only a listing of existing magics, you can use `%lsmagic`:
1069 #
1069 #
1070 # In [4]: lsmagic
1070 # In [4]: lsmagic
1071 # Available magic functions:
1071 # Available magic functions:
1072 # %alias %autocall %autoindent %automagic %bookmark %c %cd %colors %config %cpaste
1072 # %alias %autocall %autoindent %automagic %bookmark %c %cd %colors %config %cpaste
1073 # %debug %dhist %dirs %doctest_mode %ds %ed %edit %env %gui %hist %history
1073 # %debug %dhist %dirs %doctest_mode %ds %ed %edit %env %gui %hist %history
1074 # %install_default_config %install_ext %install_profiles %load_ext %loadpy %logoff %logon
1074 # %install_default_config %install_ext %install_profiles %load_ext %loadpy %logoff %logon
1075 # %logstart %logstate %logstop %lsmagic %macro %magic %notebook %page %paste %pastebin
1075 # %logstart %logstate %logstop %lsmagic %macro %magic %notebook %page %paste %pastebin
1076 # %pd %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %pop %popd %pprint %precision %profile
1076 # %pd %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %pop %popd %pprint %precision %profile
1077 # %prun %psearch %psource %pushd %pwd %pycat %pylab %quickref %recall %rehashx
1077 # %prun %psearch %psource %pushd %pwd %pycat %pylab %quickref %recall %rehashx
1078 # %reload_ext %rep %rerun %reset %reset_selective %run %save %sc %stop %store %sx %tb
1078 # %reload_ext %rep %rerun %reset %reset_selective %run %save %sc %stop %store %sx %tb
1079 # %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode
1079 # %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode
1080 #
1080 #
1081 # Automagic is ON, % prefix NOT needed for magic functions.
1081 # Automagic is ON, % prefix NOT needed for magic functions.
1082 #
1082 #
1083 # Note how the example above omitted the explicit `%` marker and simply uses `lsmagic`. As long as the 'automagic' feature is on (which it is by default), you can omit the `%` marker as long as there is no ambiguity with a Python variable of the same name.
1083 # Note how the example above omitted the explicit `%` marker and simply uses `lsmagic`. As long as the 'automagic' feature is on (which it is by default), you can omit the `%` marker as long as there is no ambiguity with a Python variable of the same name.
1084
1084
1085 # **Running your code**
1085 # **Running your code**
1086 #
1086 #
1087 # While it's easy to type a few lines of code in IPython, for any long-lived work you should keep your code in Python scripts (or in IPython notebooks, see below). Consider that you have a script, in this case trivially simple for the sake of brevity, named `simple.py`:
1087 # While it's easy to type a few lines of code in IPython, for any long-lived work you should keep your code in Python scripts (or in IPython notebooks, see below). Consider that you have a script, in this case trivially simple for the sake of brevity, named `simple.py`:
1088 #
1088 #
1089 # In [12]: !cat simple.py
1089 # In [12]: !cat simple.py
1090 # import numpy as np
1090 # import numpy as np
1091 #
1091 #
1092 # x = np.random.normal(size=100)
1092 # x = np.random.normal(size=100)
1093 #
1093 #
1094 # print 'First elment of x:', x[0]
1094 # print 'First elment of x:', x[0]
1095 #
1095 #
1096 # The typical workflow with IPython is to use the `%run` magic to execute your script (you can omit the .py extension if you want). When you run it, the script will execute just as if it had been run at the system prompt with `python simple.py` (though since modules don't get re-executed on new imports by Python, all system initialization is essentially free, which can have a significant run time impact in some cases):
1096 # The typical workflow with IPython is to use the `%run` magic to execute your script (you can omit the .py extension if you want). When you run it, the script will execute just as if it had been run at the system prompt with `python simple.py` (though since modules don't get re-executed on new imports by Python, all system initialization is essentially free, which can have a significant run time impact in some cases):
1097 #
1097 #
1098 # In [13]: run simple
1098 # In [13]: run simple
1099 # First elment of x: -1.55872256289
1099 # First elment of x: -1.55872256289
1100 #
1100 #
1101 # Once it completes, all variables defined in it become available for you to use interactively:
1101 # Once it completes, all variables defined in it become available for you to use interactively:
1102 #
1102 #
1103 # In [14]: x.shape
1103 # In [14]: x.shape
1104 # Out[14]: (100,)
1104 # Out[14]: (100,)
1105 #
1105 #
1106 # This allows you to plot data, try out ideas, etc, in a `%run`/interact/edit cycle that can be very productive. As you start understanding your problem better you can refine your script further, incrementally improving it based on the work you do at the IPython prompt. At any point you can use the `%hist` magic to print out your history without prompts, so that you can copy useful fragments back into the script.
1106 # This allows you to plot data, try out ideas, etc, in a `%run`/interact/edit cycle that can be very productive. As you start understanding your problem better you can refine your script further, incrementally improving it based on the work you do at the IPython prompt. At any point you can use the `%hist` magic to print out your history without prompts, so that you can copy useful fragments back into the script.
1107 #
1107 #
1108 # By default, `%run` executes scripts in a completely empty namespace, to better mimic how they would execute at the system prompt with plain Python. But if you use the `-i` flag, the script will also see your interactively defined variables. This lets you edit in a script larger amounts of code that still behave as if you had typed them at the IPython prompt.
1108 # By default, `%run` executes scripts in a completely empty namespace, to better mimic how they would execute at the system prompt with plain Python. But if you use the `-i` flag, the script will also see your interactively defined variables. This lets you edit in a script larger amounts of code that still behave as if you had typed them at the IPython prompt.
1109 #
1109 #
1110 # You can also get a summary of the time taken by your script with the `-t` flag; consider a different script `randsvd.py` that takes a bit longer to run:
1110 # You can also get a summary of the time taken by your script with the `-t` flag; consider a different script `randsvd.py` that takes a bit longer to run:
1111 #
1111 #
1112 # In [21]: run -t randsvd.py
1112 # In [21]: run -t randsvd.py
1113 #
1113 #
1114 # IPython CPU timings (estimated):
1114 # IPython CPU timings (estimated):
1115 # User : 0.38 s.
1115 # User : 0.38 s.
1116 # System : 0.04 s.
1116 # System : 0.04 s.
1117 # Wall time: 0.34 s.
1117 # Wall time: 0.34 s.
1118 #
1118 #
1119 # `User` is the time spent by the computer executing your code, while `System` is the time the operating system had to work on your behalf, doing things like memory allocation that are needed by your code but that you didn't explicitly program and that happen inside the kernel. The `Wall time` is the time on a 'clock on the wall' between the start and end of your program.
1119 # `User` is the time spent by the computer executing your code, while `System` is the time the operating system had to work on your behalf, doing things like memory allocation that are needed by your code but that you didn't explicitly program and that happen inside the kernel. The `Wall time` is the time on a 'clock on the wall' between the start and end of your program.
1120 #
1120 #
1121 # If `Wall > User+System`, your code is most likely waiting idle for certain periods. That could be waiting for data to arrive from a remote source or perhaps because the operating system has to swap large amounts of virtual memory. If you know that your code doesn't explicitly wait for remote data to arrive, you should investigate further to identify possible ways of improving the performance profile.
1121 # If `Wall > User+System`, your code is most likely waiting idle for certain periods. That could be waiting for data to arrive from a remote source or perhaps because the operating system has to swap large amounts of virtual memory. If you know that your code doesn't explicitly wait for remote data to arrive, you should investigate further to identify possible ways of improving the performance profile.
1122 #
1122 #
1123 # If you only want to time how long a single statement takes, you don't need to put it into a script as you can use the `%timeit` magic, which uses Python's `timeit` module to very carefully measure timing data; `timeit` can measure even short statements that execute extremely fast:
1123 # If you only want to time how long a single statement takes, you don't need to put it into a script as you can use the `%timeit` magic, which uses Python's `timeit` module to very carefully measure timing data; `timeit` can measure even short statements that execute extremely fast:
1124 #
1124 #
1125 # In [27]: %timeit a=1
1125 # In [27]: %timeit a=1
1126 # 10000000 loops, best of 3: 23 ns per loop
1126 # 10000000 loops, best of 3: 23 ns per loop
1127 #
1127 #
1128 # and for code that runs longer, it automatically adjusts so the overall measurement doesn't take too long:
1128 # and for code that runs longer, it automatically adjusts so the overall measurement doesn't take too long:
1129 #
1129 #
1130 # In [28]: %timeit np.linalg.svd(x)
1130 # In [28]: %timeit np.linalg.svd(x)
1131 # 1 loops, best of 3: 310 ms per loop
1131 # 1 loops, best of 3: 310 ms per loop
1132 #
1132 #
1133 # The `%run` magic still has more options for debugging and profiling data; you should read its documentation for many useful details (as always, just type `%run?`).
1133 # The `%run` magic still has more options for debugging and profiling data; you should read its documentation for many useful details (as always, just type `%run?`).
1134
1134
1135 ### The graphical Qt console
1135 ### The graphical Qt console
1136
1136
1137 # If you type at the system prompt (see the IPython website for installation details, as this requires some additional libraries):
1137 # If you type at the system prompt (see the IPython website for installation details, as this requires some additional libraries):
1138 #
1138 #
1139 # $ ipython qtconsole
1139 # $ ipython qtconsole
1140 #
1140 #
1141 # instead of opening in a terminal as before, IPython will start a graphical console that at first sight appears just like a terminal, but which is in fact much more capable than a text-only terminal. This is a specialized terminal designed for interactive scientific work, and it supports full multi-line editing with color highlighting and graphical calltips for functions, it can keep multiple IPython sessions open simultaneously in tabs, and when scripts run it can display the figures inline directly in the work area.
1141 # instead of opening in a terminal as before, IPython will start a graphical console that at first sight appears just like a terminal, but which is in fact much more capable than a text-only terminal. This is a specialized terminal designed for interactive scientific work, and it supports full multi-line editing with color highlighting and graphical calltips for functions, it can keep multiple IPython sessions open simultaneously in tabs, and when scripts run it can display the figures inline directly in the work area.
1142 #
1142 #
1143 # <center><img src="ipython_qtconsole2.png" width=400px></center>
1143 # <center><img src="ipython_qtconsole2.png" width=400px></center>
1144
1144
1145 # % This cell is for the pdflatex output only
1145 # % This cell is for the pdflatex output only
1146 # \begin{figure}[htbp]
1146 # \begin{figure}[htbp]
1147 # \centering
1147 # \centering
1148 # \includegraphics[width=3in]{ipython_qtconsole2.png}
1148 # \includegraphics[width=3in]{ipython_qtconsole2.png}
1149 # \caption{The IPython Qt console: a lightweight terminal for scientific exploration, with code, results and graphics in a single environment.}
1149 # \caption{The IPython Qt console: a lightweight terminal for scientific exploration, with code, results and graphics in a single environment.}
1150 # \end{figure}
1150 # \end{figure}
1151
1151
1152 # The Qt console accepts the same `--pylab` startup flags as the terminal, but you can additionally supply the value `--pylab inline`, which enables the support for inline graphics shown in the figure. This is ideal for keeping all the code and figures in the same session, given that the console can save the output of your entire session to HTML or PDF.
1152 # The Qt console accepts the same `--pylab` startup flags as the terminal, but you can additionally supply the value `--pylab inline`, which enables the support for inline graphics shown in the figure. This is ideal for keeping all the code and figures in the same session, given that the console can save the output of your entire session to HTML or PDF.
1153 #
1153 #
1154 # Since the Qt console makes it far more convenient than the terminal to edit blocks of code with multiple lines, in this environment it's worth knowing about the `%loadpy` magic function. `%loadpy` takes a path to a local file or remote URL, fetches its contents, and puts it in the work area for you to further edit and execute. It can be an extremely fast and convenient way of loading code from local disk or remote examples from sites such as the [Matplotlib gallery](http://matplotlib.sourceforge.net/gallery.html).
1154 # Since the Qt console makes it far more convenient than the terminal to edit blocks of code with multiple lines, in this environment it's worth knowing about the `%loadpy` magic function. `%loadpy` takes a path to a local file or remote URL, fetches its contents, and puts it in the work area for you to further edit and execute. It can be an extremely fast and convenient way of loading code from local disk or remote examples from sites such as the [Matplotlib gallery](http://matplotlib.sourceforge.net/gallery.html).
1155 #
1155 #
1156 # Other than its enhanced capabilities for code and graphics, all of the features of IPython we've explained before remain functional in this graphical console.
1156 # Other than its enhanced capabilities for code and graphics, all of the features of IPython we've explained before remain functional in this graphical console.
1157
1157
1158 ### The IPython Notebook
1158 ### The IPython Notebook
1159
1159
1160 # The third way to interact with IPython, in addition to the terminal and graphical Qt console, is a powerful web interface called the "IPython Notebook". If you run at the system console (you can omit the `pylab` flags if you don't need plotting support):
1160 # The third way to interact with IPython, in addition to the terminal and graphical Qt console, is a powerful web interface called the "IPython Notebook". If you run at the system console (you can omit the `pylab` flags if you don't need plotting support):
1161 #
1161 #
1162 # $ ipython notebook --pylab inline
1162 # $ ipython notebook --pylab inline
1163 #
1163 #
1164 # IPython will start a process that runs a web server in your local machine and to which a web browser can connect. The Notebook is a workspace that lets you execute code in blocks called 'cells' and displays any results and figures, but which can also contain arbitrary text (including LaTeX-formatted mathematical expressions) and any rich media that a modern web browser is capable of displaying.
1164 # IPython will start a process that runs a web server in your local machine and to which a web browser can connect. The Notebook is a workspace that lets you execute code in blocks called 'cells' and displays any results and figures, but which can also contain arbitrary text (including LaTeX-formatted mathematical expressions) and any rich media that a modern web browser is capable of displaying.
1165 #
1165 #
1166 # <center><img src="ipython-notebook-specgram-2.png" width=400px></center>
1166 # <center><img src="ipython-notebook-specgram-2.png" width=400px></center>
1167
1167
1168 # % This cell is for the pdflatex output only
1168 # % This cell is for the pdflatex output only
1169 # \begin{figure}[htbp]
1169 # \begin{figure}[htbp]
1170 # \centering
1170 # \centering
1171 # \includegraphics[width=3in]{ipython-notebook-specgram-2.png}
1171 # \includegraphics[width=3in]{ipython-notebook-specgram-2.png}
1172 # \caption{The IPython Notebook: text, equations, code, results, graphics and other multimedia in an open format for scientific exploration and collaboration}
1172 # \caption{The IPython Notebook: text, equations, code, results, graphics and other multimedia in an open format for scientific exploration and collaboration}
1173 # \end{figure}
1173 # \end{figure}
1174
1174
1175 # In fact, this document was written as a Notebook, and only exported to LaTeX for printing. Inside of each cell, all the features of IPython that we have discussed before remain functional, since ultimately this web client is communicating with the same IPython code that runs in the terminal. But this interface is a much richer and more powerful environment for maintaining long-term "live and executable" scientific documents.
1175 # In fact, this document was written as a Notebook, and only exported to LaTeX for printing. Inside of each cell, all the features of IPython that we have discussed before remain functional, since ultimately this web client is communicating with the same IPython code that runs in the terminal. But this interface is a much richer and more powerful environment for maintaining long-term "live and executable" scientific documents.
1176 #
1176 #
1177 # Notebook environments have existed in commercial systems like Mathematica(TM) and Maple(TM) for a long time; in the open source world the [Sage](http://sagemath.org) project blazed this particular trail starting in 2006, and now we bring all the features that have made IPython such a widely used tool to a Notebook model.
1177 # Notebook environments have existed in commercial systems like Mathematica(TM) and Maple(TM) for a long time; in the open source world the [Sage](http://sagemath.org) project blazed this particular trail starting in 2006, and now we bring all the features that have made IPython such a widely used tool to a Notebook model.
1178 #
1178 #
1179 # Since the Notebook runs as a web application, it is possible to configure it for remote access, letting you run your computations on a persistent server close to your data, which you can then access remotely from any browser-equipped computer. We encourage you to read the extensive documentation provided by the IPython project for details on how to do this and many more features of the notebook.
1179 # Since the Notebook runs as a web application, it is possible to configure it for remote access, letting you run your computations on a persistent server close to your data, which you can then access remotely from any browser-equipped computer. We encourage you to read the extensive documentation provided by the IPython project for details on how to do this and many more features of the notebook.
1180 #
1180 #
1181 # Finally, as we said earlier, IPython also has a high-level and easy to use set of libraries for parallel computing, that let you control (interactively if desired) not just one IPython but an entire cluster of 'IPython engines'. Unfortunately a detailed discussion of these tools is beyond the scope of this text, but should you need to parallelize your analysis codes, a quick read of the tutorials and examples provided at the IPython site may prove fruitful.
1181 # Finally, as we said earlier, IPython also has a high-level and easy to use set of libraries for parallel computing, that let you control (interactively if desired) not just one IPython but an entire cluster of 'IPython engines'. Unfortunately a detailed discussion of these tools is beyond the scope of this text, but should you need to parallelize your analysis codes, a quick read of the tutorials and examples provided at the IPython site may prove fruitful.
@@ -1,2077 +1,2077 b''
1 An Introduction to the Scientific Python Ecosystem
1 An Introduction to the Scientific Python Ecosystem
2 ==================================================
2 ==================================================
3
3
4 While the Python language is an excellent tool for general-purpose
4 While the Python language is an excellent tool for general-purpose
5 programming, with a highly readable syntax, rich and powerful data types
5 programming, with a highly readable syntax, rich and powerful data types
6 (strings, lists, sets, dictionaries, arbitrary length integers, etc) and
6 (strings, lists, sets, dictionaries, arbitrary length integers, etc) and
7 a very comprehensive standard library, it was not designed specifically
7 a very comprehensive standard library, it was not designed specifically
8 for mathematical and scientific computing. Neither the language nor its
8 for mathematical and scientific computing. Neither the language nor its
9 standard library have facilities for the efficient representation of
9 standard library have facilities for the efficient representation of
10 multidimensional datasets, tools for linear algebra and general matrix
10 multidimensional datasets, tools for linear algebra and general matrix
11 manipulations (an essential building block of virtually all technical
11 manipulations (an essential building block of virtually all technical
12 computing), nor any data visualization facilities.
12 computing), nor any data visualization facilities.
13
13
14 In particular, Python lists are very flexible containers that can be
14 In particular, Python lists are very flexible containers that can be
15 nested arbitrarily deep and which can hold any Python object in them,
15 nested arbitrarily deep and which can hold any Python object in them,
16 but they are poorly suited to represent efficiently common mathematical
16 but they are poorly suited to represent efficiently common mathematical
17 constructs like vectors and matrices. In contrast, much of our modern
17 constructs like vectors and matrices. In contrast, much of our modern
18 heritage of scientific computing has been built on top of libraries
18 heritage of scientific computing has been built on top of libraries
19 written in the Fortran language, which has native support for vectors
19 written in the Fortran language, which has native support for vectors
20 and matrices as well as a library of mathematical functions that can
20 and matrices as well as a library of mathematical functions that can
21 efficiently operate on entire arrays at once.
21 efficiently operate on entire arrays at once.
22
22
23 Scientific Python: a collaboration of projects built by scientists
23 Scientific Python: a collaboration of projects built by scientists
24 ------------------------------------------------------------------
24 ------------------------------------------------------------------
25
25
26 The scientific community has developed a set of related Python libraries
26 The scientific community has developed a set of related Python libraries
27 that provide powerful array facilities, linear algebra, numerical
27 that provide powerful array facilities, linear algebra, numerical
28 algorithms, data visualization and more. In this appendix, we will
28 algorithms, data visualization and more. In this appendix, we will
29 briefly outline the tools most frequently used for this purpose, that
29 briefly outline the tools most frequently used for this purpose, that
30 make "Scientific Python" something far more powerful than the Python
30 make "Scientific Python" something far more powerful than the Python
31 language alone.
31 language alone.
32
32
33 For reasons of space, we can only describe in some detail the central
33 For reasons of space, we can only describe in some detail the central
34 Numpy library, but below we provide links to the websites of each
34 Numpy library, but below we provide links to the websites of each
35 project where you can read their documentation in more detail.
35 project where you can read their documentation in more detail.
36
36
37 First, let's look at an overview of the basic tools that most scientists
37 First, let's look at an overview of the basic tools that most scientists
38 use in daily research with Python. The core of this ecosystem is
38 use in daily research with Python. The core of this ecosystem is
39 composed of:
39 composed of:
40
40
41 - Numpy: the basic library that most others depend on, it provides a
41 - Numpy: the basic library that most others depend on, it provides a
42 powerful array type that can represent multidimensional datasets of
42 powerful array type that can represent multidimensional datasets of
43 many different kinds and that supports arithmetic operations. Numpy
43 many different kinds and that supports arithmetic operations. Numpy
44 also provides a library of common mathematical functions, basic
44 also provides a library of common mathematical functions, basic
45 linear algebra, random number generation and Fast Fourier Transforms.
45 linear algebra, random number generation and Fast Fourier Transforms.
46 Numpy can be found at `numpy.scipy.org <http://numpy.scipy.org>`_
46 Numpy can be found at `numpy.scipy.org <http://numpy.scipy.org>`_
47
47
48 - Scipy: a large collection of numerical algorithms that operate on
48 - Scipy: a large collection of numerical algorithms that operate on
49 numpy arrays and provide facilities for many common tasks in
49 numpy arrays and provide facilities for many common tasks in
50 scientific computing, including dense and sparse linear algebra
50 scientific computing, including dense and sparse linear algebra
51 support, optimization, special functions, statistics, n-dimensional
51 support, optimization, special functions, statistics, n-dimensional
52 image processing, signal processing and more. Scipy can be found at
52 image processing, signal processing and more. Scipy can be found at
53 `scipy.org <http://scipy.org>`_.
53 `scipy.org <http://scipy.org>`_.
54
54
55 - Matplotlib: a data visualization library with a strong focus on
55 - Matplotlib: a data visualization library with a strong focus on
56 producing high-quality output, it supports a variety of common
56 producing high-quality output, it supports a variety of common
57 scientific plot types in two and three dimensions, with precise
57 scientific plot types in two and three dimensions, with precise
58 control over the final output and format for publication-quality
58 control over the final output and format for publication-quality
59 results. Matplotlib can also be controlled interactively allowing
59 results. Matplotlib can also be controlled interactively allowing
60 graphical manipulation of your data (zooming, panning, etc) and can
60 graphical manipulation of your data (zooming, panning, etc) and can
61 be used with most modern user interface toolkits. It can be found at
61 be used with most modern user interface toolkits. It can be found at
62 `matplotlib.sf.net <http://matplotlib.sf.net>`_.
62 `matplotlib.sf.net <http://matplotlib.sf.net>`_.
63
63
64 - IPython: while not strictly scientific in nature, IPython is the
64 - IPython: while not strictly scientific in nature, IPython is the
65 interactive environment in which many scientists spend their time.
65 interactive environment in which many scientists spend their time.
66 IPython provides a powerful Python shell that integrates tightly with
66 IPython provides a powerful Python shell that integrates tightly with
67 Matplotlib and with easy access to the files and operating system,
67 Matplotlib and with easy access to the files and operating system,
68 and which can execute in a terminal or in a graphical Qt console.
68 and which can execute in a terminal or in a graphical Qt console.
69 IPython also has a web-based notebook interface that can combine code
69 IPython also has a web-based notebook interface that can combine code
70 with text, mathematical expressions, figures and multimedia. It can
70 with text, mathematical expressions, figures and multimedia. It can
71 be found at `ipython.org <http://ipython.org>`_.
71 be found at `ipython.org <http://ipython.org>`_.
72
72
73 While each of these tools can be installed separately, in our opinion
73 While each of these tools can be installed separately, in our opinion
74 the most convenient way today of accessing them (especially on Windows
74 the most convenient way today of accessing them (especially on Windows
75 and Mac computers) is to install the `Free Edition of the Enthought
75 and Mac computers) is to install the `Free Edition of the Enthought
76 Python Distribution <http://www.enthought.com/products/epd_free.php>`_
76 Python Distribution <http://www.enthought.com/products/epd_free.php>`_
77 which contains all the above. Other free alternatives on Windows (but not
77 which contains all the above. Other free alternatives on Windows (but not
78 on Macs) are `Python(x,y) <http://code.google.com/p/pythonxy>`_ and
78 on Macs) are `Python(x,y) <http://code.google.com/p/pythonxy>`_ and
79 `Christoph Gohlke's packages
79 `Christoph Gohlke's packages
80 page <http://www.lfd.uci.edu/~gohlke/pythonlibs>`_.
80 page <http://www.lfd.uci.edu/~gohlke/pythonlibs>`_.
81
81
82 These four 'core' libraries are in practice complemented by a number of
82 These four 'core' libraries are in practice complemented by a number of
83 other tools for more specialized work. We will briefly list here the
83 other tools for more specialized work. We will briefly list here the
84 ones that we think are the most commonly needed:
84 ones that we think are the most commonly needed:
85
85
86 - Sympy: a symbolic manipulation tool that turns a Python session into
86 - Sympy: a symbolic manipulation tool that turns a Python session into
87 a computer algebra system. It integrates with the IPython notebook,
87 a computer algebra system. It integrates with the IPython notebook,
88 rendering results in properly typeset mathematical notation.
88 rendering results in properly typeset mathematical notation.
89 `sympy.org <http://sympy.org>`_.
89 `sympy.org <http://sympy.org>`_.
90
90
91 - Mayavi: sophisticated 3d data visualization;
91 - Mayavi: sophisticated 3d data visualization;
92 `code.enthought.com/projects/mayavi <http://code.enthought.com/projects/mayavi>`_.
92 `code.enthought.com/projects/mayavi <http://code.enthought.com/projects/mayavi>`_.
93
93
94 - Cython: a bridge language between Python and C, useful both to
94 - Cython: a bridge language between Python and C, useful both to
95 optimize performance bottlenecks in Python and to access C libraries
95 optimize performance bottlenecks in Python and to access C libraries
96 directly; `cython.org <http://cython.org>`_.
96 directly; `cython.org <http://cython.org>`_.
97
97
98 - Pandas: high-performance data structures and data analysis tools,
98 - Pandas: high-performance data structures and data analysis tools,
99 with powerful data alignment and structural manipulation
99 with powerful data alignment and structural manipulation
100 capabilities; `pandas.pydata.org <http://pandas.pydata.org>`_.
100 capabilities; `pandas.pydata.org <http://pandas.pydata.org>`_.
101
101
102 - Statsmodels: statistical data exploration and model estimation;
102 - Statsmodels: statistical data exploration and model estimation;
103 `statsmodels.sourceforge.net <http://statsmodels.sourceforge.net>`_.
103 `statsmodels.sourceforge.net <http://statsmodels.sourceforge.net>`_.
104
104
105 - Scikit-learn: general purpose machine learning algorithms with a
105 - Scikit-learn: general purpose machine learning algorithms with a
106 common interface; `scikit-learn.org <http://scikit-learn.org>`_.
106 common interface; `scikit-learn.org <http://scikit-learn.org>`_.
107
107
108 - Scikits-image: image processing toolbox;
108 - Scikits-image: image processing toolbox;
109 `scikits-image.org <http://scikits-image.org>`_.
109 `scikits-image.org <http://scikits-image.org>`_.
110
110
111 - NetworkX: analysis of complex networks (in the graph theoretical
111 - NetworkX: analysis of complex networks (in the graph theoretical
112 sense); `networkx.lanl.gov <http://networkx.lanl.gov>`_.
112 sense); `networkx.lanl.gov <http://networkx.lanl.gov>`_.
113
113
114 - PyTables: management of hierarchical datasets using the
114 - PyTables: management of hierarchical datasets using the
115 industry-standard HDF5 format;
115 industry-standard HDF5 format;
116 `www.pytables.org <http://www.pytables.org>`_.
116 `www.pytables.org <http://www.pytables.org>`_.
117
117
118 Beyond these, for any specific problem you should look on the internet
118 Beyond these, for any specific problem you should look on the internet
119 first, before starting to write code from scratch. There's a good chance
119 first, before starting to write code from scratch. There's a good chance
120 that someone, somewhere, has written an open source library that you can
120 that someone, somewhere, has written an open source library that you can
121 use for part or all of your problem.
121 use for part or all of your problem.
122
122
123 A note about the examples below
123 A note about the examples below
124 -------------------------------
124 -------------------------------
125
125
126 In all subsequent examples, you will see blocks of input code, followed
126 In all subsequent examples, you will see blocks of input code, followed
127 by the results of the code if the code generated output. This output may
127 by the results of the code if the code generated output. This output may
128 include text, graphics and other result objects. These blocks of input
128 include text, graphics and other result objects. These blocks of input
129 can be pasted into your interactive IPython session or notebook for you
129 can be pasted into your interactive IPython session or notebook for you
130 to execute. In the print version of this document, a thin vertical bar
130 to execute. In the print version of this document, a thin vertical bar
131 on the left of the blocks of input and output shows which blocks go
131 on the left of the blocks of input and output shows which blocks go
132 together.
132 together.
133
133
134 If you are reading this text as an actual IPython notebook, you can
134 If you are reading this text as an actual IPython notebook, you can
135 press ``Shift-Enter`` or use the 'play' button on the toolbar
135 press ``Shift-Enter`` or use the 'play' button on the toolbar
136 (right-pointing triangle) to execute each block of code, known as a
136 (right-pointing triangle) to execute each block of code, known as a
137 'cell' in IPython:
137 'cell' in IPython:
138
138
139 In[71]:
139 In[71]:
140
140
141 .. code:: python
141 .. code:: python
142
142
143 # This is a block of code, below you'll see its output
143 # This is a block of code, below you'll see its output
144 print "Welcome to the world of scientific computing with Python!"
144 print "Welcome to the world of scientific computing with Python!"
145
145
146 .. parsed-literal::
146 .. parsed-literal::
147
147
148 Welcome to the world of scientific computing with Python!
148 Welcome to the world of scientific computing with Python!
149
149
150
150
151 Motivation: the trapezoidal rule
151 Motivation: the trapezoidal rule
152 ================================
152 ================================
153
153
154 In subsequent sections we'll provide a basic introduction to the nuts
154 In subsequent sections we'll provide a basic introduction to the nuts
155 and bolts of the basic scientific python tools; but we'll first motivate
155 and bolts of the basic scientific python tools; but we'll first motivate
156 it with a brief example that illustrates what you can do in a few lines
156 it with a brief example that illustrates what you can do in a few lines
157 with these tools. For this, we will use the simple problem of
157 with these tools. For this, we will use the simple problem of
158 approximating a definite integral with the trapezoid rule:
158 approximating a definite integral with the trapezoid rule:
159
159
160 .. math::
160 .. math::
161
161
162
162
163 \int_{a}^{b} f(x)\, dx \approx \frac{1}{2} \sum_{k=1}^{N} \left( x_{k} - x_{k-1} \right) \left( f(x_{k}) + f(x_{k-1}) \right).
163 \int_{a}^{b} f(x)\, dx \approx \frac{1}{2} \sum_{k=1}^{N} \left( x_{k} - x_{k-1} \right) \left( f(x_{k}) + f(x_{k-1}) \right).
164
164
165 Our task will be to compute this formula for a function such as:
165 Our task will be to compute this formula for a function such as:
166
166
167 .. math::
167 .. math::
168
168
169
169
170 f(x) = (x-3)(x-5)(x-7)+85
170 f(x) = (x-3)(x-5)(x-7)+85
171
171
172 integrated between :math:`a=1` and :math:`b=9`.
172 integrated between :math:`a=1` and :math:`b=9`.
173
173
174 First, we define the function and sample it evenly between 0 and 10 at
174 First, we define the function and sample it evenly between 0 and 10 at
175 200 points:
175 200 points:
176
176
177 In[1]:
177 In[1]:
178
178
179 .. code:: python
179 .. code:: python
180
180
181 def f(x):
181 def f(x):
182 return (x-3)*(x-5)*(x-7)+85
182 return (x-3)*(x-5)*(x-7)+85
183
183
184 import numpy as np
184 import numpy as np
185 x = np.linspace(0, 10, 200)
185 x = np.linspace(0, 10, 200)
186 y = f(x)
186 y = f(x)
187
187
188 We select :math:`a` and :math:`b`, our integration limits, and we take
188 We select :math:`a` and :math:`b`, our integration limits, and we take
189 only a few points in that region to illustrate the error behavior of the
189 only a few points in that region to illustrate the error behavior of the
190 trapezoid approximation:
190 trapezoid approximation:
191
191
192 In[2]:
192 In[2]:
193
193
194 .. code:: python
194 .. code:: python
195
195
196 a, b = 1, 9
196 a, b = 1, 9
197 xint = x[logical_and(x>=a, x<=b)][::30]
197 xint = x[logical_and(x>=a, x<=b)][::30]
198 yint = y[logical_and(x>=a, x<=b)][::30]
198 yint = y[logical_and(x>=a, x<=b)][::30]
199
199
200 Let's plot both the function and the area below it in the trapezoid
200 Let's plot both the function and the area below it in the trapezoid
201 approximation:
201 approximation:
202
202
203 In[3]:
203 In[3]:
204
204
205 .. code:: python
205 .. code:: python
206
206
207 import matplotlib.pyplot as plt
207 import matplotlib.pyplot as plt
208 plt.plot(x, y, lw=2)
208 plt.plot(x, y, lw=2)
209 plt.axis([0, 10, 0, 140])
209 plt.axis([0, 10, 0, 140])
210 plt.fill_between(xint, 0, yint, facecolor='gray', alpha=0.4)
210 plt.fill_between(xint, 0, yint, facecolor='gray', alpha=0.4)
211 plt.text(0.5 * (a + b), 30,r"$\int_a^b f(x)dx$", horizontalalignment='center', fontsize=20);
211 plt.text(0.5 * (a + b), 30,r"$\int_a^b f(x)dx$", horizontalalignment='center', fontsize=20);
212
212
213 .. image:: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_00.svg
213 .. image:: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_00.svg
214
214
215 Compute the integral both at high accuracy and with the trapezoid
215 Compute the integral both at high accuracy and with the trapezoid
216 approximation
216 approximation
217
217
218 In[4]:
218 In[4]:
219
219
220 .. code:: python
220 .. code:: python
221
221
222 from scipy.integrate import quad, trapz
222 from scipy.integrate import quad, trapz
223 integral, error = quad(f, 1, 9)
223 integral, error = quad(f, 1, 9)
224 trap_integral = trapz(yint, xint)
224 trap_integral = trapz(yint, xint)
225 print "The integral is: %g +/- %.1e" % (integral, error)
225 print "The integral is: %g +/- %.1e" % (integral, error)
226 print "The trapezoid approximation with", len(xint), "points is:", trap_integral
226 print "The trapezoid approximation with", len(xint), "points is:", trap_integral
227 print "The absolute error is:", abs(integral - trap_integral)
227 print "The absolute error is:", abs(integral - trap_integral)
228
228
229 .. parsed-literal::
229 .. parsed-literal::
230
230
231 The integral is: 680 +/- 7.5e-12
231 The integral is: 680 +/- 7.5e-12
232 The trapezoid approximation with 6 points is: 621.286411141
232 The trapezoid approximation with 6 points is: 621.286411141
233 The absolute error is: 58.7135888589
233 The absolute error is: 58.7135888589
234
234
235
235
236 This simple example showed us how, combining the numpy, scipy and
236 This simple example showed us how, combining the numpy, scipy and
237 matplotlib libraries we can provide an illustration of a standard method
237 matplotlib libraries we can provide an illustration of a standard method
238 in elementary calculus with just a few lines of code. We will now
238 in elementary calculus with just a few lines of code. We will now
239 discuss with more detail the basic usage of these tools.
239 discuss with more detail the basic usage of these tools.
240
240
241 NumPy arrays: the right data structure for scientific computing
241 NumPy arrays: the right data structure for scientific computing
242 ===============================================================
242 ===============================================================
243
243
244 Basics of Numpy arrays
244 Basics of Numpy arrays
245 ----------------------
245 ----------------------
246
246
247 We now turn our attention to the Numpy library, which forms the base
247 We now turn our attention to the Numpy library, which forms the base
248 layer for the entire 'scipy ecosystem'. Once you have installed numpy,
248 layer for the entire 'scipy ecosystem'. Once you have installed numpy,
249 you can import it as
249 you can import it as
250
250
251 In[5]:
251 In[5]:
252
252
253 .. code:: python
253 .. code:: python
254
254
255 import numpy
255 import numpy
256
256
257 though in this book we will use the common shorthand
257 though in this book we will use the common shorthand
258
258
259 In[6]:
259 In[6]:
260
260
261 .. code:: python
261 .. code:: python
262
262
263 import numpy as np
263 import numpy as np
264
264
265 As mentioned above, the main object provided by numpy is a powerful
265 As mentioned above, the main object provided by numpy is a powerful
266 array. We'll start by exploring how the numpy array differs from Python
266 array. We'll start by exploring how the numpy array differs from Python
267 lists. We start by creating a simple list and an array with the same
267 lists. We start by creating a simple list and an array with the same
268 contents of the list:
268 contents of the list:
269
269
270 In[7]:
270 In[7]:
271
271
272 .. code:: python
272 .. code:: python
273
273
274 lst = [10, 20, 30, 40]
274 lst = [10, 20, 30, 40]
275 arr = np.array([10, 20, 30, 40])
275 arr = np.array([10, 20, 30, 40])
276
276
277 Elements of a one-dimensional array are accessed with the same syntax as
277 Elements of a one-dimensional array are accessed with the same syntax as
278 a list:
278 a list:
279
279
280 In[8]:
280 In[8]:
281
281
282 .. code:: python
282 .. code:: python
283
283
284 lst[0]
284 lst[0]
285
285
286 Out[8]:
286 Out[8]:
287
287
288 .. parsed-literal::
288 .. parsed-literal::
289
289
290 10
290 10
291
291
292 In[9]:
292 In[9]:
293
293
294 .. code:: python
294 .. code:: python
295
295
296 arr[0]
296 arr[0]
297
297
298 Out[9]:
298 Out[9]:
299
299
300 .. parsed-literal::
300 .. parsed-literal::
301
301
302 10
302 10
303
303
304 In[10]:
304 In[10]:
305
305
306 .. code:: python
306 .. code:: python
307
307
308 arr[-1]
308 arr[-1]
309
309
310 Out[10]:
310 Out[10]:
311
311
312 .. parsed-literal::
312 .. parsed-literal::
313
313
314 40
314 40
315
315
316 In[11]:
316 In[11]:
317
317
318 .. code:: python
318 .. code:: python
319
319
320 arr[2:]
320 arr[2:]
321
321
322 Out[11]:
322 Out[11]:
323
323
324 .. parsed-literal::
324 .. parsed-literal::
325
325
326 array([30, 40])
326 array([30, 40])
327
327
328 The first difference to note between lists and arrays is that arrays are
328 The first difference to note between lists and arrays is that arrays are
329 *homogeneous*; i.e. all elements of an array must be of the same type.
329 *homogeneous*; i.e. all elements of an array must be of the same type.
330 In contrast, lists can contain elements of arbitrary type. For example,
330 In contrast, lists can contain elements of arbitrary type. For example,
331 we can change the last element in our list above to be a string:
331 we can change the last element in our list above to be a string:
332
332
333 In[12]:
333 In[12]:
334
334
335 .. code:: python
335 .. code:: python
336
336
337 lst[-1] = 'a string inside a list'
337 lst[-1] = 'a string inside a list'
338 lst
338 lst
339
339
340 Out[12]:
340 Out[12]:
341
341
342 .. parsed-literal::
342 .. parsed-literal::
343
343
344 [10, 20, 30, 'a string inside a list']
344 [10, 20, 30, 'a string inside a list']
345
345
346 but the same can not be done with an array, as we get an error message:
346 but the same can not be done with an array, as we get an error message:
347
347
348 In[13]:
348 In[13]:
349
349
350 .. code:: python
350 .. code:: python
351
351
352 arr[-1] = 'a string inside an array'
352 arr[-1] = 'a string inside an array'
353
353
354 ::
354 ::
355
355
356 ---------------------------------------------------------------------------
356 ---------------------------------------------------------------------------
357 ValueError Traceback (most recent call last)
357 ValueError Traceback (most recent call last)
358 /home/fperez/teach/book-math-labtool/<ipython-input-13-29c0bfa5fa8a> in <module>()
358 /home/fperez/teach/book-math-labtool/<ipython-input-13-29c0bfa5fa8a> in <module>()
359 ----> 1 arr[-1] = 'a string inside an array'
359 ----> 1 arr[-1] = 'a string inside an array'
360
360
361 ValueError: invalid literal for long() with base 10: 'a string inside an array'
361 ValueError: invalid literal for long() with base 10: 'a string inside an array'
362
362
363 The information about the type of an array is contained in its *dtype*
363 The information about the type of an array is contained in its *dtype*
364 attribute:
364 attribute:
365
365
366 In[14]:
366 In[14]:
367
367
368 .. code:: python
368 .. code:: python
369
369
370 arr.dtype
370 arr.dtype
371
371
372 Out[14]:
372 Out[14]:
373
373
374 .. parsed-literal::
374 .. parsed-literal::
375
375
376 dtype('int32')
376 dtype('int32')
377
377
378 Once an array has been created, its dtype is fixed and it can only store
378 Once an array has been created, its dtype is fixed and it can only store
379 elements of the same type. For this example where the dtype is integer,
379 elements of the same type. For this example where the dtype is integer,
380 if we store a floating point number it will be automatically converted
380 if we store a floating point number it will be automatically converted
381 into an integer:
381 into an integer:
382
382
383 In[15]:
383 In[15]:
384
384
385 .. code:: python
385 .. code:: python
386
386
387 arr[-1] = 1.234
387 arr[-1] = 1.234
388 arr
388 arr
389
389
390 Out[15]:
390 Out[15]:
391
391
392 .. parsed-literal::
392 .. parsed-literal::
393
393
394 array([10, 20, 30, 1])
394 array([10, 20, 30, 1])
395
395
396 Above we created an array from an existing list; let us now see
396 Above we created an array from an existing list; let us now see
397 other ways in which we can create arrays, which we'll illustrate next. A
397 other ways in which we can create arrays, which we'll illustrate next. A
398 common need is to have an array initialized with a constant value, and
398 common need is to have an array initialized with a constant value, and
399 very often this value is 0 or 1 (suitable as starting value for additive
399 very often this value is 0 or 1 (suitable as starting value for additive
400 and multiplicative loops respectively); ``zeros`` creates arrays of all
400 and multiplicative loops respectively); ``zeros`` creates arrays of all
401 zeros, with any desired dtype:
401 zeros, with any desired dtype:
402
402
403 In[16]:
403 In[16]:
404
404
405 .. code:: python
405 .. code:: python
406
406
407 np.zeros(5, float)
407 np.zeros(5, float)
408
408
409 Out[16]:
409 Out[16]:
410
410
411 .. parsed-literal::
411 .. parsed-literal::
412
412
413 array([ 0., 0., 0., 0., 0.])
413 array([ 0., 0., 0., 0., 0.])
414
414
415 In[17]:
415 In[17]:
416
416
417 .. code:: python
417 .. code:: python
418
418
419 np.zeros(3, int)
419 np.zeros(3, int)
420
420
421 Out[17]:
421 Out[17]:
422
422
423 .. parsed-literal::
423 .. parsed-literal::
424
424
425 array([0, 0, 0])
425 array([0, 0, 0])
426
426
427 In[18]:
427 In[18]:
428
428
429 .. code:: python
429 .. code:: python
430
430
431 np.zeros(3, complex)
431 np.zeros(3, complex)
432
432
433 Out[18]:
433 Out[18]:
434
434
435 .. parsed-literal::
435 .. parsed-literal::
436
436
437 array([ 0.+0.j, 0.+0.j, 0.+0.j])
437 array([ 0.+0.j, 0.+0.j, 0.+0.j])
438
438
439 and similarly for ``ones``:
439 and similarly for ``ones``:
440
440
441 In[19]:
441 In[19]:
442
442
443 .. code:: python
443 .. code:: python
444
444
445 print '5 ones:', np.ones(5)
445 print '5 ones:', np.ones(5)
446
446
447 .. parsed-literal::
447 .. parsed-literal::
448
448
449 5 ones: [ 1. 1. 1. 1. 1.]
449 5 ones: [ 1. 1. 1. 1. 1.]
450
450
451
451
452 If we want an array initialized with an arbitrary value, we can create
452 If we want an array initialized with an arbitrary value, we can create
453 an empty array and then use the fill method to put the value we want
453 an empty array and then use the fill method to put the value we want
454 into the array:
454 into the array:
455
455
456 In[20]:
456 In[20]:
457
457
458 .. code:: python
458 .. code:: python
459
459
460 a = empty(4)
460 a = empty(4)
461 a.fill(5.5)
461 a.fill(5.5)
462 a
462 a
463
463
464 Out[20]:
464 Out[20]:
465
465
466 .. parsed-literal::
466 .. parsed-literal::
467
467
468 array([ 5.5, 5.5, 5.5, 5.5])
468 array([ 5.5, 5.5, 5.5, 5.5])
469
469
470 Numpy also offers the ``arange`` function, which works like the builtin
470 Numpy also offers the ``arange`` function, which works like the builtin
471 ``range`` but returns an array instead of a list:
471 ``range`` but returns an array instead of a list:
472
472
473 In[21]:
473 In[21]:
474
474
475 .. code:: python
475 .. code:: python
476
476
477 np.arange(5)
477 np.arange(5)
478
478
479 Out[21]:
479 Out[21]:
480
480
481 .. parsed-literal::
481 .. parsed-literal::
482
482
483 array([0, 1, 2, 3, 4])
483 array([0, 1, 2, 3, 4])
484
484
485 and the ``linspace`` and ``logspace`` functions to create linearly and
485 and the ``linspace`` and ``logspace`` functions to create linearly and
486 logarithmically-spaced grids respectively, with a fixed number of points
486 logarithmically-spaced grids respectively, with a fixed number of points
487 and including both ends of the specified interval:
487 and including both ends of the specified interval:
488
488
489 In[22]:
489 In[22]:
490
490
491 .. code:: python
491 .. code:: python
492
492
493 print "A linear grid between 0 and 1:", np.linspace(0, 1, 5)
493 print "A linear grid between 0 and 1:", np.linspace(0, 1, 5)
494 print "A logarithmic grid between 10**1 and 10**4: ", np.logspace(1, 4, 4)
494 print "A logarithmic grid between 10**1 and 10**4: ", np.logspace(1, 4, 4)
495
495
496 .. parsed-literal::
496 .. parsed-literal::
497
497
498 A linear grid between 0 and 1: [ 0. 0.25 0.5 0.75 1. ]
498 A linear grid between 0 and 1: [ 0. 0.25 0.5 0.75 1. ]
499 A logarithmic grid between 10**1 and 10**4: [ 10. 100. 1000. 10000.]
499 A logarithmic grid between 10**1 and 10**4: [ 10. 100. 1000. 10000.]
500
500
501
501
502 Finally, it is often useful to create arrays with random numbers that
502 Finally, it is often useful to create arrays with random numbers that
503 follow a specific distribution. The ``np.random`` module contains a
503 follow a specific distribution. The ``np.random`` module contains a
504 number of functions that can be used to this effect, for example this
504 number of functions that can be used to this effect, for example this
505 will produce an array of 5 random samples taken from a standard normal
505 will produce an array of 5 random samples taken from a standard normal
506 distribution (0 mean and variance 1):
506 distribution (0 mean and variance 1):
507
507
508 In[23]:
508 In[23]:
509
509
510 .. code:: python
510 .. code:: python
511
511
512 np.random.randn(5)
512 np.random.randn(5)
513
513
514 Out[23]:
514 Out[23]:
515
515
516 .. parsed-literal::
516 .. parsed-literal::
517
517
518 array([-0.08633343, -0.67375434, 1.00589536, 0.87081651, 1.65597822])
518 array([-0.08633343, -0.67375434, 1.00589536, 0.87081651, 1.65597822])
519
519
520 whereas this will also give 5 samples, but from a normal distribution
520 whereas this will also give 5 samples, but from a normal distribution
521 with a mean of 10 and a standard deviation of 3:
521 with a mean of 10 and a standard deviation of 3:
522
522
523 In[24]:
523 In[24]:
524
524
525 .. code:: python
525 .. code:: python
526
526
527 norm10 = np.random.normal(10, 3, 5)
527 norm10 = np.random.normal(10, 3, 5)
528 norm10
528 norm10
529
529
530 Out[24]:
530 Out[24]:
531
531
532 .. parsed-literal::
532 .. parsed-literal::
533
533
534 array([ 8.94879575, 5.53038269, 8.24847281, 12.14944165, 11.56209294])
534 array([ 8.94879575, 5.53038269, 8.24847281, 12.14944165, 11.56209294])
535
535
536 Indexing with other arrays
536 Indexing with other arrays
537 --------------------------
537 --------------------------
538
538
539 Above we saw how to index arrays with single numbers and slices, just
539 Above we saw how to index arrays with single numbers and slices, just
540 like Python lists. But arrays allow for a more sophisticated kind of
540 like Python lists. But arrays allow for a more sophisticated kind of
541 indexing which is very powerful: you can index an array with another
541 indexing which is very powerful: you can index an array with another
542 array, and in particular with an array of boolean values. This is
542 array, and in particular with an array of boolean values. This is
543 particularly useful to extract information from an array that matches a
543 particularly useful to extract information from an array that matches a
544 certain condition.
544 certain condition.
545
545
546 Consider for example that in the array ``norm10`` we want to replace all
546 Consider for example that in the array ``norm10`` we want to replace all
547 values above 9 with the value 0. We can do so by first finding the
547 values above 9 with the value 0. We can do so by first finding the
548 *mask* that indicates where this condition is true or false:
548 *mask* that indicates where this condition is true or false:
549
549
550 In[25]:
550 In[25]:
551
551
552 .. code:: python
552 .. code:: python
553
553
554 mask = norm10 > 9
554 mask = norm10 > 9
555 mask
555 mask
556
556
557 Out[25]:
557 Out[25]:
558
558
559 .. parsed-literal::
559 .. parsed-literal::
560
560
561 array([False, False, False, True, True], dtype=bool)
561 array([False, False, False, True, True], dtype=bool)
562
562
563 Now that we have this mask, we can use it to either read those values or
563 Now that we have this mask, we can use it to either read those values or
564 to reset them to 0:
564 to reset them to 0:
565
565
566 In[26]:
566 In[26]:
567
567
568 .. code:: python
568 .. code:: python
569
569
570 print 'Values above 9:', norm10[mask]
570 print 'Values above 9:', norm10[mask]
571
571
572 .. parsed-literal::
572 .. parsed-literal::
573
573
574 Values above 9: [ 12.14944165 11.56209294]
574 Values above 9: [ 12.14944165 11.56209294]
575
575
576
576
577 In[27]:
577 In[27]:
578
578
579 .. code:: python
579 .. code:: python
580
580
581 print 'Resetting all values above 9 to 0...'
581 print 'Resetting all values above 9 to 0...'
582 norm10[mask] = 0
582 norm10[mask] = 0
583 print norm10
583 print norm10
584
584
585 .. parsed-literal::
585 .. parsed-literal::
586
586
587 Resetting all values above 9 to 0...
587 Resetting all values above 9 to 0...
588 [ 8.94879575 5.53038269 8.24847281 0. 0. ]
588 [ 8.94879575 5.53038269 8.24847281 0. 0. ]
589
589
590
590
591 Arrays with more than one dimension
591 Arrays with more than one dimension
592 -----------------------------------
592 -----------------------------------
593
593
594 Up until now all our examples have used one-dimensional arrays. But
594 Up until now all our examples have used one-dimensional arrays. But
595 Numpy can create arrays of arbitrary dimensions, and all the methods
595 Numpy can create arrays of arbitrary dimensions, and all the methods
596 illustrated in the previous section work with more than one dimension.
596 illustrated in the previous section work with more than one dimension.
597 For example, a list of lists can be used to initialize a two dimensional
597 For example, a list of lists can be used to initialize a two dimensional
598 array:
598 array:
599
599
600 In[28]:
600 In[28]:
601
601
602 .. code:: python
602 .. code:: python
603
603
604 lst2 = [[1, 2], [3, 4]]
604 lst2 = [[1, 2], [3, 4]]
605 arr2 = np.array([[1, 2], [3, 4]])
605 arr2 = np.array([[1, 2], [3, 4]])
606 arr2
606 arr2
607
607
608 Out[28]:
608 Out[28]:
609
609
610 .. parsed-literal::
610 .. parsed-literal::
611
611
612 array([[1, 2],
612 array([[1, 2],
613 [3, 4]])
613 [3, 4]])
614
614
615 With two-dimensional arrays we start seeing the power of numpy: while a
615 With two-dimensional arrays we start seeing the power of numpy: while a
616 nested list can be indexed by repeatedly using the ``[ ]`` operator,
616 nested list can be indexed by repeatedly using the ``[ ]`` operator,
617 multidimensional arrays support a much more natural indexing syntax with
617 multidimensional arrays support a much more natural indexing syntax with
618 a single ``[ ]`` and a set of indices separated by commas:
618 a single ``[ ]`` and a set of indices separated by commas:
619
619
620 In[29]:
620 In[29]:
621
621
622 .. code:: python
622 .. code:: python
623
623
624 print lst2[0][1]
624 print lst2[0][1]
625 print arr2[0,1]
625 print arr2[0,1]
626
626
627 .. parsed-literal::
627 .. parsed-literal::
628
628
629 2
629 2
630 2
630 2
631
631
632
632
633 Most of the array creation functions listed above can be used with more
633 Most of the array creation functions listed above can be used with more
634 than one dimension, for example:
634 than one dimension, for example:
635
635
636 In[30]:
636 In[30]:
637
637
638 .. code:: python
638 .. code:: python
639
639
640 np.zeros((2,3))
640 np.zeros((2,3))
641
641
642 Out[30]:
642 Out[30]:
643
643
644 .. parsed-literal::
644 .. parsed-literal::
645
645
646 array([[ 0., 0., 0.],
646 array([[ 0., 0., 0.],
647 [ 0., 0., 0.]])
647 [ 0., 0., 0.]])
648
648
649 In[31]:
649 In[31]:
650
650
651 .. code:: python
651 .. code:: python
652
652
653 np.random.normal(10, 3, (2, 4))
653 np.random.normal(10, 3, (2, 4))
654
654
655 Out[31]:
655 Out[31]:
656
656
657 .. parsed-literal::
657 .. parsed-literal::
658
658
659 array([[ 11.26788826, 4.29619866, 11.09346496, 9.73861307],
659 array([[ 11.26788826, 4.29619866, 11.09346496, 9.73861307],
660 [ 10.54025996, 9.5146268 , 10.80367214, 13.62204505]])
660 [ 10.54025996, 9.5146268 , 10.80367214, 13.62204505]])
661
661
662 In fact, the shape of an array can be changed at any time, as long as
662 In fact, the shape of an array can be changed at any time, as long as
663 the total number of elements is unchanged. For example, if we want a 2x4
663 the total number of elements is unchanged. For example, if we want a 2x4
664 array with numbers increasing from 0, the easiest way to create it is:
664 array with numbers increasing from 0, the easiest way to create it is:
665
665
666 In[32]:
666 In[32]:
667
667
668 .. code:: python
668 .. code:: python
669
669
670 arr = np.arange(8).reshape(2,4)
670 arr = np.arange(8).reshape(2,4)
671 print arr
671 print arr
672
672
673 .. parsed-literal::
673 .. parsed-literal::
674
674
675 [[0 1 2 3]
675 [[0 1 2 3]
676 [4 5 6 7]]
676 [4 5 6 7]]
677
677
678
678
679 With multidimensional arrays, you can also use slices, and you can mix
679 With multidimensional arrays, you can also use slices, and you can mix
680 and match slices and single indices in the different dimensions (using
680 and match slices and single indices in the different dimensions (using
681 the same array as above):
681 the same array as above):
682
682
683 In[33]:
683 In[33]:
684
684
685 .. code:: python
685 .. code:: python
686
686
687 print 'Slicing in the second row:', arr[1, 2:4]
687 print 'Slicing in the second row:', arr[1, 2:4]
688 print 'All rows, third column :', arr[:, 2]
688 print 'All rows, third column :', arr[:, 2]
689
689
690 .. parsed-literal::
690 .. parsed-literal::
691
691
692 Slicing in the second row: [6 7]
692 Slicing in the second row: [6 7]
693 All rows, third column : [2 6]
693 All rows, third column : [2 6]
694
694
695
695
696 If you only provide one index, then you will get an array with one less
696 If you only provide one index, then you will get an array with one less
697 dimension containing that row:
697 dimension containing that row:
698
698
699 In[34]:
699 In[34]:
700
700
701 .. code:: python
701 .. code:: python
702
702
703 print 'First row: ', arr[0]
703 print 'First row: ', arr[0]
704 print 'Second row: ', arr[1]
704 print 'Second row: ', arr[1]
705
705
706 .. parsed-literal::
706 .. parsed-literal::
707
707
708 First row: [0 1 2 3]
708 First row: [0 1 2 3]
709 Second row: [4 5 6 7]
709 Second row: [4 5 6 7]
710
710
711
711
712 Now that we have seen how to create arrays with more than one dimension,
712 Now that we have seen how to create arrays with more than one dimension,
713 it's a good idea to look at some of the most useful properties and
713 it's a good idea to look at some of the most useful properties and
714 methods that arrays have. The following provide basic information about
714 methods that arrays have. The following provide basic information about
715 the size, shape and data in the array:
715 the size, shape and data in the array:
716
716
717 In[35]:
717 In[35]:
718
718
719 .. code:: python
719 .. code:: python
720
720
721 print 'Data type :', arr.dtype
721 print 'Data type :', arr.dtype
722 print 'Total number of elements :', arr.size
722 print 'Total number of elements :', arr.size
723 print 'Number of dimensions :', arr.ndim
723 print 'Number of dimensions :', arr.ndim
724 print 'Shape (dimensionality) :', arr.shape
724 print 'Shape (dimensionality) :', arr.shape
725 print 'Memory used (in bytes) :', arr.nbytes
725 print 'Memory used (in bytes) :', arr.nbytes
726
726
727 .. parsed-literal::
727 .. parsed-literal::
728
728
729 Data type : int32
729 Data type : int32
730 Total number of elements : 8
730 Total number of elements : 8
731 Number of dimensions : 2
731 Number of dimensions : 2
732 Shape (dimensionality) : (2, 4)
732 Shape (dimensionality) : (2, 4)
733 Memory used (in bytes) : 32
733 Memory used (in bytes) : 32
734
734
735
735
736 Arrays also have many useful methods, some especially useful ones are:
736 Arrays also have many useful methods, some especially useful ones are:
737
737
738 In[36]:
738 In[36]:
739
739
740 .. code:: python
740 .. code:: python
741
741
742 print 'Minimum and maximum :', arr.min(), arr.max()
742 print 'Minimum and maximum :', arr.min(), arr.max()
743 print 'Sum and product of all elements :', arr.sum(), arr.prod()
743 print 'Sum and product of all elements :', arr.sum(), arr.prod()
744 print 'Mean and standard deviation :', arr.mean(), arr.std()
744 print 'Mean and standard deviation :', arr.mean(), arr.std()
745
745
746 .. parsed-literal::
746 .. parsed-literal::
747
747
748 Minimum and maximum : 0 7
748 Minimum and maximum : 0 7
749 Sum and product of all elements : 28 0
749 Sum and product of all elements : 28 0
750 Mean and standard deviation : 3.5 2.29128784748
750 Mean and standard deviation : 3.5 2.29128784748
751
751
752
752
753 For these methods, the above operations are all computed on all the
753 For these methods, the above operations are all computed on all the
754 elements of the array. But for a multidimensional array, it's possible
754 elements of the array. But for a multidimensional array, it's possible
755 to do the computation along a single dimension, by passing the ``axis``
755 to do the computation along a single dimension, by passing the ``axis``
756 parameter; for example:
756 parameter; for example:
757
757
758 In[37]:
758 In[37]:
759
759
760 .. code:: python
760 .. code:: python
761
761
762 print 'For the following array:\n', arr
762 print 'For the following array:\n', arr
763 print 'The sum of elements along the rows is :', arr.sum(axis=1)
763 print 'The sum of elements along the rows is :', arr.sum(axis=1)
764 print 'The sum of elements along the columns is :', arr.sum(axis=0)
764 print 'The sum of elements along the columns is :', arr.sum(axis=0)
765
765
766 .. parsed-literal::
766 .. parsed-literal::
767
767
768 For the following array:
768 For the following array:
769 [[0 1 2 3]
769 [[0 1 2 3]
770 [4 5 6 7]]
770 [4 5 6 7]]
771 The sum of elements along the rows is : [ 6 22]
771 The sum of elements along the rows is : [ 6 22]
772 The sum of elements along the columns is : [ 4 6 8 10]
772 The sum of elements along the columns is : [ 4 6 8 10]
773
773
774
774
775 As you can see in this example, the value of the ``axis`` parameter is
775 As you can see in this example, the value of the ``axis`` parameter is
776 the dimension which will be *consumed* once the operation has been
776 the dimension which will be *consumed* once the operation has been
777 carried out. This is why to sum along the rows we use ``axis=1``.
777 carried out. This is why to sum along the rows we use ``axis=1``.
778
778
779 This can be easily illustrated with an example that has more dimensions;
779 This can be easily illustrated with an example that has more dimensions;
780 we create an array with 4 dimensions and shape ``(3,4,5,6)`` and sum
780 we create an array with 4 dimensions and shape ``(3,4,5,6)`` and sum
781 along the axis number 2 (i.e. the *third* axis, since in Python all
781 along the axis number 2 (i.e. the *third* axis, since in Python all
782 counts are 0-based). That consumes the dimension whose length was 5,
782 counts are 0-based). That consumes the dimension whose length was 5,
783 leaving us with a new array that has shape ``(3,4,6)``:
783 leaving us with a new array that has shape ``(3,4,6)``:
784
784
785 In[38]:
785 In[38]:
786
786
787 .. code:: python
787 .. code:: python
788
788
789 np.zeros((3,4,5,6)).sum(2).shape
789 np.zeros((3,4,5,6)).sum(2).shape
790
790
791 Out[38]:
791 Out[38]:
792
792
793 .. parsed-literal::
793 .. parsed-literal::
794
794
795 (3, 4, 6)
795 (3, 4, 6)
796
796
797 Another widely used property of arrays is the ``.T`` attribute, which
797 Another widely used property of arrays is the ``.T`` attribute, which
798 allows you to access the transpose of the array:
798 allows you to access the transpose of the array:
799
799
800 In[39]:
800 In[39]:
801
801
802 .. code:: python
802 .. code:: python
803
803
804 print 'Array:\n', arr
804 print 'Array:\n', arr
805 print 'Transpose:\n', arr.T
805 print 'Transpose:\n', arr.T
806
806
807 .. parsed-literal::
807 .. parsed-literal::
808
808
809 Array:
809 Array:
810 [[0 1 2 3]
810 [[0 1 2 3]
811 [4 5 6 7]]
811 [4 5 6 7]]
812 Transpose:
812 Transpose:
813 [[0 4]
813 [[0 4]
814 [1 5]
814 [1 5]
815 [2 6]
815 [2 6]
816 [3 7]]
816 [3 7]]
817
817
818
818
819 We don't have time here to look at all the methods and properties of
819 We don't have time here to look at all the methods and properties of
820 arrays; here's a complete list. Simply try exploring some of these in
820 arrays; here's a complete list. Simply try exploring some of these in
821 IPython to learn more, or read their description in the full Numpy
821 IPython to learn more, or read their description in the full Numpy
822 documentation:
822 documentation:
823
823
824 ::
824 ::
825
825
826 arr.T arr.copy arr.getfield arr.put arr.squeeze
826 arr.T arr.copy arr.getfield arr.put arr.squeeze
827 arr.all arr.ctypes arr.imag arr.ravel arr.std
827 arr.all arr.ctypes arr.imag arr.ravel arr.std
828 arr.any arr.cumprod arr.item arr.real arr.strides
828 arr.any arr.cumprod arr.item arr.real arr.strides
829 arr.argmax arr.cumsum arr.itemset arr.repeat arr.sum
829 arr.argmax arr.cumsum arr.itemset arr.repeat arr.sum
830 arr.argmin arr.data arr.itemsize arr.reshape arr.swapaxes
830 arr.argmin arr.data arr.itemsize arr.reshape arr.swapaxes
831 arr.argsort arr.diagonal arr.max arr.resize arr.take
831 arr.argsort arr.diagonal arr.max arr.resize arr.take
832 arr.astype arr.dot arr.mean arr.round arr.tofile
832 arr.astype arr.dot arr.mean arr.round arr.tofile
833 arr.base arr.dtype arr.min arr.searchsorted arr.tolist
833 arr.base arr.dtype arr.min arr.searchsorted arr.tolist
834 arr.byteswap arr.dump arr.nbytes arr.setasflat arr.tostring
834 arr.byteswap arr.dump arr.nbytes arr.setasflat arr.tostring
835 arr.choose arr.dumps arr.ndim arr.setfield arr.trace
835 arr.choose arr.dumps arr.ndim arr.setfield arr.trace
836 arr.clip arr.fill arr.newbyteorder arr.setflags arr.transpose
836 arr.clip arr.fill arr.newbyteorder arr.setflags arr.transpose
837 arr.compress arr.flags arr.nonzero arr.shape arr.var
837 arr.compress arr.flags arr.nonzero arr.shape arr.var
838 arr.conj arr.flat arr.prod arr.size arr.view
838 arr.conj arr.flat arr.prod arr.size arr.view
839 arr.conjugate arr.flatten arr.ptp arr.sort
839 arr.conjugate arr.flatten arr.ptp arr.sort
840
840
841
841
842 Operating with arrays
842 Operating with arrays
843 ---------------------
843 ---------------------
844
844
845 Arrays support all regular arithmetic operators, and the numpy library
845 Arrays support all regular arithmetic operators, and the numpy library
846 also contains a complete collection of basic mathematical functions that
846 also contains a complete collection of basic mathematical functions that
847 operate on arrays. It is important to remember that in general, all
847 operate on arrays. It is important to remember that in general, all
848 operations with arrays are applied *element-wise*, i.e., are applied to
848 operations with arrays are applied *element-wise*, i.e., are applied to
849 all the elements of the array at the same time. Consider for example:
849 all the elements of the array at the same time. Consider for example:
850
850
851 In[40]:
851 In[40]:
852
852
853 .. code:: python
853 .. code:: python
854
854
855 arr1 = np.arange(4)
855 arr1 = np.arange(4)
856 arr2 = np.arange(10, 14)
856 arr2 = np.arange(10, 14)
857 print arr1, '+', arr2, '=', arr1+arr2
857 print arr1, '+', arr2, '=', arr1+arr2
858
858
859 .. parsed-literal::
859 .. parsed-literal::
860
860
861 [0 1 2 3] + [10 11 12 13] = [10 12 14 16]
861 [0 1 2 3] + [10 11 12 13] = [10 12 14 16]
862
862
863
863
864 Importantly, you must remember that even the multiplication operator is
864 Importantly, you must remember that even the multiplication operator is
865 by default applied element-wise, it is *not* the matrix multiplication
865 by default applied element-wise, it is *not* the matrix multiplication
866 from linear algebra (as is the case in Matlab, for example):
866 from linear algebra (as is the case in Matlab, for example):
867
867
868 In[41]:
868 In[41]:
869
869
870 .. code:: python
870 .. code:: python
871
871
872 print arr1, '*', arr2, '=', arr1*arr2
872 print arr1, '*', arr2, '=', arr1*arr2
873
873
874 .. parsed-literal::
874 .. parsed-literal::
875
875
876 [0 1 2 3] * [10 11 12 13] = [ 0 11 24 39]
876 [0 1 2 3] * [10 11 12 13] = [ 0 11 24 39]
877
877
878
878
879 While this means that in principle arrays must always match in their
879 While this means that in principle arrays must always match in their
880 dimensionality in order for an operation to be valid, numpy will
880 dimensionality in order for an operation to be valid, numpy will
881 *broadcast* dimensions when possible. For example, suppose that you want
881 *broadcast* dimensions when possible. For example, suppose that you want
882 to add the number 1.5 to ``arr1``; the following would be a valid way to
882 to add the number 1.5 to ``arr1``; the following would be a valid way to
883 do it:
883 do it:
884
884
885 In[42]:
885 In[42]:
886
886
887 .. code:: python
887 .. code:: python
888
888
889 arr1 + 1.5*np.ones(4)
889 arr1 + 1.5*np.ones(4)
890
890
891 Out[42]:
891 Out[42]:
892
892
893 .. parsed-literal::
893 .. parsed-literal::
894
894
895 array([ 1.5, 2.5, 3.5, 4.5])
895 array([ 1.5, 2.5, 3.5, 4.5])
896
896
897 But thanks to numpy's broadcasting rules, the following is equally
897 But thanks to numpy's broadcasting rules, the following is equally
898 valid:
898 valid:
899
899
900 In[43]:
900 In[43]:
901
901
902 .. code:: python
902 .. code:: python
903
903
904 arr1 + 1.5
904 arr1 + 1.5
905
905
906 Out[43]:
906 Out[43]:
907
907
908 .. parsed-literal::
908 .. parsed-literal::
909
909
910 array([ 1.5, 2.5, 3.5, 4.5])
910 array([ 1.5, 2.5, 3.5, 4.5])
911
911
912 In this case, numpy looked at both operands and saw that the first
912 In this case, numpy looked at both operands and saw that the first
913 (``arr1``) was a one-dimensional array of length 4 and the second was a
913 (``arr1``) was a one-dimensional array of length 4 and the second was a
914 scalar, considered a zero-dimensional object. The broadcasting rules
914 scalar, considered a zero-dimensional object. The broadcasting rules
915 allow numpy to:
915 allow numpy to:
916
916
917 - *create* new dimensions of length 1 (since this doesn't change the
917 - *create* new dimensions of length 1 (since this doesn't change the
918 size of the array)
918 size of the array)
919 - 'stretch' a dimension of length 1 that needs to be matched to a
919 - 'stretch' a dimension of length 1 that needs to be matched to a
920 dimension of a different size.
920 dimension of a different size.
921
921
922 So in the above example, the scalar 1.5 is effectively:
922 So in the above example, the scalar 1.5 is effectively:
923
923
924 - first 'promoted' to a 1-dimensional array of length 1
924 - first 'promoted' to a 1-dimensional array of length 1
925 - then, this array is 'stretched' to length 4 to match the dimension of
925 - then, this array is 'stretched' to length 4 to match the dimension of
926 ``arr1``.
926 ``arr1``.
927
927
928 After these two operations are complete, the addition can proceed as now
928 After these two operations are complete, the addition can proceed as now
929 both operands are one-dimensional arrays of length 4.
929 both operands are one-dimensional arrays of length 4.
930
930
931 This broadcasting behavior is in practice enormously powerful,
931 This broadcasting behavior is in practice enormously powerful,
932 especially because when numpy broadcasts to create new dimensions or to
932 especially because when numpy broadcasts to create new dimensions or to
933 'stretch' existing ones, it doesn't actually replicate the data. In the
933 'stretch' existing ones, it doesn't actually replicate the data. In the
934 example above the operation is carried out *as if* the 1.5 was a 1-d array
934 example above the operation is carried out *as if* the 1.5 was a 1-d array
935 with 1.5 in all of its entries, but no actual array was ever created.
935 with 1.5 in all of its entries, but no actual array was ever created.
936 This can save lots of memory in cases when the arrays in question are
936 This can save lots of memory in cases when the arrays in question are
937 large and can have significant performance implications.
937 large and can have significant performance implications.
938
938
939 The general rule is: when operating on two arrays, NumPy compares their
939 The general rule is: when operating on two arrays, NumPy compares their
940 shapes element-wise. It starts with the trailing dimensions, and works
940 shapes element-wise. It starts with the trailing dimensions, and works
941 its way forward, creating dimensions of length 1 as needed. Two
941 its way forward, creating dimensions of length 1 as needed. Two
942 dimensions are considered compatible when
942 dimensions are considered compatible when
943
943
944 - they are equal to begin with, or
944 - they are equal to begin with, or
945 - one of them is 1; in this case numpy will do the 'stretching' to make
945 - one of them is 1; in this case numpy will do the 'stretching' to make
946 them equal.
946 them equal.
947
947
948 If these conditions are not met, a
948 If these conditions are not met, a
949 ``ValueError: operands could not be broadcast together`` exception is thrown, indicating
949 ``ValueError: operands could not be broadcast together`` exception is thrown, indicating
950 that the arrays have incompatible shapes. The size of the resulting
950 that the arrays have incompatible shapes. The size of the resulting
951 array is the maximum size along each dimension of the input arrays.
951 array is the maximum size along each dimension of the input arrays.
952
952
953 This shows how the broadcasting rules work in several dimensions:
953 This shows how the broadcasting rules work in several dimensions:
954
954
955 In[44]:
955 In[44]:
956
956
957 .. code:: python
957 .. code:: python
958
958
959 b = np.array([2, 3, 4, 5])
959 b = np.array([2, 3, 4, 5])
960 print arr, '\n\n+', b , '\n----------------\n', arr + b
960 print arr, '\n\n+', b , '\n----------------\n', arr + b
961
961
962 .. parsed-literal::
962 .. parsed-literal::
963
963
964 [[0 1 2 3]
964 [[0 1 2 3]
965 [4 5 6 7]]
965 [4 5 6 7]]
966
966
967 + [2 3 4 5]
967 + [2 3 4 5]
968 ----------------
968 ----------------
969 [[ 2 4 6 8]
969 [[ 2 4 6 8]
970 [ 6 8 10 12]]
970 [ 6 8 10 12]]
971
971
972
972
973 Now, how could you use broadcasting to say add ``[4, 6]`` along the rows
973 Now, how could you use broadcasting to say add ``[4, 6]`` along the rows
974 to ``arr`` above? Simply performing the direct addition will produce the
974 to ``arr`` above? Simply performing the direct addition will produce the
975 error we previously mentioned:
975 error we previously mentioned:
976
976
977 In[45]:
977 In[45]:
978
978
979 .. code:: python
979 .. code:: python
980
980
981 c = np.array([4, 6])
981 c = np.array([4, 6])
982 arr + c
982 arr + c
983
983
984 ::
984 ::
985
985
986 ---------------------------------------------------------------------------
986 ---------------------------------------------------------------------------
987 ValueError Traceback (most recent call last)
987 ValueError Traceback (most recent call last)
988 /home/fperez/teach/book-math-labtool/<ipython-input-45-62aa20ac1980> in <module>()
988 /home/fperez/teach/book-math-labtool/<ipython-input-45-62aa20ac1980> in <module>()
989 1 c = np.array([4, 6])
989 1 c = np.array([4, 6])
990 ----> 2 arr + c
990 ----> 2 arr + c
991
991
992 ValueError: operands could not be broadcast together with shapes (2,4) (2)
992 ValueError: operands could not be broadcast together with shapes (2,4) (2)
993
993
994 According to the rules above, the array ``c`` would need to have a
994 According to the rules above, the array ``c`` would need to have a
995 *trailing* dimension of 1 for the broadcasting to work. It turns out
995 *trailing* dimension of 1 for the broadcasting to work. It turns out
996 that numpy allows you to 'inject' new dimensions anywhere into an array
996 that numpy allows you to 'inject' new dimensions anywhere into an array
997 on the fly, by indexing it with the special object ``np.newaxis``:
997 on the fly, by indexing it with the special object ``np.newaxis``:
998
998
999 In[46]:
999 In[46]:
1000
1000
1001 .. code:: python
1001 .. code:: python
1002
1002
1003 (c[:, np.newaxis]).shape
1003 (c[:, np.newaxis]).shape
1004
1004
1005 Out[46]:
1005 Out[46]:
1006
1006
1007 .. parsed-literal::
1007 .. parsed-literal::
1008
1008
1009 (2, 1)
1009 (2, 1)
1010
1010
1011 This is exactly what we need, and indeed it works:
1011 This is exactly what we need, and indeed it works:
1012
1012
1013 In[47]:
1013 In[47]:
1014
1014
1015 .. code:: python
1015 .. code:: python
1016
1016
1017 arr + c[:, np.newaxis]
1017 arr + c[:, np.newaxis]
1018
1018
1019 Out[47]:
1019 Out[47]:
1020
1020
1021 .. parsed-literal::
1021 .. parsed-literal::
1022
1022
1023 array([[ 4, 5, 6, 7],
1023 array([[ 4, 5, 6, 7],
1024 [10, 11, 12, 13]])
1024 [10, 11, 12, 13]])
1025
1025
1026 For the full broadcasting rules, please see the official Numpy docs,
1026 For the full broadcasting rules, please see the official Numpy docs,
1027 which describe them in detail and with more complex examples.
1027 which describe them in detail and with more complex examples.
1028
1028
1029 As we mentioned before, Numpy ships with a full complement of
1029 As we mentioned before, Numpy ships with a full complement of
1030 mathematical functions that work on entire arrays, including logarithms,
1030 mathematical functions that work on entire arrays, including logarithms,
1031 exponentials, trigonometric and hyperbolic trigonometric functions, etc.
1031 exponentials, trigonometric and hyperbolic trigonometric functions, etc.
1032 Furthermore, scipy ships a rich special function library in the
1032 Furthermore, scipy ships a rich special function library in the
1033 ``scipy.special`` module that includes Bessel, Airy, Fresnel, Laguerre
1033 ``scipy.special`` module that includes Bessel, Airy, Fresnel, Laguerre
1034 and other classical special functions. For example, sampling the sine
1034 and other classical special functions. For example, sampling the sine
1035 function at 100 points between :math:`0` and :math:`2\pi` is as simple
1035 function at 100 points between :math:`0` and :math:`2\pi` is as simple
1036 as:
1036 as:
1037
1037
1038 In[48]:
1038 In[48]:
1039
1039
1040 .. code:: python
1040 .. code:: python
1041
1041
1042 x = np.linspace(0, 2*np.pi, 100)
1042 x = np.linspace(0, 2*np.pi, 100)
1043 y = np.sin(x)
1043 y = np.sin(x)
1044
1044
1045 Linear algebra in numpy
1045 Linear algebra in numpy
1046 -----------------------
1046 -----------------------
1047
1047
1048 Numpy ships with a basic linear algebra library, and all arrays have a
1048 Numpy ships with a basic linear algebra library, and all arrays have a
1049 ``dot`` method whose behavior is that of the scalar dot product when its
1049 ``dot`` method whose behavior is that of the scalar dot product when its
1050 arguments are vectors (one-dimensional arrays) and the traditional
1050 arguments are vectors (one-dimensional arrays) and the traditional
1051 matrix multiplication when one or both of its arguments are
1051 matrix multiplication when one or both of its arguments are
1052 two-dimensional arrays:
1052 two-dimensional arrays:
1053
1053
1054 In[49]:
1054 In[49]:
1055
1055
1056 .. code:: python
1056 .. code:: python
1057
1057
1058 v1 = np.array([2, 3, 4])
1058 v1 = np.array([2, 3, 4])
1059 v2 = np.array([1, 0, 1])
1059 v2 = np.array([1, 0, 1])
1060 print v1, '.', v2, '=', v1.dot(v2)
1060 print v1, '.', v2, '=', v1.dot(v2)
1061
1061
1062 .. parsed-literal::
1062 .. parsed-literal::
1063
1063
1064 [2 3 4] . [1 0 1] = 6
1064 [2 3 4] . [1 0 1] = 6
1065
1065
1066
1066
1067 Here is a regular matrix-vector multiplication, note that the array
1067 Here is a regular matrix-vector multiplication, note that the array
1068 ``v1`` should be viewed as a *column* vector in traditional linear
1068 ``v1`` should be viewed as a *column* vector in traditional linear
1069 algebra notation; numpy makes no distinction between row and column
1069 algebra notation; numpy makes no distinction between row and column
1070 vectors and simply verifies that the dimensions match the required rules
1070 vectors and simply verifies that the dimensions match the required rules
1071 of matrix multiplication, in this case we have a :math:`2 \times 3`
1071 of matrix multiplication, in this case we have a :math:`2 \times 3`
1072 matrix multiplied by a 3-vector, which produces a 2-vector:
1072 matrix multiplied by a 3-vector, which produces a 2-vector:
1073
1073
1074 In[50]:
1074 In[50]:
1075
1075
1076 .. code:: python
1076 .. code:: python
1077
1077
1078 A = np.arange(6).reshape(2, 3)
1078 A = np.arange(6).reshape(2, 3)
1079 print A, 'x', v1, '=', A.dot(v1)
1079 print A, 'x', v1, '=', A.dot(v1)
1080
1080
1081 .. parsed-literal::
1081 .. parsed-literal::
1082
1082
1083 [[0 1 2]
1083 [[0 1 2]
1084 [3 4 5]] x [2 3 4] = [11 38]
1084 [3 4 5]] x [2 3 4] = [11 38]
1085
1085
1086
1086
1087 For matrix-matrix multiplication, the same dimension-matching rules must
1087 For matrix-matrix multiplication, the same dimension-matching rules must
1088 be satisfied, e.g. consider the difference between :math:`A \times A^T`:
1088 be satisfied, e.g. consider the difference between :math:`A \times A^T`:
1089
1089
1090 In[51]:
1090 In[51]:
1091
1091
1092 .. code:: python
1092 .. code:: python
1093
1093
1094 print A.dot(A.T)
1094 print A.dot(A.T)
1095
1095
1096 .. parsed-literal::
1096 .. parsed-literal::
1097
1097
1098 [[ 5 14]
1098 [[ 5 14]
1099 [14 50]]
1099 [14 50]]
1100
1100
1101
1101
1102 and :math:`A^T \times A`:
1102 and :math:`A^T \times A`:
1103
1103
1104 In[52]:
1104 In[52]:
1105
1105
1106 .. code:: python
1106 .. code:: python
1107
1107
1108 print A.T.dot(A)
1108 print A.T.dot(A)
1109
1109
1110 .. parsed-literal::
1110 .. parsed-literal::
1111
1111
1112 [[ 9 12 15]
1112 [[ 9 12 15]
1113 [12 17 22]
1113 [12 17 22]
1114 [15 22 29]]
1114 [15 22 29]]
1115
1115
1116
1116
1117 Furthermore, the ``numpy.linalg`` module includes additional
1117 Furthermore, the ``numpy.linalg`` module includes additional
1118 functionality such as determinants, matrix norms, Cholesky, eigenvalue
1118 functionality such as determinants, matrix norms, Cholesky, eigenvalue
1119 and singular value decompositions, etc. For even more linear algebra
1119 and singular value decompositions, etc. For even more linear algebra
1120 tools, ``scipy.linalg`` contains the majority of the tools in the
1120 tools, ``scipy.linalg`` contains the majority of the tools in the
1121 classic LAPACK libraries as well as functions to operate on sparse
1121 classic LAPACK libraries as well as functions to operate on sparse
1122 matrices. We refer the reader to the Numpy and Scipy documentation for
1122 matrices. We refer the reader to the Numpy and Scipy documentation for
1123 additional details on these.
1123 additional details on these.
1124
1124
1125 Reading and writing arrays to disk
1125 Reading and writing arrays to disk
1126 ----------------------------------
1126 ----------------------------------
1127
1127
1128 Numpy lets you read and write arrays into files in a number of ways. In
1128 Numpy lets you read and write arrays into files in a number of ways. In
1129 order to use these tools well, it is critical to understand the
1129 order to use these tools well, it is critical to understand the
1130 difference between a *text* and a *binary* file containing numerical
1130 difference between a *text* and a *binary* file containing numerical
1131 data. In a text file, the number :math:`\pi` could be written as
1131 data. In a text file, the number :math:`\pi` could be written as
1132 "3.141592653589793", for example: a string of digits that a human can
1132 "3.141592653589793", for example: a string of digits that a human can
1133 read, with in this case 15 decimal digits. In contrast, that same number
1133 read, with in this case 15 decimal digits. In contrast, that same number
1134 written to a binary file would be encoded as 8 characters (bytes) that
1134 written to a binary file would be encoded as 8 characters (bytes) that
1135 are not readable by a human but which contain the exact same data that
1135 are not readable by a human but which contain the exact same data that
1136 the variable ``pi`` had in the computer's memory.
1136 the variable ``pi`` had in the computer's memory.
1137
1137
1138 The tradeoffs between the two modes are thus:
1138 The tradeoffs between the two modes are thus:
1139
1139
1140 - Text mode: occupies more space, precision can be lost (if not all
1140 - Text mode: occupies more space, precision can be lost (if not all
1141 digits are written to disk), but is readable and editable by hand
1141 digits are written to disk), but is readable and editable by hand
1142 with a text editor. Can *only* be used for one- and two-dimensional
1142 with a text editor. Can *only* be used for one- and two-dimensional
1143 arrays.
1143 arrays.
1144
1144
1145 - Binary mode: compact and exact representation of the data in memory,
1145 - Binary mode: compact and exact representation of the data in memory,
1146 can't be read or edited by hand. Arrays of any size and
1146 can't be read or edited by hand. Arrays of any size and
1147 dimensionality can be saved and read without loss of information.
1147 dimensionality can be saved and read without loss of information.
1148
1148
1149 First, let's see how to read and write arrays in text mode. The
1149 First, let's see how to read and write arrays in text mode. The
1150 ``np.savetxt`` function saves an array to a text file, with options to
1150 ``np.savetxt`` function saves an array to a text file, with options to
1151 control the precision, separators and even adding a header:
1151 control the precision, separators and even adding a header:
1152
1152
1153 In[53]:
1153 In[53]:
1154
1154
1155 .. code:: python
1155 .. code:: python
1156
1156
1157 arr = np.arange(10).reshape(2, 5)
1157 arr = np.arange(10).reshape(2, 5)
1158 np.savetxt('test.out', arr, fmt='%.2e', header="My dataset")
1158 np.savetxt('test.out', arr, fmt='%.2e', header="My dataset")
1159 !cat test.out
1159 !cat test.out
1160
1160
1161 .. parsed-literal::
1161 .. parsed-literal::
1162
1162
1163 # My dataset
1163 # My dataset
1164 0.00e+00 1.00e+00 2.00e+00 3.00e+00 4.00e+00
1164 0.00e+00 1.00e+00 2.00e+00 3.00e+00 4.00e+00
1165 5.00e+00 6.00e+00 7.00e+00 8.00e+00 9.00e+00
1165 5.00e+00 6.00e+00 7.00e+00 8.00e+00 9.00e+00
1166
1166
1167
1167
1168 And this same type of file can then be read with the matching
1168 And this same type of file can then be read with the matching
1169 ``np.loadtxt`` function:
1169 ``np.loadtxt`` function:
1170
1170
1171 In[54]:
1171 In[54]:
1172
1172
1173 .. code:: python
1173 .. code:: python
1174
1174
1175 arr2 = np.loadtxt('test.out')
1175 arr2 = np.loadtxt('test.out')
1176 print arr2
1176 print arr2
1177
1177
1178 .. parsed-literal::
1178 .. parsed-literal::
1179
1179
1180 [[ 0. 1. 2. 3. 4.]
1180 [[ 0. 1. 2. 3. 4.]
1181 [ 5. 6. 7. 8. 9.]]
1181 [ 5. 6. 7. 8. 9.]]
1182
1182
1183
1183
1184 For binary data, Numpy provides the ``np.save`` and ``np.savez``
1184 For binary data, Numpy provides the ``np.save`` and ``np.savez``
1185 routines. The first saves a single array to a file with ``.npy``
1185 routines. The first saves a single array to a file with ``.npy``
1186 extension, while the latter can be used to save a *group* of arrays into
1186 extension, while the latter can be used to save a *group* of arrays into
1187 a single file with ``.npz`` extension. The files created with these
1187 a single file with ``.npz`` extension. The files created with these
1188 routines can then be read with the ``np.load`` function.
1188 routines can then be read with the ``np.load`` function.
1189
1189
1190 Let us first see how to use the simpler ``np.save`` function to save a
1190 Let us first see how to use the simpler ``np.save`` function to save a
1191 single array:
1191 single array:
1192
1192
1193 In[55]:
1193 In[55]:
1194
1194
1195 .. code:: python
1195 .. code:: python
1196
1196
1197 np.save('test.npy', arr2)
1197 np.save('test.npy', arr2)
1198 # Now we read this back
1198 # Now we read this back
1199 arr2n = np.load('test.npy')
1199 arr2n = np.load('test.npy')
1200 # Let's see if any element is non-zero in the difference.
1200 # Let's see if any element is non-zero in the difference.
1201 # A value of True would be a problem.
1201 # A value of True would be a problem.
1202 print 'Any differences?', np.any(arr2-arr2n)
1202 print 'Any differences?', np.any(arr2-arr2n)
1203
1203
1204 .. parsed-literal::
1204 .. parsed-literal::
1205
1205
1206 Any differences? False
1206 Any differences? False
1207
1207
1208
1208
1209 Now let us see how the ``np.savez`` function works. You give it a
1209 Now let us see how the ``np.savez`` function works. You give it a
1210 filename and either a sequence of arrays or a set of keywords. In the
1210 filename and either a sequence of arrays or a set of keywords. In the
1211 first mode, the function will automatically name the saved arrays in the
1211 first mode, the function will automatically name the saved arrays in the
1212 archive as ``arr_0``, ``arr_1``, etc:
1212 archive as ``arr_0``, ``arr_1``, etc:
1213
1213
1214 In[56]:
1214 In[56]:
1215
1215
1216 .. code:: python
1216 .. code:: python
1217
1217
1218 np.savez('test.npz', arr, arr2)
1218 np.savez('test.npz', arr, arr2)
1219 arrays = np.load('test.npz')
1219 arrays = np.load('test.npz')
1220 arrays.files
1220 arrays.files
1221
1221
1222 Out[56]:
1222 Out[56]:
1223
1223
1224 .. parsed-literal::
1224 .. parsed-literal::
1225
1225
1226 ['arr_1', 'arr_0']
1226 ['arr_1', 'arr_0']
1227
1227
1228 Alternatively, we can explicitly choose how to name the arrays we save:
1228 Alternatively, we can explicitly choose how to name the arrays we save:
1229
1229
1230 In[57]:
1230 In[57]:
1231
1231
1232 .. code:: python
1232 .. code:: python
1233
1233
1234 np.savez('test.npz', array1=arr, array2=arr2)
1234 np.savez('test.npz', array1=arr, array2=arr2)
1235 arrays = np.load('test.npz')
1235 arrays = np.load('test.npz')
1236 arrays.files
1236 arrays.files
1237
1237
1238 Out[57]:
1238 Out[57]:
1239
1239
1240 .. parsed-literal::
1240 .. parsed-literal::
1241
1241
1242 ['array2', 'array1']
1242 ['array2', 'array1']
1243
1243
1244 The object returned by ``np.load`` from an ``.npz`` file works like a
1244 The object returned by ``np.load`` from an ``.npz`` file works like a
1245 dictionary, though you can also access its constituent files by
1245 dictionary, though you can also access its constituent files by
1246 attribute using its special ``.f`` field; this is best illustrated with
1246 attribute using its special ``.f`` field; this is best illustrated with
1247 an example with the ``arrays`` object from above:
1247 an example with the ``arrays`` object from above:
1248
1248
1249 In[58]:
1249 In[58]:
1250
1250
1251 .. code:: python
1251 .. code:: python
1252
1252
1253 print 'First row of first array:', arrays['array1'][0]
1253 print 'First row of first array:', arrays['array1'][0]
1254 # This is an equivalent way to get the same field
1254 # This is an equivalent way to get the same field
1255 print 'First row of first array:', arrays.f.array1[0]
1255 print 'First row of first array:', arrays.f.array1[0]
1256
1256
1257 .. parsed-literal::
1257 .. parsed-literal::
1258
1258
1259 First row of first array: [0 1 2 3 4]
1259 First row of first array: [0 1 2 3 4]
1260 First row of first array: [0 1 2 3 4]
1260 First row of first array: [0 1 2 3 4]
1261
1261
1262
1262
1263 This ``.npz`` format is a very convenient way to package compactly and
1263 This ``.npz`` format is a very convenient way to package compactly and
1264 without loss of information, into a single file, a group of related
1264 without loss of information, into a single file, a group of related
1265 arrays that pertain to a specific problem. At some point, however, the
1265 arrays that pertain to a specific problem. At some point, however, the
1266 complexity of your dataset may be such that the optimal approach is to
1266 complexity of your dataset may be such that the optimal approach is to
1267 use one of the standard formats in scientific data processing that have
1267 use one of the standard formats in scientific data processing that have
1268 been designed to handle complex datasets, such as NetCDF or HDF5.
1268 been designed to handle complex datasets, such as NetCDF or HDF5.
1269
1269
1270 Fortunately, there are tools for manipulating these formats in Python,
1270 Fortunately, there are tools for manipulating these formats in Python,
1271 and for storing data in other ways such as databases. A complete
1271 and for storing data in other ways such as databases. A complete
1272 discussion of the possibilities is beyond the scope of this discussion,
1272 discussion of the possibilities is beyond the scope of this discussion,
1273 but of particular interest for scientific users we at least mention the
1273 but of particular interest for scientific users we at least mention the
1274 following:
1274 following:
1275
1275
1276 - The ``scipy.io`` module contains routines to read and write Matlab
1276 - The ``scipy.io`` module contains routines to read and write Matlab
1277 files in ``.mat`` format and files in the NetCDF format that is
1277 files in ``.mat`` format and files in the NetCDF format that is
1278 widely used in certain scientific disciplines.
1278 widely used in certain scientific disciplines.
1279
1279
1280 - For manipulating files in the HDF5 format, there are two excellent
1280 - For manipulating files in the HDF5 format, there are two excellent
1281 options in Python: The PyTables project offers a high-level, object
1281 options in Python: The PyTables project offers a high-level, object
1282 oriented approach to manipulating HDF5 datasets, while the h5py
1282 oriented approach to manipulating HDF5 datasets, while the h5py
1283 project offers a more direct mapping to the standard HDF5 library
1283 project offers a more direct mapping to the standard HDF5 library
1284 interface. Both are excellent tools; if you need to work with HDF5
1284 interface. Both are excellent tools; if you need to work with HDF5
1285 datasets you should read some of their documentation and examples and
1285 datasets you should read some of their documentation and examples and
1286 decide which approach is a better match for your needs.
1286 decide which approach is a better match for your needs.
1287
1287
1288
1288
1289
1289
1290 High quality data visualization with Matplotlib
1290 High quality data visualization with Matplotlib
1291 ===============================================
1291 ===============================================
1292
1292
1293 The `matplotlib <http://matplotlib.sf.net>`_ library is a powerful tool
1293 The `matplotlib <http://matplotlib.sf.net>`_ library is a powerful tool
1294 capable of producing complex publication-quality figures with fine
1294 capable of producing complex publication-quality figures with fine
1295 layout control in two and three dimensions; here we will only provide a
1295 layout control in two and three dimensions; here we will only provide a
1296 minimal self-contained introduction to its usage that covers the
1296 minimal self-contained introduction to its usage that covers the
1297 functionality needed for the rest of the book. We encourage the reader
1297 functionality needed for the rest of the book. We encourage the reader
1298 to read the tutorials included with the matplotlib documentation as well
1298 to read the tutorials included with the matplotlib documentation as well
1299 as to browse its extensive gallery of examples that include source code.
1299 as to browse its extensive gallery of examples that include source code.
1300
1300
1301 Just as we typically use the shorthand ``np`` for Numpy, we will use
1301 Just as we typically use the shorthand ``np`` for Numpy, we will use
1302 ``plt`` for the ``matplotlib.pyplot`` module where the easy-to-use
1302 ``plt`` for the ``matplotlib.pyplot`` module where the easy-to-use
1303 plotting functions reside (the library contains a rich object-oriented
1303 plotting functions reside (the library contains a rich object-oriented
1304 architecture that we don't have the space to discuss here):
1304 architecture that we don't have the space to discuss here):
1305
1305
1306 In[59]:
1306 In[59]:
1307
1307
1308 .. code:: python
1308 .. code:: python
1309
1309
1310 import matplotlib.pyplot as plt
1310 import matplotlib.pyplot as plt
1311
1311
1312 The most frequently used function is simply called ``plot``, here is how
1312 The most frequently used function is simply called ``plot``, here is how
1313 you can make a simple plot of :math:`\sin(x)` for
1313 you can make a simple plot of :math:`\sin(x)` for
1314 :math:`x \in [0, 2\pi]` with labels and a grid (we use the semicolon in
1314 :math:`x \in [0, 2\pi]` with labels and a grid (we use the semicolon in
1315 the last line to suppress the display of some information that is
1315 the last line to suppress the display of some information that is
1316 unnecessary right now):
1316 unnecessary right now):
1317
1317
1318 In[60]:
1318 In[60]:
1319
1319
1320 .. code:: python
1320 .. code:: python
1321
1321
1322 x = np.linspace(0, 2*np.pi)
1322 x = np.linspace(0, 2*np.pi)
1323 y = np.sin(x)
1323 y = np.sin(x)
1324 plt.plot(x,y, label='sin(x)')
1324 plt.plot(x,y, label='sin(x)')
1325 plt.legend()
1325 plt.legend()
1326 plt.grid()
1326 plt.grid()
1327 plt.title('Harmonic')
1327 plt.title('Harmonic')
1328 plt.xlabel('x')
1328 plt.xlabel('x')
1329 plt.ylabel('y');
1329 plt.ylabel('y');
1330
1330
1331 .. image:: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_01.svg
1331 .. image:: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_01.svg
1332
1332
1333 You can control the style, color and other properties of the markers,
1333 You can control the style, color and other properties of the markers,
1334 for example:
1334 for example:
1335
1335
1336 In[61]:
1336 In[61]:
1337
1337
1338 .. code:: python
1338 .. code:: python
1339
1339
1340 plt.plot(x, y, linewidth=2);
1340 plt.plot(x, y, linewidth=2);
1341
1341
1342 .. image:: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_02.svg
1342 .. image:: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_02.svg
1343
1343
1344 In[62]:
1344 In[62]:
1345
1345
1346 .. code:: python
1346 .. code:: python
1347
1347
1348 plt.plot(x, y, 'o', markersize=5, color='r');
1348 plt.plot(x, y, 'o', markersize=5, color='r');
1349
1349
1350 .. image:: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_03.svg
1350 .. image:: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_03.svg
1351
1351
1352 We will now see how to create a few other common plot types, such as a
1352 We will now see how to create a few other common plot types, such as a
1353 simple error plot:
1353 simple error plot:
1354
1354
1355 In[63]:
1355 In[63]:
1356
1356
1357 .. code:: python
1357 .. code:: python
1358
1358
1359 # example data
1359 # example data
1360 x = np.arange(0.1, 4, 0.5)
1360 x = np.arange(0.1, 4, 0.5)
1361 y = np.exp(-x)
1361 y = np.exp(-x)
1362
1362
1363 # example variable error bar values
1363 # example variable error bar values
1364 yerr = 0.1 + 0.2*np.sqrt(x)
1364 yerr = 0.1 + 0.2*np.sqrt(x)
1365 xerr = 0.1 + yerr
1365 xerr = 0.1 + yerr
1366
1366
1367 # First illustrate basic pyplot interface, using defaults where possible.
1367 # First illustrate basic pyplot interface, using defaults where possible.
1368 plt.figure()
1368 plt.figure()
1369 plt.errorbar(x, y, xerr=0.2, yerr=0.4)
1369 plt.errorbar(x, y, xerr=0.2, yerr=0.4)
1370 plt.title("Simplest errorbars, 0.2 in x, 0.4 in y");
1370 plt.title("Simplest errorbars, 0.2 in x, 0.4 in y");
1371
1371
1372 .. image:: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_04.svg
1372 .. image:: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_04.svg
1373
1373
1374 A simple log plot
1374 A simple log plot
1375
1375
1376 In[64]:
1376 In[64]:
1377
1377
1378 .. code:: python
1378 .. code:: python
1379
1379
1380 x = np.linspace(-5, 5)
1380 x = np.linspace(-5, 5)
1381 y = np.exp(-x**2)
1381 y = np.exp(-x**2)
1382 plt.semilogy(x, y);
1382 plt.semilogy(x, y);
1383
1383
1384 .. image:: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_05.svg
1384 .. image:: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_05.svg
1385
1385
1386 A histogram annotated with text inside the plot, using the ``text``
1386 A histogram annotated with text inside the plot, using the ``text``
1387 function:
1387 function:
1388
1388
1389 In[65]:
1389 In[65]:
1390
1390
1391 .. code:: python
1391 .. code:: python
1392
1392
1393 mu, sigma = 100, 15
1393 mu, sigma = 100, 15
1394 x = mu + sigma * np.random.randn(10000)
1394 x = mu + sigma * np.random.randn(10000)
1395
1395
1396 # the histogram of the data
1396 # the histogram of the data
1397 n, bins, patches = plt.hist(x, 50, normed=1, facecolor='g', alpha=0.75)
1397 n, bins, patches = plt.hist(x, 50, normed=1, facecolor='g', alpha=0.75)
1398
1398
1399 plt.xlabel('Smarts')
1399 plt.xlabel('Smarts')
1400 plt.ylabel('Probability')
1400 plt.ylabel('Probability')
1401 plt.title('Histogram of IQ')
1401 plt.title('Histogram of IQ')
1402 # This will put a text fragment at the position given:
1402 # This will put a text fragment at the position given:
1403 plt.text(55, .027, r'$\mu=100,\ \sigma=15$', fontsize=14)
1403 plt.text(55, .027, r'$\mu=100,\ \sigma=15$', fontsize=14)
1404 plt.axis([40, 160, 0, 0.03])
1404 plt.axis([40, 160, 0, 0.03])
1405 plt.grid(True)
1405 plt.grid(True)
1406
1406
1407 .. image:: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_06.svg
1407 .. image:: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_06.svg
1408
1408
1409 Image display
1409 Image display
1410 -------------
1410 -------------
1411
1411
1412 The ``imshow`` command can display single or multi-channel images. A
1412 The ``imshow`` command can display single or multi-channel images. A
1413 simple array of random numbers, plotted in grayscale:
1413 simple array of random numbers, plotted in grayscale:
1414
1414
1415 In[66]:
1415 In[66]:
1416
1416
1417 .. code:: python
1417 .. code:: python
1418
1418
1419 from matplotlib import cm
1419 from matplotlib import cm
1420 plt.imshow(np.random.rand(5, 10), cmap=cm.gray, interpolation='nearest');
1420 plt.imshow(np.random.rand(5, 10), cmap=cm.gray, interpolation='nearest');
1421
1421
1422 .. image:: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_07.svg
1422 .. image:: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_07.svg
1423
1423
1424 A real photograph is a multichannel image, ``imshow`` interprets it
1424 A real photograph is a multichannel image, ``imshow`` interprets it
1425 correctly:
1425 correctly:
1426
1426
1427 In[67]:
1427 In[67]:
1428
1428
1429 .. code:: python
1429 .. code:: python
1430
1430
1431 img = plt.imread('stinkbug.png')
1431 img = plt.imread('stinkbug.png')
1432 print 'Dimensions of the array img:', img.shape
1432 print 'Dimensions of the array img:', img.shape
1433 plt.imshow(img);
1433 plt.imshow(img);
1434
1434
1435 .. parsed-literal::
1435 .. parsed-literal::
1436
1436
1437 Dimensions of the array img: (375, 500, 3)
1437 Dimensions of the array img: (375, 500, 3)
1438
1438
1439
1439
1440 .. image:: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_08.svg
1440 .. image:: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_08.svg
1441
1441
1442 Simple 3d plotting with matplotlib
1442 Simple 3d plotting with matplotlib
1443 ----------------------------------
1443 ----------------------------------
1444
1444
1445 Note that you must execute at least once in your session:
1445 Note that you must execute at least once in your session:
1446
1446
1447 In[68]:
1447 In[68]:
1448
1448
1449 .. code:: python
1449 .. code:: python
1450
1450
1451 from mpl_toolkits.mplot3d import Axes3D
1451 from mpl_toolkits.mplot3d import Axes3D
1452
1452
1453 Once this has been done, you can create 3d axes with the
1453 Once this has been done, you can create 3d axes with the
1454 ``projection='3d'`` keyword to ``add_subplot``:
1454 ``projection='3d'`` keyword to ``add_subplot``:
1455
1455
1456 ::
1456 ::
1457
1457
1458 fig = plt.figure()
1458 fig = plt.figure()
1459 fig.add_subplot(<other arguments here>, projection='3d')
1459 fig.add_subplot(<other arguments here>, projection='3d')
1460
1460
1461
1461
1462 A simple surface plot:
1462 A simple surface plot:
1463
1463
1464 In[72]:
1464 In[72]:
1465
1465
1466 .. code:: python
1466 .. code:: python
1467
1467
1468 from mpl_toolkits.mplot3d.axes3d import Axes3D
1468 from mpl_toolkits.mplot3d.axes3d import Axes3D
1469 from matplotlib import cm
1469 from matplotlib import cm
1470
1470
1471 fig = plt.figure()
1471 fig = plt.figure()
1472 ax = fig.add_subplot(1, 1, 1, projection='3d')
1472 ax = fig.add_subplot(1, 1, 1, projection='3d')
1473 X = np.arange(-5, 5, 0.25)
1473 X = np.arange(-5, 5, 0.25)
1474 Y = np.arange(-5, 5, 0.25)
1474 Y = np.arange(-5, 5, 0.25)
1475 X, Y = np.meshgrid(X, Y)
1475 X, Y = np.meshgrid(X, Y)
1476 R = np.sqrt(X**2 + Y**2)
1476 R = np.sqrt(X**2 + Y**2)
1477 Z = np.sin(R)
1477 Z = np.sin(R)
1478 surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet,
1478 surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet,
1479 linewidth=0, antialiased=False)
1479 linewidth=0, antialiased=False)
1480 ax.set_zlim3d(-1.01, 1.01);
1480 ax.set_zlim3d(-1.01, 1.01);
1481
1481
1482 .. image:: /Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_09.svg
1482 .. image:: tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_09.svg
1483
1483
1484 IPython: a powerful interactive environment
1484 IPython: a powerful interactive environment
1485 ===========================================
1485 ===========================================
1486
1486
1487 A key component of the everyday workflow of most scientific computing
1487 A key component of the everyday workflow of most scientific computing
1488 environments is a good interactive environment, that is, a system in
1488 environments is a good interactive environment, that is, a system in
1489 which you can execute small amounts of code and view the results
1489 which you can execute small amounts of code and view the results
1490 immediately, combining both printing out data and opening graphical
1490 immediately, combining both printing out data and opening graphical
1491 visualizations. All modern systems for scientific computing, commercial
1491 visualizations. All modern systems for scientific computing, commercial
1492 and open source, include such functionality.
1492 and open source, include such functionality.
1493
1493
1494 Out of the box, Python also offers a simple interactive shell with very
1494 Out of the box, Python also offers a simple interactive shell with very
1495 limited capabilities. But just like the scientific community built Numpy
1495 limited capabilities. But just like the scientific community built Numpy
1496 to provide arrays suited for scientific work (since Python's lists
1496 to provide arrays suited for scientific work (since Python's lists
1497 aren't optimal for this task), it has also developed an interactive
1497 aren't optimal for this task), it has also developed an interactive
1498 environment much more sophisticated than the built-in one. The `IPython
1498 environment much more sophisticated than the built-in one. The `IPython
1499 project <http://ipython.org>`_ offers a set of tools to make productive
1499 project <http://ipython.org>`_ offers a set of tools to make productive
1500 use of the Python language, all the while working interactively and with
1500 use of the Python language, all the while working interactively and with
1501 immediate feedback on your results. The basic tools that IPython provides
1501 immediate feedback on your results. The basic tools that IPython provides
1502 are:
1502 are:
1503
1503
1504 1. A powerful terminal shell, with many features designed to increase
1504 1. A powerful terminal shell, with many features designed to increase
1505 the fluidity and productivity of everyday scientific workflows,
1505 the fluidity and productivity of everyday scientific workflows,
1506 including:
1506 including:
1507
1507
1508 - rich introspection of all objects and variables including easy
1508 - rich introspection of all objects and variables including easy
1509 access to the source code of any function
1509 access to the source code of any function
1510 - powerful and extensible tab completion of variables and filenames,
1510 - powerful and extensible tab completion of variables and filenames,
1511 - tight integration with matplotlib, supporting interactive figures
1511 - tight integration with matplotlib, supporting interactive figures
1512 that don't block the terminal,
1512 that don't block the terminal,
1513 - direct access to the filesystem and underlying operating system,
1513 - direct access to the filesystem and underlying operating system,
1514 - an extensible system for shell-like commands called 'magics' that
1514 - an extensible system for shell-like commands called 'magics' that
1515 reduce the work needed to perform many common tasks,
1515 reduce the work needed to perform many common tasks,
1516 - tools for easily running, timing, profiling and debugging your
1516 - tools for easily running, timing, profiling and debugging your
1517 codes,
1517 codes,
1518 - syntax highlighted error messages with much more detail than the
1518 - syntax highlighted error messages with much more detail than the
1519 default Python ones,
1519 default Python ones,
1520 - logging and access to all previous history of inputs, including
1520 - logging and access to all previous history of inputs, including
1521 across sessions
1521 across sessions
1522
1522
1523 2. A Qt console that provides the look and feel of a terminal, but adds
1523 2. A Qt console that provides the look and feel of a terminal, but adds
1524 support for inline figures, graphical calltips, a persistent session
1524 support for inline figures, graphical calltips, a persistent session
1525 that can survive crashes (even segfaults) of the kernel process, and
1525 that can survive crashes (even segfaults) of the kernel process, and
1526 more.
1526 more.
1527
1527
1528 3. A web-based notebook that can execute code and also contain rich text
1528 3. A web-based notebook that can execute code and also contain rich text
1529 and figures, mathematical equations and arbitrary HTML. This notebook
1529 and figures, mathematical equations and arbitrary HTML. This notebook
1530 presents a document-like view with cells where code is executed but
1530 presents a document-like view with cells where code is executed but
1531 that can be edited in-place, reordered, mixed with explanatory text
1531 that can be edited in-place, reordered, mixed with explanatory text
1532 and figures, etc.
1532 and figures, etc.
1533
1533
1534 4. A high-performance, low-latency system for parallel computing that
1534 4. A high-performance, low-latency system for parallel computing that
1535 supports the control of a cluster of IPython engines communicating
1535 supports the control of a cluster of IPython engines communicating
1536 over a network, with optimizations that minimize unnecessary copying
1536 over a network, with optimizations that minimize unnecessary copying
1537 of large objects (especially numpy arrays).
1537 of large objects (especially numpy arrays).
1538
1538
1539 We will now discuss the highlights of the tools 1-3 above so that you
1539 We will now discuss the highlights of the tools 1-3 above so that you
1540 can make them an effective part of your workflow. The topic of parallel
1540 can make them an effective part of your workflow. The topic of parallel
1541 computing is beyond the scope of this document, but we encourage you to
1541 computing is beyond the scope of this document, but we encourage you to
1542 read the extensive
1542 read the extensive
1543 `documentation <http://ipython.org/ipython-doc/rel-0.12.1/parallel/index.html>`_
1543 `documentation <http://ipython.org/ipython-doc/rel-0.12.1/parallel/index.html>`_
1544 and `tutorials <http://minrk.github.com/scipy-tutorial-2011/>`_ on this
1544 and `tutorials <http://minrk.github.com/scipy-tutorial-2011/>`_ on this
1545 topic, available on the IPython website.
1545 topic, available on the IPython website.
1546
1546
1547 The IPython terminal
1547 The IPython terminal
1548 --------------------
1548 --------------------
1549
1549
1550 You can start IPython at the terminal simply by typing:
1550 You can start IPython at the terminal simply by typing:
1551
1551
1552 ::
1552 ::
1553
1553
1554 $ ipython
1554 $ ipython
1555
1555
1556 which will provide you some basic information about how to get started
1556 which will provide you some basic information about how to get started
1557 and will then open a prompt labeled ``In [1]:`` for you to start typing.
1557 and will then open a prompt labeled ``In [1]:`` for you to start typing.
1558 Here we type :math:`2^{64}` and Python computes the result for us in
1558 Here we type :math:`2^{64}` and Python computes the result for us in
1559 exact arithmetic, returning it as ``Out[1]``:
1559 exact arithmetic, returning it as ``Out[1]``:
1560
1560
1561 ::
1561 ::
1562
1562
1563 $ ipython
1563 $ ipython
1564 Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
1564 Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
1565 Type "copyright", "credits" or "license" for more information.
1565 Type "copyright", "credits" or "license" for more information.
1566
1566
1567 IPython 0.13.dev -- An enhanced Interactive Python.
1567 IPython 0.13.dev -- An enhanced Interactive Python.
1568 ? -> Introduction and overview of IPython's features.
1568 ? -> Introduction and overview of IPython's features.
1569 %quickref -> Quick reference.
1569 %quickref -> Quick reference.
1570 help -> Python's own help system.
1570 help -> Python's own help system.
1571 object? -> Details about 'object', use 'object??' for extra details.
1571 object? -> Details about 'object', use 'object??' for extra details.
1572
1572
1573 In [1]: 2**64
1573 In [1]: 2**64
1574 Out[1]: 18446744073709551616L
1574 Out[1]: 18446744073709551616L
1575
1575
1576 The first thing you should know about IPython is that all your inputs
1576 The first thing you should know about IPython is that all your inputs
1577 and outputs are saved. There are two variables named ``In`` and ``Out``
1577 and outputs are saved. There are two variables named ``In`` and ``Out``
1578 which are filled as you work with your results. Furthermore, all outputs
1578 which are filled as you work with your results. Furthermore, all outputs
1579 are also saved to auto-created variables of the form ``_NN`` where
1579 are also saved to auto-created variables of the form ``_NN`` where
1580 ``NN`` is the prompt number, and inputs to ``_iNN``. This allows you to
1580 ``NN`` is the prompt number, and inputs to ``_iNN``. This allows you to
1581 recover quickly the result of a prior computation by referring to its
1581 recover quickly the result of a prior computation by referring to its
1582 number even if you forgot to store it as a variable. For example, later
1582 number even if you forgot to store it as a variable. For example, later
1583 on in the above session you can do:
1583 on in the above session you can do:
1584
1584
1585 ::
1585 ::
1586
1586
1587 In [6]: print _1
1587 In [6]: print _1
1588 18446744073709551616
1588 18446744073709551616
1589
1589
1590
1590
1591 We strongly recommend that you take a few minutes to read at least the
1591 We strongly recommend that you take a few minutes to read at least the
1592 basic introduction provided by the ``?`` command, and keep in mind that
1592 basic introduction provided by the ``?`` command, and keep in mind that
1593 the ``%quickref`` command at all times can be used as a quick reference
1593 the ``%quickref`` command at all times can be used as a quick reference
1594 "cheat sheet" of the most frequently used features of IPython.
1594 "cheat sheet" of the most frequently used features of IPython.
1595
1595
1596 At the IPython prompt, any valid Python code that you type will be
1596 At the IPython prompt, any valid Python code that you type will be
1597 executed similarly to the default Python shell (though often with more
1597 executed similarly to the default Python shell (though often with more
1598 informative feedback). Since IPython is a *superset* of the default
1598 informative feedback). Since IPython is a *superset* of the default
1599 Python shell, let's have a brief look at some of its additional
1599 Python shell, let's have a brief look at some of its additional
1600 functionality.
1600 functionality.
1601
1601
1602 **Object introspection**
1602 **Object introspection**
1603
1603
1604 A simple ``?`` command provides a general introduction to IPython, but
1604 A simple ``?`` command provides a general introduction to IPython, but
1605 as indicated in the banner above, you can use the ``?`` syntax to ask
1605 as indicated in the banner above, you can use the ``?`` syntax to ask
1606 for details about any object. For example, if we type ``_1?``, IPython
1606 for details about any object. For example, if we type ``_1?``, IPython
1607 will print the following details about this variable:
1607 will print the following details about this variable:
1608
1608
1609 ::
1609 ::
1610
1610
1611 In [14]: _1?
1611 In [14]: _1?
1612 Type: long
1612 Type: long
1613 Base Class: <type 'long'>
1613 Base Class: <type 'long'>
1614 String Form:18446744073709551616
1614 String Form:18446744073709551616
1615 Namespace: Interactive
1615 Namespace: Interactive
1616 Docstring:
1616 Docstring:
1617 long(x[, base]) -> integer
1617 long(x[, base]) -> integer
1618
1618
1619 Convert a string or number to a long integer, if possible. A floating
1619 Convert a string or number to a long integer, if possible. A floating
1620
1620
1621 [etc... snipped for brevity]
1621 [etc... snipped for brevity]
1622
1622
1623 If you add a second ``?`` and for any object ``x`` type ``x??``,
1623 If you add a second ``?`` and for any object ``x`` type ``x??``,
1624 IPython will try to provide an even more detailed analysis of the
1624 IPython will try to provide an even more detailed analysis of the
1625 object, including its syntax-highlighted source code when it can be
1625 object, including its syntax-highlighted source code when it can be
1626 found. It's possible that ``x??`` returns the same information as
1626 found. It's possible that ``x??`` returns the same information as
1627 ``x?``, but in many cases ``x??`` will indeed provide additional
1627 ``x?``, but in many cases ``x??`` will indeed provide additional
1628 details.
1628 details.
1629
1629
1630 Finally, the ``?`` syntax is also useful to search *namespaces* with
1630 Finally, the ``?`` syntax is also useful to search *namespaces* with
1631 wildcards. Suppose you are wondering if there is any function in Numpy
1631 wildcards. Suppose you are wondering if there is any function in Numpy
1632 that may do text-related things; with ``np.*txt*?``, IPython will print
1632 that may do text-related things; with ``np.*txt*?``, IPython will print
1633 all the names in the ``np`` namespace (our Numpy shorthand) that have
1633 all the names in the ``np`` namespace (our Numpy shorthand) that have
1634 'txt' anywhere in their name:
1634 'txt' anywhere in their name:
1635
1635
1636 ::
1636 ::
1637
1637
1638 In [17]: np.*txt*?
1638 In [17]: np.*txt*?
1639 np.genfromtxt
1639 np.genfromtxt
1640 np.loadtxt
1640 np.loadtxt
1641 np.mafromtxt
1641 np.mafromtxt
1642 np.ndfromtxt
1642 np.ndfromtxt
1643 np.recfromtxt
1643 np.recfromtxt
1644 np.savetxt
1644 np.savetxt
1645
1645
1646
1646
1647 **Tab completion**
1647 **Tab completion**
1648
1648
1649 IPython makes the tab key work extra hard for you as a way to rapidly
1649 IPython makes the tab key work extra hard for you as a way to rapidly
1650 inspect objects and libraries. Whenever you have typed something at the
1650 inspect objects and libraries. Whenever you have typed something at the
1651 prompt, by hitting the ``<tab>`` key IPython will try to complete the
1651 prompt, by hitting the ``<tab>`` key IPython will try to complete the
1652 rest of the line. For this, IPython will analyze the text you had so far
1652 rest of the line. For this, IPython will analyze the text you had so far
1653 and try to search for Python data or files that may match the context
1653 and try to search for Python data or files that may match the context
1654 you have already provided.
1654 you have already provided.
1655
1655
1656 For example, if you type ``np.load`` and hit the ``<tab>`` key, you'll see:
1656 For example, if you type ``np.load`` and hit the ``<tab>`` key, you'll see:
1657
1657
1658 ::
1658 ::
1659
1659
1660 In [21]: np.load<TAB HERE>
1660 In [21]: np.load<TAB HERE>
1661 np.load np.loads np.loadtxt
1661 np.load np.loads np.loadtxt
1662
1662
1663 so you can quickly find all the load-related functionality in numpy. Tab
1663 so you can quickly find all the load-related functionality in numpy. Tab
1664 completion works even for function arguments, for example consider this
1664 completion works even for function arguments, for example consider this
1665 function definition:
1665 function definition:
1666
1666
1667 ::
1667 ::
1668
1668
1669 In [20]: def f(x, frobinate=False):
1669 In [20]: def f(x, frobinate=False):
1670 ....: if frobinate:
1670 ....: if frobinate:
1671 ....: return x**2
1671 ....: return x**2
1672 ....:
1672 ....:
1673
1673
1674 If you now use the ``<tab>`` key after having typed 'fro' you'll get all
1674 If you now use the ``<tab>`` key after having typed 'fro' you'll get all
1675 valid Python completions, but those marked with ``=`` at the end are
1675 valid Python completions, but those marked with ``=`` at the end are
1676 known to be keywords of your function:
1676 known to be keywords of your function:
1677
1677
1678 ::
1678 ::
1679
1679
1680 In [21]: f(2, fro<TAB HERE>
1680 In [21]: f(2, fro<TAB HERE>
1681 frobinate= frombuffer fromfunction frompyfunc fromstring
1681 frobinate= frombuffer fromfunction frompyfunc fromstring
1682 from fromfile fromiter fromregex frozenset
1682 from fromfile fromiter fromregex frozenset
1683
1683
1684 at this point you can add the ``b`` letter and hit ``<tab>`` once more,
1684 at this point you can add the ``b`` letter and hit ``<tab>`` once more,
1685 and IPython will finish the line for you:
1685 and IPython will finish the line for you:
1686
1686
1687 ::
1687 ::
1688
1688
1689 In [21]: f(2, frobinate=
1689 In [21]: f(2, frobinate=
1690
1690
1691 As a beginner, simply get into the habit of using ``<tab>`` after most
1691 As a beginner, simply get into the habit of using ``<tab>`` after most
1692 objects; it should quickly become second nature as you will see how
1692 objects; it should quickly become second nature as you will see how
1693 it helps keep a fluid workflow and discover useful information. Later on
1693 it helps keep a fluid workflow and discover useful information. Later on
1694 you can also customize this behavior by writing your own completion
1694 you can also customize this behavior by writing your own completion
1695 code, if you so desire.
1695 code, if you so desire.
1696
1696
1697 **Matplotlib integration**
1697 **Matplotlib integration**
1698
1698
1699 One of the most useful features of IPython for scientists is its tight
1699 One of the most useful features of IPython for scientists is its tight
1700 integration with matplotlib: at the terminal IPython lets you open
1700 integration with matplotlib: at the terminal IPython lets you open
1701 matplotlib figures without blocking your typing (which is what happens
1701 matplotlib figures without blocking your typing (which is what happens
1702 if you try to do the same thing at the default Python shell), and in the
1702 if you try to do the same thing at the default Python shell), and in the
1703 Qt console and notebook you can even view your figures embedded in your
1703 Qt console and notebook you can even view your figures embedded in your
1704 workspace next to the code that created them.
1704 workspace next to the code that created them.
1705
1705
1706 The matplotlib support can be either activated when you start IPython by
1706 The matplotlib support can be either activated when you start IPython by
1707 passing the ``--pylab`` flag, or at any point later in your session by
1707 passing the ``--pylab`` flag, or at any point later in your session by
1708 using the ``%pylab`` command. If you start IPython with ``--pylab``,
1708 using the ``%pylab`` command. If you start IPython with ``--pylab``,
1709 you'll see something like this (note the extra message about pylab):
1709 you'll see something like this (note the extra message about pylab):
1710
1710
1711 ::
1711 ::
1712
1712
1713 $ ipython --pylab
1713 $ ipython --pylab
1714 Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
1714 Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
1715 Type "copyright", "credits" or "license" for more information.
1715 Type "copyright", "credits" or "license" for more information.
1716
1716
1717 IPython 0.13.dev -- An enhanced Interactive Python.
1717 IPython 0.13.dev -- An enhanced Interactive Python.
1718 ? -> Introduction and overview of IPython's features.
1718 ? -> Introduction and overview of IPython's features.
1719 %quickref -> Quick reference.
1719 %quickref -> Quick reference.
1720 help -> Python's own help system.
1720 help -> Python's own help system.
1721 object? -> Details about 'object', use 'object??' for extra details.
1721 object? -> Details about 'object', use 'object??' for extra details.
1722
1722
1723 Welcome to pylab, a matplotlib-based Python environment [backend: Qt4Agg].
1723 Welcome to pylab, a matplotlib-based Python environment [backend: Qt4Agg].
1724 For more information, type 'help(pylab)'.
1724 For more information, type 'help(pylab)'.
1725
1725
1726 In [1]:
1726 In [1]:
1727
1727
1728 Furthermore, IPython will import ``numpy`` with the ``np`` shorthand,
1728 Furthermore, IPython will import ``numpy`` with the ``np`` shorthand,
1729 ``matplotlib.pyplot`` as ``plt``, and it will also load all of the numpy
1729 ``matplotlib.pyplot`` as ``plt``, and it will also load all of the numpy
1730 and pyplot top-level names so that you can directly type something like:
1730 and pyplot top-level names so that you can directly type something like:
1731
1731
1732 ::
1732 ::
1733
1733
1734 In [1]: x = linspace(0, 2*pi, 200)
1734 In [1]: x = linspace(0, 2*pi, 200)
1735
1735
1736 In [2]: plot(x, sin(x))
1736 In [2]: plot(x, sin(x))
1737 Out[2]: [<matplotlib.lines.Line2D at 0x9e7c16c>]
1737 Out[2]: [<matplotlib.lines.Line2D at 0x9e7c16c>]
1738
1738
1739 instead of having to prefix each call with its full signature (as we
1739 instead of having to prefix each call with its full signature (as we
1740 have been doing in the examples thus far):
1740 have been doing in the examples thus far):
1741
1741
1742 ::
1742 ::
1743
1743
1744 In [3]: x = np.linspace(0, 2*np.pi, 200)
1744 In [3]: x = np.linspace(0, 2*np.pi, 200)
1745
1745
1746 In [4]: plt.plot(x, np.sin(x))
1746 In [4]: plt.plot(x, np.sin(x))
1747 Out[4]: [<matplotlib.lines.Line2D at 0x9e900ac>]
1747 Out[4]: [<matplotlib.lines.Line2D at 0x9e900ac>]
1748
1748
1749 This shorthand notation can be a huge time-saver when working
1749 This shorthand notation can be a huge time-saver when working
1750 interactively (it's a few characters but you are likely to type them
1750 interactively (it's a few characters but you are likely to type them
1751 hundreds of times in a session). But we should note that as you develop
1751 hundreds of times in a session). But we should note that as you develop
1752 persistent scripts and notebooks meant for reuse, it's best to get in
1752 persistent scripts and notebooks meant for reuse, it's best to get in
1753 the habit of using the longer notation (known as *fully qualified names*),
1753 the habit of using the longer notation (known as *fully qualified names*),
1754 as it's clearer where things come from and it makes for more robust,
1754 as it's clearer where things come from and it makes for more robust,
1755 readable and maintainable code in the long run.
1755 readable and maintainable code in the long run.
1756
1756
1757 **Access to the operating system and files**
1757 **Access to the operating system and files**
1758
1758
1759 In IPython, you can type ``ls`` to see your files or ``cd`` to change
1759 In IPython, you can type ``ls`` to see your files or ``cd`` to change
1760 directories, just like you would at a regular system prompt:
1760 directories, just like you would at a regular system prompt:
1761
1761
1762 ::
1762 ::
1763
1763
1764 In [2]: cd tests
1764 In [2]: cd tests
1765 /home/fperez/ipython/nbconvert/tests
1765 /home/fperez/ipython/nbconvert/tests
1766
1766
1767 In [3]: ls test.*
1767 In [3]: ls test.*
1768 test.aux test.html test.ipynb test.log test.out test.pdf test.rst test.tex
1768 test.aux test.html test.ipynb test.log test.out test.pdf test.rst test.tex
1769
1769
1770 Furthermore, if you use the ``!`` at the beginning of a line, any
1770 Furthermore, if you use the ``!`` at the beginning of a line, any
1771 commands you pass afterwards go directly to the operating system:
1771 commands you pass afterwards go directly to the operating system:
1772
1772
1773 ::
1773 ::
1774
1774
1775 In [4]: !echo "Hello IPython"
1775 In [4]: !echo "Hello IPython"
1776 Hello IPython
1776 Hello IPython
1777
1777
1778 IPython offers a useful twist in this feature: it will substitute in the
1778 IPython offers a useful twist in this feature: it will substitute in the
1779 command the value of any *Python* variable you may have if you prepend
1779 command the value of any *Python* variable you may have if you prepend
1780 it with a ``$`` sign:
1780 it with a ``$`` sign:
1781
1781
1782 ::
1782 ::
1783
1783
1784 In [5]: message = 'IPython interpolates from Python to the shell'
1784 In [5]: message = 'IPython interpolates from Python to the shell'
1785
1785
1786 In [6]: !echo $message
1786 In [6]: !echo $message
1787 IPython interpolates from Python to the shell
1787 IPython interpolates from Python to the shell
1788
1788
1789 This feature can be extremely useful, as it lets you combine the power
1789 This feature can be extremely useful, as it lets you combine the power
1790 and clarity of Python for complex logic with the immediacy and
1790 and clarity of Python for complex logic with the immediacy and
1791 familiarity of many shell commands. Additionally, if you start the line
1791 familiarity of many shell commands. Additionally, if you start the line
1792 with *two* ``!!`` signs, the output of the command will be automatically
1792 with *two* ``!!`` signs, the output of the command will be automatically
1793 captured as a list of lines, e.g.:
1793 captured as a list of lines, e.g.:
1794
1794
1795 ::
1795 ::
1796
1796
1797 In [10]: !!ls test.*
1797 In [10]: !!ls test.*
1798 Out[10]:
1798 Out[10]:
1799 ['test.aux',
1799 ['test.aux',
1800 'test.html',
1800 'test.html',
1801 'test.ipynb',
1801 'test.ipynb',
1802 'test.log',
1802 'test.log',
1803 'test.out',
1803 'test.out',
1804 'test.pdf',
1804 'test.pdf',
1805 'test.rst',
1805 'test.rst',
1806 'test.tex']
1806 'test.tex']
1807
1807
1808 As explained above, you can now use this as the variable ``_10``. If you
1808 As explained above, you can now use this as the variable ``_10``. If you
1809 directly want to capture the output of a system command to a Python
1809 directly want to capture the output of a system command to a Python
1810 variable, you can use the syntax ``=!``:
1810 variable, you can use the syntax ``=!``:
1811
1811
1812 ::
1812 ::
1813
1813
1814 In [11]: testfiles =! ls test.*
1814 In [11]: testfiles =! ls test.*
1815
1815
1816 In [12]: print testfiles
1816 In [12]: print testfiles
1817 ['test.aux', 'test.html', 'test.ipynb', 'test.log', 'test.out', 'test.pdf', 'test.rst', 'test.tex']
1817 ['test.aux', 'test.html', 'test.ipynb', 'test.log', 'test.out', 'test.pdf', 'test.rst', 'test.tex']
1818
1818
1819 Finally, the special ``%alias`` command lets you define names that are
1819 Finally, the special ``%alias`` command lets you define names that are
1820 shorthands for system commands, so that you can type them without having
1820 shorthands for system commands, so that you can type them without having
1821 to prefix them via ``!`` explicitly (for example, ``ls`` is an alias
1821 to prefix them via ``!`` explicitly (for example, ``ls`` is an alias
1822 that has been predefined for you at startup).
1822 that has been predefined for you at startup).
1823
1823
1824 **Magic commands**
1824 **Magic commands**
1825
1825
1826 IPython has a system for special commands, called 'magics', that let you
1826 IPython has a system for special commands, called 'magics', that let you
1827 control IPython itself and perform many common tasks with a more
1827 control IPython itself and perform many common tasks with a more
1828 shell-like syntax: it uses spaces for delimiting arguments, flags can be
1828 shell-like syntax: it uses spaces for delimiting arguments, flags can be
1829 set with dashes and all arguments are treated as strings, so no
1829 set with dashes and all arguments are treated as strings, so no
1830 additional quoting is required. This kind of syntax is invalid in the
1830 additional quoting is required. This kind of syntax is invalid in the
1831 Python language but very convenient for interactive typing (less
1831 Python language but very convenient for interactive typing (less
1832 parentheses, commas and quoting everywhere); IPython distinguishes the
1832 parentheses, commas and quoting everywhere); IPython distinguishes the
1833 two by detecting lines that start with the ``%`` character.
1833 two by detecting lines that start with the ``%`` character.
1834
1834
1835 You can learn more about the magic system by simply typing ``%magic`` at
1835 You can learn more about the magic system by simply typing ``%magic`` at
1836 the prompt, which will give you a short description plus the
1836 the prompt, which will give you a short description plus the
1837 documentation on *all* available magics. If you want to see only a
1837 documentation on *all* available magics. If you want to see only a
1838 listing of existing magics, you can use ``%lsmagic``:
1838 listing of existing magics, you can use ``%lsmagic``:
1839
1839
1840 ::
1840 ::
1841
1841
1842 In [4]: lsmagic
1842 In [4]: lsmagic
1843 Available magic functions:
1843 Available magic functions:
1844 %alias %autocall %autoindent %automagic %bookmark %c %cd %colors %config %cpaste
1844 %alias %autocall %autoindent %automagic %bookmark %c %cd %colors %config %cpaste
1845 %debug %dhist %dirs %doctest_mode %ds %ed %edit %env %gui %hist %history
1845 %debug %dhist %dirs %doctest_mode %ds %ed %edit %env %gui %hist %history
1846 %install_default_config %install_ext %install_profiles %load_ext %loadpy %logoff %logon
1846 %install_default_config %install_ext %install_profiles %load_ext %loadpy %logoff %logon
1847 %logstart %logstate %logstop %lsmagic %macro %magic %notebook %page %paste %pastebin
1847 %logstart %logstate %logstop %lsmagic %macro %magic %notebook %page %paste %pastebin
1848 %pd %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %pop %popd %pprint %precision %profile
1848 %pd %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %pop %popd %pprint %precision %profile
1849 %prun %psearch %psource %pushd %pwd %pycat %pylab %quickref %recall %rehashx
1849 %prun %psearch %psource %pushd %pwd %pycat %pylab %quickref %recall %rehashx
1850 %reload_ext %rep %rerun %reset %reset_selective %run %save %sc %stop %store %sx %tb
1850 %reload_ext %rep %rerun %reset %reset_selective %run %save %sc %stop %store %sx %tb
1851 %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode
1851 %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode
1852
1852
1853 Automagic is ON, % prefix NOT needed for magic functions.
1853 Automagic is ON, % prefix NOT needed for magic functions.
1854
1854
1855 Note how the example above omitted the explicit ``%`` marker and simply
1855 Note how the example above omitted the explicit ``%`` marker and simply
1856 uses ``lsmagic``. As long as the 'automagic' feature is on (which it is
1856 uses ``lsmagic``. As long as the 'automagic' feature is on (which it is
1857 by default), you can omit the ``%`` marker as long as there is no
1857 by default), you can omit the ``%`` marker as long as there is no
1858 ambiguity with a Python variable of the same name.
1858 ambiguity with a Python variable of the same name.
1859
1859
1860 **Running your code**
1860 **Running your code**
1861
1861
1862 While it's easy to type a few lines of code in IPython, for any
1862 While it's easy to type a few lines of code in IPython, for any
1863 long-lived work you should keep your codes in Python scripts (or in
1863 long-lived work you should keep your codes in Python scripts (or in
1864 IPython notebooks, see below). Consider that you have a script, in this
1864 IPython notebooks, see below). Consider that you have a script, in this
1865 case trivially simple for the sake of brevity, named ``simple.py``:
1865 case trivially simple for the sake of brevity, named ``simple.py``:
1866
1866
1867 ::
1867 ::
1868
1868
1869 In [12]: !cat simple.py
1869 In [12]: !cat simple.py
1870 import numpy as np
1870 import numpy as np
1871
1871
1872 x = np.random.normal(size=100)
1872 x = np.random.normal(size=100)
1873
1873
1874 print 'First elment of x:', x[0]
1874 print 'First elment of x:', x[0]
1875
1875
1876 The typical workflow with IPython is to use the ``%run`` magic to
1876 The typical workflow with IPython is to use the ``%run`` magic to
1877 execute your script (you can omit the .py extension if you want). When
1877 execute your script (you can omit the .py extension if you want). When
1878 you run it, the script will execute just as if it had been run at the
1878 you run it, the script will execute just as if it had been run at the
1879 system prompt with ``python simple.py`` (though since modules don't get
1879 system prompt with ``python simple.py`` (though since modules don't get
1880 re-executed on new imports by Python, all system initialization is
1880 re-executed on new imports by Python, all system initialization is
1881 essentially free, which can have a significant run time impact in some
1881 essentially free, which can have a significant run time impact in some
1882 cases):
1882 cases):
1883
1883
1884 ::
1884 ::
1885
1885
1886 In [13]: run simple
1886 In [13]: run simple
1887 First elment of x: -1.55872256289
1887 First elment of x: -1.55872256289
1888
1888
1889 Once it completes, all variables defined in it become available for you
1889 Once it completes, all variables defined in it become available for you
1890 to use interactively:
1890 to use interactively:
1891
1891
1892 ::
1892 ::
1893
1893
1894 In [14]: x.shape
1894 In [14]: x.shape
1895 Out[14]: (100,)
1895 Out[14]: (100,)
1896
1896
1897 This allows you to plot data, try out ideas, etc, in a
1897 This allows you to plot data, try out ideas, etc, in a
1898 ``%run``/interact/edit cycle that can be very productive. As you start
1898 ``%run``/interact/edit cycle that can be very productive. As you start
1899 understanding your problem better you can refine your script further,
1899 understanding your problem better you can refine your script further,
1900 incrementally improving it based on the work you do at the IPython
1900 incrementally improving it based on the work you do at the IPython
1901 prompt. At any point you can use the ``%hist`` magic to print out your
1901 prompt. At any point you can use the ``%hist`` magic to print out your
1902 history without prompts, so that you can copy useful fragments back into
1902 history without prompts, so that you can copy useful fragments back into
1903 the script.
1903 the script.
1904
1904
1905 By default, ``%run`` executes scripts in a completely empty namespace,
1905 By default, ``%run`` executes scripts in a completely empty namespace,
1906 to better mimic how they would execute at the system prompt with plain
1906 to better mimic how they would execute at the system prompt with plain
1907 Python. But if you use the ``-i`` flag, the script will also see your
1907 Python. But if you use the ``-i`` flag, the script will also see your
1908 interactively defined variables. This lets you edit in a script larger
1908 interactively defined variables. This lets you edit in a script larger
1909 amounts of code that still behave as if you had typed them at the
1909 amounts of code that still behave as if you had typed them at the
1910 IPython prompt.
1910 IPython prompt.
1911
1911
1912 You can also get a summary of the time taken by your script with the
1912 You can also get a summary of the time taken by your script with the
1913 ``-t`` flag; consider a different script ``randsvd.py`` that takes a bit
1913 ``-t`` flag; consider a different script ``randsvd.py`` that takes a bit
1914 longer to run:
1914 longer to run:
1915
1915
1916 ::
1916 ::
1917
1917
1918 In [21]: run -t randsvd.py
1918 In [21]: run -t randsvd.py
1919
1919
1920 IPython CPU timings (estimated):
1920 IPython CPU timings (estimated):
1921 User : 0.38 s.
1921 User : 0.38 s.
1922 System : 0.04 s.
1922 System : 0.04 s.
1923 Wall time: 0.34 s.
1923 Wall time: 0.34 s.
1924
1924
1925 ``User`` is the time spent by the computer executing your code, while
1925 ``User`` is the time spent by the computer executing your code, while
1926 ``System`` is the time the operating system had to work on your behalf,
1926 ``System`` is the time the operating system had to work on your behalf,
1927 doing things like memory allocation that are needed by your code but
1927 doing things like memory allocation that are needed by your code but
1928 that you didn't explicitly program and that happen inside the kernel.
1928 that you didn't explicitly program and that happen inside the kernel.
1929 The ``Wall time`` is the time on a 'clock on the wall' between the start
1929 The ``Wall time`` is the time on a 'clock on the wall' between the start
1930 and end of your program.
1930 and end of your program.
1931
1931
1932 If ``Wall > User+System``, your code is most likely waiting idle for
1932 If ``Wall > User+System``, your code is most likely waiting idle for
1933 certain periods. That could be waiting for data to arrive from a remote
1933 certain periods. That could be waiting for data to arrive from a remote
1934 source or perhaps because the operating system has to swap large amounts
1934 source or perhaps because the operating system has to swap large amounts
1935 of virtual memory. If you know that your code doesn't explicitly wait
1935 of virtual memory. If you know that your code doesn't explicitly wait
1936 for remote data to arrive, you should investigate further to identify
1936 for remote data to arrive, you should investigate further to identify
1937 possible ways of improving the performance profile.
1937 possible ways of improving the performance profile.
1938
1938
1939 If you only want to time how long a single statement takes, you don't
1939 If you only want to time how long a single statement takes, you don't
1940 need to put it into a script as you can use the ``%timeit`` magic, which
1940 need to put it into a script as you can use the ``%timeit`` magic, which
1941 uses Python's ``timeit`` module to very carefully measure timing data;
1941 uses Python's ``timeit`` module to very carefully measure timing data;
1942 ``timeit`` can measure even short statements that execute extremely
1942 ``timeit`` can measure even short statements that execute extremely
1943 fast:
1943 fast:
1944
1944
1945 ::
1945 ::
1946
1946
1947 In [27]: %timeit a=1
1947 In [27]: %timeit a=1
1948 10000000 loops, best of 3: 23 ns per loop
1948 10000000 loops, best of 3: 23 ns per loop
1949
1949
1950 and for code that runs longer, it automatically adjusts so the overall
1950 and for code that runs longer, it automatically adjusts so the overall
1951 measurement doesn't take too long:
1951 measurement doesn't take too long:
1952
1952
1953 ::
1953 ::
1954
1954
1955 In [28]: %timeit np.linalg.svd(x)
1955 In [28]: %timeit np.linalg.svd(x)
1956 1 loops, best of 3: 310 ms per loop
1956 1 loops, best of 3: 310 ms per loop
1957
1957
1958 The ``%run`` magic still has more options for debugging and profiling
1958 The ``%run`` magic still has more options for debugging and profiling
1959 data; you should read its documentation for many useful details (as
1959 data; you should read its documentation for many useful details (as
1960 always, just type ``%run?``).
1960 always, just type ``%run?``).
1961
1961
1962 The graphical Qt console
1962 The graphical Qt console
1963 ------------------------
1963 ------------------------
1964
1964
1965 If you type at the system prompt (see the IPython website for
1965 If you type at the system prompt (see the IPython website for
1966 installation details, as this requires some additional libraries):
1966 installation details, as this requires some additional libraries):
1967
1967
1968 ::
1968 ::
1969
1969
1970 $ ipython qtconsole
1970 $ ipython qtconsole
1971
1971
1972 instead of opening in a terminal as before, IPython will start a
1972 instead of opening in a terminal as before, IPython will start a
1973 graphical console that at first sight appears just like a terminal, but
1973 graphical console that at first sight appears just like a terminal, but
1974 which is in fact much more capable than a text-only terminal. This is a
1974 which is in fact much more capable than a text-only terminal. This is a
1975 specialized terminal designed for interactive scientific work, and it
1975 specialized terminal designed for interactive scientific work, and it
1976 supports full multi-line editing with color highlighting and graphical
1976 supports full multi-line editing with color highlighting and graphical
1977 calltips for functions, it can keep multiple IPython sessions open
1977 calltips for functions, it can keep multiple IPython sessions open
1978 simultaneously in tabs, and when scripts run it can display the figures
1978 simultaneously in tabs, and when scripts run it can display the figures
1979 inline directly in the work area.
1979 inline directly in the work area.
1980
1980
1981 .. raw:: html
1981 .. raw:: html
1982
1982
1983 <center>
1983 <center>
1984
1984
1985 .. raw:: html
1985 .. raw:: html
1986
1986
1987 </center>
1987 </center>
1988
1988
1989
1989
1990 % This cell is for the pdflatex output only
1990 % This cell is for the pdflatex output only
1991 \begin{figure}[htbp]
1991 \begin{figure}[htbp]
1992 \centering
1992 \centering
1993 \includegraphics[width=3in]{ipython_qtconsole2.png}
1993 \includegraphics[width=3in]{ipython_qtconsole2.png}
1994 \caption{The IPython Qt console: a lightweight terminal for scientific exploration, with code, results and graphics in a single environment.}
1994 \caption{The IPython Qt console: a lightweight terminal for scientific exploration, with code, results and graphics in a single environment.}
1995 \end{figure}
1995 \end{figure}
1996 The Qt console accepts the same ``--pylab`` startup flags as the
1996 The Qt console accepts the same ``--pylab`` startup flags as the
1997 terminal, but you can additionally supply the value ``--pylab inline``,
1997 terminal, but you can additionally supply the value ``--pylab inline``,
1998 which enables the support for inline graphics shown in the figure. This
1998 which enables the support for inline graphics shown in the figure. This
1999 is ideal for keeping all the code and figures in the same session, given
1999 is ideal for keeping all the code and figures in the same session, given
2000 that the console can save the output of your entire session to HTML or
2000 that the console can save the output of your entire session to HTML or
2001 PDF.
2001 PDF.
2002
2002
2003 Since the Qt console makes it far more convenient than the terminal to
2003 Since the Qt console makes it far more convenient than the terminal to
2004 edit blocks of code with multiple lines, in this environment it's worth
2004 edit blocks of code with multiple lines, in this environment it's worth
2005 knowing about the ``%loadpy`` magic function. ``%loadpy`` takes a path
2005 knowing about the ``%loadpy`` magic function. ``%loadpy`` takes a path
2006 to a local file or remote URL, fetches its contents, and puts it in the
2006 to a local file or remote URL, fetches its contents, and puts it in the
2007 work area for you to further edit and execute. It can be an extremely
2007 work area for you to further edit and execute. It can be an extremely
2008 fast and convenient way of loading code from local disk or remote
2008 fast and convenient way of loading code from local disk or remote
2009 examples from sites such as the `Matplotlib
2009 examples from sites such as the `Matplotlib
2010 gallery <http://matplotlib.sourceforge.net/gallery.html>`_.
2010 gallery <http://matplotlib.sourceforge.net/gallery.html>`_.
2011
2011
2012 Other than its enhanced capabilities for code and graphics, all of the
2012 Other than its enhanced capabilities for code and graphics, all of the
2013 features of IPython we've explained before remain functional in this
2013 features of IPython we've explained before remain functional in this
2014 graphical console.
2014 graphical console.
2015
2015
2016 The IPython Notebook
2016 The IPython Notebook
2017 --------------------
2017 --------------------
2018
2018
2019 The third way to interact with IPython, in addition to the terminal and
2019 The third way to interact with IPython, in addition to the terminal and
2020 graphical Qt console, is a powerful web interface called the "IPython
2020 graphical Qt console, is a powerful web interface called the "IPython
2021 Notebook". If you run at the system console (you can omit the ``pylab``
2021 Notebook". If you run at the system console (you can omit the ``pylab``
2022 flags if you don't need plotting support):
2022 flags if you don't need plotting support):
2023
2023
2024 ::
2024 ::
2025
2025
2026 $ ipython notebook --pylab inline
2026 $ ipython notebook --pylab inline
2027
2027
2028 IPython will start a process that runs a web server in your local
2028 IPython will start a process that runs a web server in your local
2029 machine and to which a web browser can connect. The Notebook is a
2029 machine and to which a web browser can connect. The Notebook is a
2030 workspace that lets you execute code in blocks called 'cells' and
2030 workspace that lets you execute code in blocks called 'cells' and
2031 displays any results and figures, but which can also contain arbitrary
2031 displays any results and figures, but which can also contain arbitrary
2032 text (including LaTeX-formatted mathematical expressions) and any rich
2032 text (including LaTeX-formatted mathematical expressions) and any rich
2033 media that a modern web browser is capable of displaying.
2033 media that a modern web browser is capable of displaying.
2034
2034
2035 .. raw:: html
2035 .. raw:: html
2036
2036
2037 <center>
2037 <center>
2038
2038
2039 .. raw:: html
2039 .. raw:: html
2040
2040
2041 </center>
2041 </center>
2042
2042
2043
2043
2044 % This cell is for the pdflatex output only
2044 % This cell is for the pdflatex output only
2045 \begin{figure}[htbp]
2045 \begin{figure}[htbp]
2046 \centering
2046 \centering
2047 \includegraphics[width=3in]{ipython-notebook-specgram-2.png}
2047 \includegraphics[width=3in]{ipython-notebook-specgram-2.png}
2048 \caption{The IPython Notebook: text, equations, code, results, graphics and other multimedia in an open format for scientific exploration and collaboration}
2048 \caption{The IPython Notebook: text, equations, code, results, graphics and other multimedia in an open format for scientific exploration and collaboration}
2049 \end{figure}
2049 \end{figure}
2050 In fact, this document was written as a Notebook, and only exported to
2050 In fact, this document was written as a Notebook, and only exported to
2051 LaTeX for printing. Inside of each cell, all the features of IPython
2051 LaTeX for printing. Inside of each cell, all the features of IPython
2052 that we have discussed before remain functional, since ultimately this
2052 that we have discussed before remain functional, since ultimately this
2053 web client is communicating with the same IPython code that runs in the
2053 web client is communicating with the same IPython code that runs in the
2054 terminal. But this interface is a much more rich and powerful
2054 terminal. But this interface is a much more rich and powerful
2055 environment for maintaining long-term "live and executable" scientific
2055 environment for maintaining long-term "live and executable" scientific
2056 documents.
2056 documents.
2057
2057
2058 Notebook environments have existed in commercial systems like
2058 Notebook environments have existed in commercial systems like
2059 Mathematica(TM) and Maple(TM) for a long time; in the open source world
2059 Mathematica(TM) and Maple(TM) for a long time; in the open source world
2060 the `Sage <http://sagemath.org>`_ project blazed this particular trail
2060 the `Sage <http://sagemath.org>`_ project blazed this particular trail
2061 starting in 2006, and now we bring all the features that have made
2061 starting in 2006, and now we bring all the features that have made
2062 IPython such a widely used tool to a Notebook model.
2062 IPython such a widely used tool to a Notebook model.
2063
2063
2064 Since the Notebook runs as a web application, it is possible to
2064 Since the Notebook runs as a web application, it is possible to
2065 configure it for remote access, letting you run your computations on a
2065 configure it for remote access, letting you run your computations on a
2066 persistent server close to your data, which you can then access remotely
2066 persistent server close to your data, which you can then access remotely
2067 from any browser-equipped computer. We encourage you to read the
2067 from any browser-equipped computer. We encourage you to read the
2068 extensive documentation provided by the IPython project for details on
2068 extensive documentation provided by the IPython project for details on
2069 how to do this and many more features of the notebook.
2069 how to do this and many more features of the notebook.
2070
2070
2071 Finally, as we said earlier, IPython also has a high-level and easy to
2071 Finally, as we said earlier, IPython also has a high-level and easy to
2072 use set of libraries for parallel computing, that let you control
2072 use set of libraries for parallel computing, that let you control
2073 (interactively if desired) not just one IPython but an entire cluster of
2073 (interactively if desired) not just one IPython but an entire cluster of
2074 'IPython engines'. Unfortunately a detailed discussion of these tools is
2074 'IPython engines'. Unfortunately a detailed discussion of these tools is
2075 beyond the scope of this text, but should you need to parallelize your
2075 beyond the scope of this text, but should you need to parallelize your
2076 analysis codes, a quick read of the tutorials and examples provided at
2076 analysis codes, a quick read of the tutorials and examples provided at
2077 the IPython site may prove fruitful.
2077 the IPython site may prove fruitful.
@@ -1,2255 +1,2255 b''
1 %% This file was auto-generated by IPython, do NOT edit
1 %% This file was auto-generated by IPython, do NOT edit
2 %% Conversion from the original notebook file:
2 %% Conversion from the original notebook file:
3 %% tests/ipynbref/IntroNumPy.orig.ipynb
3 %% tests/ipynbref/IntroNumPy.orig.ipynb
4 %%
4 %%
5 \documentclass[11pt,english]{article}
5 \documentclass[11pt,english]{article}
6
6
7 %% This is the automatic preamble used by IPython. Note that it does *not*
7 %% This is the automatic preamble used by IPython. Note that it does *not*
8 %% include a documentclass declaration, that is added at runtime to the overall
8 %% include a documentclass declaration, that is added at runtime to the overall
9 %% document.
9 %% document.
10
10
11 \usepackage{amsmath}
11 \usepackage{amsmath}
12 \usepackage{amssymb}
12 \usepackage{amssymb}
13 \usepackage{graphicx}
13 \usepackage{graphicx}
14 \usepackage{ucs}
14 \usepackage{ucs}
15 \usepackage[utf8x]{inputenc}
15 \usepackage[utf8x]{inputenc}
16
16
17 % needed for markdown enumerations to work
17 % needed for markdown enumerations to work
18 \usepackage{enumerate}
18 \usepackage{enumerate}
19
19
20 % Slightly bigger margins than the latex defaults
20 % Slightly bigger margins than the latex defaults
21 \usepackage{geometry}
21 \usepackage{geometry}
22 \geometry{verbose,tmargin=3cm,bmargin=3cm,lmargin=2.5cm,rmargin=2.5cm}
22 \geometry{verbose,tmargin=3cm,bmargin=3cm,lmargin=2.5cm,rmargin=2.5cm}
23
23
24 % Define a few colors for use in code, links and cell shading
24 % Define a few colors for use in code, links and cell shading
25 \usepackage{color}
25 \usepackage{color}
26 \definecolor{orange}{cmyk}{0,0.4,0.8,0.2}
26 \definecolor{orange}{cmyk}{0,0.4,0.8,0.2}
27 \definecolor{darkorange}{rgb}{.71,0.21,0.01}
27 \definecolor{darkorange}{rgb}{.71,0.21,0.01}
28 \definecolor{darkgreen}{rgb}{.12,.54,.11}
28 \definecolor{darkgreen}{rgb}{.12,.54,.11}
29 \definecolor{myteal}{rgb}{.26, .44, .56}
29 \definecolor{myteal}{rgb}{.26, .44, .56}
30 \definecolor{gray}{gray}{0.45}
30 \definecolor{gray}{gray}{0.45}
31 \definecolor{lightgray}{gray}{.95}
31 \definecolor{lightgray}{gray}{.95}
32 \definecolor{mediumgray}{gray}{.8}
32 \definecolor{mediumgray}{gray}{.8}
33 \definecolor{inputbackground}{rgb}{.95, .95, .85}
33 \definecolor{inputbackground}{rgb}{.95, .95, .85}
34 \definecolor{outputbackground}{rgb}{.95, .95, .95}
34 \definecolor{outputbackground}{rgb}{.95, .95, .95}
35 \definecolor{traceback}{rgb}{1, .95, .95}
35 \definecolor{traceback}{rgb}{1, .95, .95}
36
36
37 % Framed environments for code cells (inputs, outputs, errors, ...). The
37 % Framed environments for code cells (inputs, outputs, errors, ...). The
38 % various uses of \unskip (or not) at the end were fine-tuned by hand, so don't
38 % various uses of \unskip (or not) at the end were fine-tuned by hand, so don't
39 % randomly change them unless you're sure of the effect it will have.
39 % randomly change them unless you're sure of the effect it will have.
40 \usepackage{framed}
40 \usepackage{framed}
41
41
42 % remove extraneous vertical space in boxes
42 % remove extraneous vertical space in boxes
43 \setlength\fboxsep{0pt}
43 \setlength\fboxsep{0pt}
44
44
45 % codecell is the whole input+output set of blocks that a Code cell can
45 % codecell is the whole input+output set of blocks that a Code cell can
46 % generate.
46 % generate.
47
47
48 % TODO: unfortunately, it seems that using a framed codecell environment breaks
48 % TODO: unfortunately, it seems that using a framed codecell environment breaks
49 % the ability of the frames inside of it to be broken across pages. This
49 % the ability of the frames inside of it to be broken across pages. This
50 % causes at least the problem of having lots of empty space at the bottom of
50 % causes at least the problem of having lots of empty space at the bottom of
51 % pages as new frames are moved to the next page, and if a single frame is too
51 % pages as new frames are moved to the next page, and if a single frame is too
52 % long to fit on a page, will completely stop latex from compiling the
52 % long to fit on a page, will completely stop latex from compiling the
53 % document. So unless we figure out a solution to this, we'll have to instead
53 % document. So unless we figure out a solution to this, we'll have to instead
54 % leave the codecell env. as empty. I'm keeping the original codecell
54 % leave the codecell env. as empty. I'm keeping the original codecell
55 % definition here (a thin vertical bar) for reference, in case we find a
55 % definition here (a thin vertical bar) for reference, in case we find a
56 % solution to the page break issue.
56 % solution to the page break issue.
57
57
58 %% \newenvironment{codecell}{%
58 %% \newenvironment{codecell}{%
59 %% \def\FrameCommand{\color{mediumgray} \vrule width 1pt \hspace{5pt}}%
59 %% \def\FrameCommand{\color{mediumgray} \vrule width 1pt \hspace{5pt}}%
60 %% \MakeFramed{\vspace{-0.5em}}}
60 %% \MakeFramed{\vspace{-0.5em}}}
61 %% {\unskip\endMakeFramed}
61 %% {\unskip\endMakeFramed}
62
62
63 % For now, make this a no-op...
63 % For now, make this a no-op...
64 \newenvironment{codecell}{}
64 \newenvironment{codecell}{}
65
65
66 \newenvironment{codeinput}{%
66 \newenvironment{codeinput}{%
67 \def\FrameCommand{\colorbox{inputbackground}}%
67 \def\FrameCommand{\colorbox{inputbackground}}%
68 \MakeFramed{\advance\hsize-\width \FrameRestore}}
68 \MakeFramed{\advance\hsize-\width \FrameRestore}}
69 {\unskip\endMakeFramed}
69 {\unskip\endMakeFramed}
70
70
71 \newenvironment{codeoutput}{%
71 \newenvironment{codeoutput}{%
72 \def\FrameCommand{\colorbox{outputbackground}}%
72 \def\FrameCommand{\colorbox{outputbackground}}%
73 \vspace{-1.4em}
73 \vspace{-1.4em}
74 \MakeFramed{\advance\hsize-\width \FrameRestore}}
74 \MakeFramed{\advance\hsize-\width \FrameRestore}}
75 {\unskip\medskip\endMakeFramed}
75 {\unskip\medskip\endMakeFramed}
76
76
77 \newenvironment{traceback}{%
77 \newenvironment{traceback}{%
78 \def\FrameCommand{\colorbox{traceback}}%
78 \def\FrameCommand{\colorbox{traceback}}%
79 \MakeFramed{\advance\hsize-\width \FrameRestore}}
79 \MakeFramed{\advance\hsize-\width \FrameRestore}}
80 {\endMakeFramed}
80 {\endMakeFramed}
81
81
82 % Use and configure listings package for nicely formatted code
82 % Use and configure listings package for nicely formatted code
83 \usepackage{listingsutf8}
83 \usepackage{listingsutf8}
84 \lstset{
84 \lstset{
85 language=python,
85 language=python,
86 inputencoding=utf8x,
86 inputencoding=utf8x,
87 extendedchars=\true,
87 extendedchars=\true,
88 aboveskip=\smallskipamount,
88 aboveskip=\smallskipamount,
89 belowskip=\smallskipamount,
89 belowskip=\smallskipamount,
90 xleftmargin=2mm,
90 xleftmargin=2mm,
91 breaklines=true,
91 breaklines=true,
92 basicstyle=\small \ttfamily,
92 basicstyle=\small \ttfamily,
93 showstringspaces=false,
93 showstringspaces=false,
94 keywordstyle=\color{blue}\bfseries,
94 keywordstyle=\color{blue}\bfseries,
95 commentstyle=\color{myteal},
95 commentstyle=\color{myteal},
96 stringstyle=\color{darkgreen},
96 stringstyle=\color{darkgreen},
97 identifierstyle=\color{darkorange},
97 identifierstyle=\color{darkorange},
98 columns=fullflexible, % tighter character kerning, like verb
98 columns=fullflexible, % tighter character kerning, like verb
99 }
99 }
100
100
101 % The hyperref package gives us a pdf with properly built
101 % The hyperref package gives us a pdf with properly built
102 % internal navigation ('pdf bookmarks' for the table of contents,
102 % internal navigation ('pdf bookmarks' for the table of contents,
103 % internal cross-reference links, web links for URLs, etc.)
103 % internal cross-reference links, web links for URLs, etc.)
104 \usepackage{hyperref}
104 \usepackage{hyperref}
105 \hypersetup{
105 \hypersetup{
106 breaklinks=true, % so long urls are correctly broken across lines
106 breaklinks=true, % so long urls are correctly broken across lines
107 colorlinks=true,
107 colorlinks=true,
108 urlcolor=blue,
108 urlcolor=blue,
109 linkcolor=darkorange,
109 linkcolor=darkorange,
110 citecolor=darkgreen,
110 citecolor=darkgreen,
111 }
111 }
112
112
113 % hardcode size of all verbatim environments to be a bit smaller
113 % hardcode size of all verbatim environments to be a bit smaller
114 \makeatletter
114 \makeatletter
115 \g@addto@macro\@verbatim\small\topsep=0.5em\partopsep=0pt
115 \g@addto@macro\@verbatim\small\topsep=0.5em\partopsep=0pt
116 \makeatother
116 \makeatother
117
117
118 % Prevent overflowing lines due to urls and other hard-to-break entities.
118 % Prevent overflowing lines due to urls and other hard-to-break entities.
119 \sloppy
119 \sloppy
120
120
121 \begin{document}
121 \begin{document}
122
122
123 \section{An Introduction to the Scientific Python Ecosystem}
123 \section{An Introduction to the Scientific Python Ecosystem}
124 While the Python language is an excellent tool for general-purpose
124 While the Python language is an excellent tool for general-purpose
125 programming, with a highly readable syntax, rich and powerful data types
125 programming, with a highly readable syntax, rich and powerful data types
126 (strings, lists, sets, dictionaries, arbitrary length integers, etc) and
126 (strings, lists, sets, dictionaries, arbitrary length integers, etc) and
127 a very comprehensive standard library, it was not designed specifically
127 a very comprehensive standard library, it was not designed specifically
128 for mathematical and scientific computing. Neither the language nor its
128 for mathematical and scientific computing. Neither the language nor its
129 standard library have facilities for the efficient representation of
129 standard library have facilities for the efficient representation of
130 multidimensional datasets, tools for linear algebra and general matrix
130 multidimensional datasets, tools for linear algebra and general matrix
131 manipulations (an essential building block of virtually all technical
131 manipulations (an essential building block of virtually all technical
132 computing), nor any data visualization facilities.
132 computing), nor any data visualization facilities.
133
133
134 In particular, Python lists are very flexible containers that can be
134 In particular, Python lists are very flexible containers that can be
135 nested arbitrarily deep and which can hold any Python object in them,
135 nested arbitrarily deep and which can hold any Python object in them,
136 but they are poorly suited to represent efficiently common mathematical
136 but they are poorly suited to represent efficiently common mathematical
137 constructs like vectors and matrices. In contrast, much of our modern
137 constructs like vectors and matrices. In contrast, much of our modern
138 heritage of scientific computing has been built on top of libraries
138 heritage of scientific computing has been built on top of libraries
139 written in the Fortran language, which has native support for vectors
139 written in the Fortran language, which has native support for vectors
140 and matrices as well as a library of mathematical functions that can
140 and matrices as well as a library of mathematical functions that can
141 efficiently operate on entire arrays at once.
141 efficiently operate on entire arrays at once.
142
142
143 \subsection{Scientific Python: a collaboration of projects built by scientists}
143 \subsection{Scientific Python: a collaboration of projects built by scientists}
144 The scientific community has developed a set of related Python libraries
144 The scientific community has developed a set of related Python libraries
145 that provide powerful array facilities, linear algebra, numerical
145 that provide powerful array facilities, linear algebra, numerical
146 algorithms, data visualization and more. In this appendix, we will
146 algorithms, data visualization and more. In this appendix, we will
147 briefly outline the tools most frequently used for this purpose, that
147 briefly outline the tools most frequently used for this purpose, that
148 make ``Scientific Python'' something far more powerful than the Python
148 make ``Scientific Python'' something far more powerful than the Python
149 language alone.
149 language alone.
150
150
151 For reasons of space, we can only describe in some detail the central
151 For reasons of space, we can only describe in some detail the central
152 Numpy library, but below we provide links to the websites of each
152 Numpy library, but below we provide links to the websites of each
153 project where you can read their documentation in more detail.
153 project where you can read their documentation in more detail.
154
154
155 First, let's look at an overview of the basic tools that most scientists
155 First, let's look at an overview of the basic tools that most scientists
156 use in daily research with Python. The core of this ecosystem is
156 use in daily research with Python. The core of this ecosystem is
157 composed of:
157 composed of:
158
158
159 \begin{itemize}
159 \begin{itemize}
160 \item
160 \item
161 Numpy: the basic library that most others depend on, it provides a
161 Numpy: the basic library that most others depend on, it provides a
162 powerful array type that can represent multidmensional datasets of
162 powerful array type that can represent multidimensional datasets of
162 powerful array type that can represent multidimensional datasets of
163 many different kinds and that supports arithmetic operations. Numpy
164 also provides a library of common mathematical functions, basic linear
164 also provides a library of common mathematical functions, basic linear
165 algebra, random number generation and Fast Fourier Transforms. Numpy
165 algebra, random number generation and Fast Fourier Transforms. Numpy
166 can be found at \href{http://numpy.scipy.org}{numpy.scipy.org}
166 can be found at \href{http://numpy.scipy.org}{numpy.scipy.org}
167 \item
167 \item
168 Scipy: a large collection of numerical algorithms that operate on
168 Scipy: a large collection of numerical algorithms that operate on
169 numpy arrays and provide facilities for many common tasks in
169 numpy arrays and provide facilities for many common tasks in
170 scientific computing, including dense and sparse linear algebra
170 scientific computing, including dense and sparse linear algebra
171 support, optimization, special functions, statistics, n-dimensional
171 support, optimization, special functions, statistics, n-dimensional
172 image processing, signal processing and more. Scipy can be found at
172 image processing, signal processing and more. Scipy can be found at
173 \href{http://scipy.org}{scipy.org}.
173 \href{http://scipy.org}{scipy.org}.
174 \item
174 \item
175 Matplotlib: a data visualization library with a strong focus on
175 Matplotlib: a data visualization library with a strong focus on
176 producing high-quality output, it supports a variety of common
176 producing high-quality output, it supports a variety of common
177 scientific plot types in two and three dimensions, with precise
177 scientific plot types in two and three dimensions, with precise
178 control over the final output and format for publication-quality
178 control over the final output and format for publication-quality
179 results. Matplotlib can also be controlled interactively allowing
179 results. Matplotlib can also be controlled interactively allowing
180 graphical manipulation of your data (zooming, panning, etc) and can be
180 graphical manipulation of your data (zooming, panning, etc) and can be
181 used with most modern user interface toolkits. It can be found at
181 used with most modern user interface toolkits. It can be found at
182 \href{http://matplotlib.sf.net}{matplotlib.sf.net}.
182 \href{http://matplotlib.sf.net}{matplotlib.sf.net}.
183 \item
183 \item
184 IPython: while not strictly scientific in nature, IPython is the
184 IPython: while not strictly scientific in nature, IPython is the
185 interactive environment in which many scientists spend their time.
185 interactive environment in which many scientists spend their time.
186 IPython provides a powerful Python shell that integrates tightly with
186 IPython provides a powerful Python shell that integrates tightly with
187 Matplotlib and with easy access to the files and operating system, and
187 Matplotlib and with easy access to the files and operating system, and
188 which can execute in a terminal or in a graphical Qt console. IPython
188 which can execute in a terminal or in a graphical Qt console. IPython
189 also has a web-based notebook interface that can combine code with
189 also has a web-based notebook interface that can combine code with
190 text, mathematical expressions, figures and multimedia. It can be
190 text, mathematical expressions, figures and multimedia. It can be
191 found at \href{http://ipython.org}{ipython.org}.
191 found at \href{http://ipython.org}{ipython.org}.
192 \end{itemize}
192 \end{itemize}
193 While each of these tools can be installed separately, in our opinion
193 While each of these tools can be installed separately, in our opinion
194 the most convenient way today of accessing them (especially on Windows
194 the most convenient way today of accessing them (especially on Windows
195 and Mac computers) is to install the
195 and Mac computers) is to install the
196 \href{http://www.enthought.com/products/epd\_free.php}{Free Edition of
196 \href{http://www.enthought.com/products/epd\_free.php}{Free Edition of
197 the Enthought Python Distribution} which contains all the above. Other
197 the Enthought Python Distribution} which contains all the above. Other
198 free alternatives on Windows (but not on Macs) are
198 free alternatives on Windows (but not on Macs) are
199 \href{http://code.google.com/p/pythonxy}{Python(x,y)} and
199 \href{http://code.google.com/p/pythonxy}{Python(x,y)} and
200 \href{http://www.lfd.uci.edu/~gohlke/pythonlibs}{Christoph Gohlke's
200 \href{http://www.lfd.uci.edu/~gohlke/pythonlibs}{Christoph Gohlke's
201 packages page}.
201 packages page}.
202
202
203 These four `core' libraries are in practice complemented by a number of
203 These four `core' libraries are in practice complemented by a number of
204 other tools for more specialized work. We will briefly list here the
204 other tools for more specialized work. We will briefly list here the
205 ones that we think are the most commonly needed:
205 ones that we think are the most commonly needed:
206
206
207 \begin{itemize}
207 \begin{itemize}
208 \item
208 \item
209 Sympy: a symbolic manipulation tool that turns a Python session into a
209 Sympy: a symbolic manipulation tool that turns a Python session into a
210 computer algebra system. It integrates with the IPython notebook,
210 computer algebra system. It integrates with the IPython notebook,
211 rendering results in properly typeset mathematical notation.
211 rendering results in properly typeset mathematical notation.
212 \href{http://sympy.org}{sympy.org}.
212 \href{http://sympy.org}{sympy.org}.
213 \item
213 \item
214 Mayavi: sophisticated 3d data visualization;
214 Mayavi: sophisticated 3d data visualization;
215 \href{http://code.enthought.com/projects/mayavi}{code.enthought.com/projects/mayavi}.
215 \href{http://code.enthought.com/projects/mayavi}{code.enthought.com/projects/mayavi}.
216 \item
216 \item
217 Cython: a bridge language between Python and C, useful both to
217 Cython: a bridge language between Python and C, useful both to
218 optimize performance bottlenecks in Python and to access C libraries
218 optimize performance bottlenecks in Python and to access C libraries
219 directly; \href{http://cython.org}{cython.org}.
219 directly; \href{http://cython.org}{cython.org}.
220 \item
220 \item
221 Pandas: high-performance data structures and data analysis tools, with
221 Pandas: high-performance data structures and data analysis tools, with
222 powerful data alignment and structural manipulation capabilities;
222 powerful data alignment and structural manipulation capabilities;
223 \href{http://pandas.pydata.org}{pandas.pydata.org}.
223 \href{http://pandas.pydata.org}{pandas.pydata.org}.
224 \item
224 \item
225 Statsmodels: statistical data exploration and model estimation;
225 Statsmodels: statistical data exploration and model estimation;
226 \href{http://statsmodels.sourceforge.net}{statsmodels.sourceforge.net}.
226 \href{http://statsmodels.sourceforge.net}{statsmodels.sourceforge.net}.
227 \item
227 \item
228 Scikit-learn: general purpose machine learning algorithms with a
228 Scikit-learn: general purpose machine learning algorithms with a
229 common interface; \href{http://scikit-learn.org}{scikit-learn.org}.
229 common interface; \href{http://scikit-learn.org}{scikit-learn.org}.
230 \item
230 \item
231 Scikits-image: image processing toolbox;
231 Scikits-image: image processing toolbox;
232 \href{http://scikits-image.org}{scikits-image.org}.
232 \href{http://scikits-image.org}{scikits-image.org}.
233 \item
233 \item
234 NetworkX: analysis of complex networks (in the graph theoretical
234 NetworkX: analysis of complex networks (in the graph theoretical
235 sense); \href{http://networkx.lanl.gov}{networkx.lanl.gov}.
235 sense); \href{http://networkx.lanl.gov}{networkx.lanl.gov}.
236 \item
236 \item
237 PyTables: management of hierarchical datasets using the
237 PyTables: management of hierarchical datasets using the
238 industry-standard HDF5 format;
238 industry-standard HDF5 format;
239 \href{http://www.pytables.org}{www.pytables.org}.
239 \href{http://www.pytables.org}{www.pytables.org}.
240 \end{itemize}
240 \end{itemize}
241 Beyond these, for any specific problem you should look on the internet
241 Beyond these, for any specific problem you should look on the internet
242 first, before starting to write code from scratch. There's a good chance
242 first, before starting to write code from scratch. There's a good chance
243 that someone, somewhere, has written an open source library that you can
243 that someone, somewhere, has written an open source library that you can
244 use for part or all of your problem.
244 use for part or all of your problem.
245
245
246 \subsection{A note about the examples below}
246 \subsection{A note about the examples below}
247 In all subsequent examples, you will see blocks of input code, followed
247 In all subsequent examples, you will see blocks of input code, followed
248 by the results of the code if the code generated output. This output may
248 by the results of the code if the code generated output. This output may
249 include text, graphics and other result objects. These blocks of input
249 include text, graphics and other result objects. These blocks of input
250 can be pasted into your interactive IPython session or notebook for you
250 can be pasted into your interactive IPython session or notebook for you
251 to execute. In the print version of this document, a thin vertical bar
251 to execute. In the print version of this document, a thin vertical bar
252 on the left of the blocks of input and output shows which blocks go
252 on the left of the blocks of input and output shows which blocks go
253 together.
253 together.
254
254
255 If you are reading this text as an actual IPython notebook, you can
255 If you are reading this text as an actual IPython notebook, you can
256 press \texttt{Shift-Enter} or use the `play' button on the toolbar
256 press \texttt{Shift-Enter} or use the `play' button on the toolbar
257 (right-pointing triangle) to execute each block of code, known as a
257 (right-pointing triangle) to execute each block of code, known as a
258 `cell' in IPython:
258 `cell' in IPython:
259
259
260 \begin{codecell}
260 \begin{codecell}
261 \begin{codeinput}
261 \begin{codeinput}
262 \begin{lstlisting}
262 \begin{lstlisting}
263 # This is a block of code, below you'll see its output
263 # This is a block of code, below you'll see its output
264 print "Welcome to the world of scientific computing with Python!"
264 print "Welcome to the world of scientific computing with Python!"
265 \end{lstlisting}
265 \end{lstlisting}
266 \end{codeinput}
266 \end{codeinput}
267 \begin{codeoutput}
267 \begin{codeoutput}
268 \begin{verbatim}
268 \begin{verbatim}
269 Welcome to the world of scientific computing with Python!
269 Welcome to the world of scientific computing with Python!
270 \end{verbatim}
270 \end{verbatim}
271 \end{codeoutput}
271 \end{codeoutput}
272 \end{codecell}
272 \end{codecell}
273 \section{Motivation: the trapezoidal rule}
273 \section{Motivation: the trapezoidal rule}
274 In subsequent sections we'll provide a basic introduction to the nuts
274 In subsequent sections we'll provide a basic introduction to the nuts
275 and bolts of the basic scientific python tools; but we'll first motivate
275 and bolts of the basic scientific python tools; but we'll first motivate
276 it with a brief example that illustrates what you can do in a few lines
276 it with a brief example that illustrates what you can do in a few lines
277 with these tools. For this, we will use the simple problem of
277 with these tools. For this, we will use the simple problem of
278 approximating a definite integral with the trapezoid rule:
278 approximating a definite integral with the trapezoid rule:
279
279
280 \[
280 \[
281 \int_{a}^{b} f(x)\, dx \approx \frac{1}{2} \sum_{k=1}^{N} \left( x_{k} - x_{k-1} \right) \left( f(x_{k}) + f(x_{k-1}) \right).
281 \int_{a}^{b} f(x)\, dx \approx \frac{1}{2} \sum_{k=1}^{N} \left( x_{k} - x_{k-1} \right) \left( f(x_{k}) + f(x_{k-1}) \right).
282 \]
282 \]
283
283
284 Our task will be to compute this formula for a function such as:
284 Our task will be to compute this formula for a function such as:
285
285
286 \[
286 \[
287 f(x) = (x-3)(x-5)(x-7)+85
287 f(x) = (x-3)(x-5)(x-7)+85
288 \]
288 \]
289
289
290 integrated between $a=1$ and $b=9$.
290 integrated between $a=1$ and $b=9$.
291
291
292 First, we define the function and sample it evenly between 0 and 10 at
292 First, we define the function and sample it evenly between 0 and 10 at
293 200 points:
293 200 points:
294
294
295 \begin{codecell}
295 \begin{codecell}
296 \begin{codeinput}
296 \begin{codeinput}
297 \begin{lstlisting}
297 \begin{lstlisting}
298 def f(x):
298 def f(x):
299 return (x-3)*(x-5)*(x-7)+85
299 return (x-3)*(x-5)*(x-7)+85
300
300
301 import numpy as np
301 import numpy as np
302 x = np.linspace(0, 10, 200)
302 x = np.linspace(0, 10, 200)
303 y = f(x)
303 y = f(x)
304 \end{lstlisting}
304 \end{lstlisting}
305 \end{codeinput}
305 \end{codeinput}
306 \end{codecell}
306 \end{codecell}
307 We select $a$ and $b$, our integration limits, and we take only a few
307 We select $a$ and $b$, our integration limits, and we take only a few
308 points in that region to illustrate the error behavior of the trapezoid
308 points in that region to illustrate the error behavior of the trapezoid
309 approximation:
309 approximation:
310
310
311 \begin{codecell}
311 \begin{codecell}
312 \begin{codeinput}
312 \begin{codeinput}
313 \begin{lstlisting}
313 \begin{lstlisting}
314 a, b = 1, 9
314 a, b = 1, 9
315 xint = x[logical_and(x>=a, x<=b)][::30]
315 xint = x[logical_and(x>=a, x<=b)][::30]
316 yint = y[logical_and(x>=a, x<=b)][::30]
316 yint = y[logical_and(x>=a, x<=b)][::30]
317 \end{lstlisting}
317 \end{lstlisting}
318 \end{codeinput}
318 \end{codeinput}
319 \end{codecell}
319 \end{codecell}
320 Let's plot both the function and the area below it in the trapezoid
320 Let's plot both the function and the area below it in the trapezoid
321 approximation:
321 approximation:
322
322
323 \begin{codecell}
323 \begin{codecell}
324 \begin{codeinput}
324 \begin{codeinput}
325 \begin{lstlisting}
325 \begin{lstlisting}
326 import matplotlib.pyplot as plt
326 import matplotlib.pyplot as plt
327 plt.plot(x, y, lw=2)
327 plt.plot(x, y, lw=2)
328 plt.axis([0, 10, 0, 140])
328 plt.axis([0, 10, 0, 140])
329 plt.fill_between(xint, 0, yint, facecolor='gray', alpha=0.4)
329 plt.fill_between(xint, 0, yint, facecolor='gray', alpha=0.4)
330 plt.text(0.5 * (a + b), 30,r"$\int_a^b f(x)dx$", horizontalalignment='center', fontsize=20);
330 plt.text(0.5 * (a + b), 30,r"$\int_a^b f(x)dx$", horizontalalignment='center', fontsize=20);
331 \end{lstlisting}
331 \end{lstlisting}
332 \end{codeinput}
332 \end{codeinput}
333 \begin{codeoutput}
333 \begin{codeoutput}
334 \begin{center}
334 \begin{center}
335 \includegraphics[width=6in]{/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_00.pdf}
335 \includegraphics[width=6in]{tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_00.pdf}
336 \par
336 \par
337 \end{center}
337 \end{center}
338 \end{codeoutput}
338 \end{codeoutput}
339 \end{codecell}
339 \end{codecell}
340 Compute the integral both at high accuracy and with the trapezoid
340 Compute the integral both at high accuracy and with the trapezoid
341 approximation
341 approximation
342
342
343 \begin{codecell}
343 \begin{codecell}
344 \begin{codeinput}
344 \begin{codeinput}
345 \begin{lstlisting}
345 \begin{lstlisting}
346 from scipy.integrate import quad, trapz
346 from scipy.integrate import quad, trapz
347 integral, error = quad(f, 1, 9)
347 integral, error = quad(f, 1, 9)
348 trap_integral = trapz(yint, xint)
348 trap_integral = trapz(yint, xint)
349 print "The integral is: %g +/- %.1e" % (integral, error)
349 print "The integral is: %g +/- %.1e" % (integral, error)
350 print "The trapezoid approximation with", len(xint), "points is:", trap_integral
350 print "The trapezoid approximation with", len(xint), "points is:", trap_integral
351 print "The absolute error is:", abs(integral - trap_integral)
351 print "The absolute error is:", abs(integral - trap_integral)
352 \end{lstlisting}
352 \end{lstlisting}
353 \end{codeinput}
353 \end{codeinput}
354 \begin{codeoutput}
354 \begin{codeoutput}
355 \begin{verbatim}
355 \begin{verbatim}
356 The integral is: 680 +/- 7.5e-12
356 The integral is: 680 +/- 7.5e-12
357 The trapezoid approximation with 6 points is: 621.286411141
357 The trapezoid approximation with 6 points is: 621.286411141
358 The absolute error is: 58.7135888589
358 The absolute error is: 58.7135888589
359 \end{verbatim}
359 \end{verbatim}
360 \end{codeoutput}
360 \end{codeoutput}
361 \end{codecell}
361 \end{codecell}
362 This simple example showed us how, combining the numpy, scipy and
362 This simple example showed us how, combining the numpy, scipy and
363 matplotlib libraries, we can provide an illustration of a standard method
363 matplotlib libraries, we can provide an illustration of a standard method
364 in elementary calculus with just a few lines of code. We will now
364 in elementary calculus with just a few lines of code. We will now
365 discuss in more detail the basic usage of these tools.
365 discuss in more detail the basic usage of these tools.
366
366
367 \section{NumPy arrays: the right data structure for scientific computing}
367 \section{NumPy arrays: the right data structure for scientific computing}
368 \subsection{Basics of Numpy arrays}
368 \subsection{Basics of Numpy arrays}
369 We now turn our attention to the Numpy library, which forms the base
369 We now turn our attention to the Numpy library, which forms the base
370 layer for the entire `scipy ecosystem'. Once you have installed numpy,
370 layer for the entire `scipy ecosystem'. Once you have installed numpy,
371 you can import it as
371 you can import it as
372
372
373 \begin{codecell}
373 \begin{codecell}
374 \begin{codeinput}
374 \begin{codeinput}
375 \begin{lstlisting}
375 \begin{lstlisting}
376 import numpy
376 import numpy
377 \end{lstlisting}
377 \end{lstlisting}
378 \end{codeinput}
378 \end{codeinput}
379 \end{codecell}
379 \end{codecell}
380 though in this book we will use the common shorthand
380 though in this book we will use the common shorthand
381
381
382 \begin{codecell}
382 \begin{codecell}
383 \begin{codeinput}
383 \begin{codeinput}
384 \begin{lstlisting}
384 \begin{lstlisting}
385 import numpy as np
385 import numpy as np
386 \end{lstlisting}
386 \end{lstlisting}
387 \end{codeinput}
387 \end{codeinput}
388 \end{codecell}
388 \end{codecell}
389 As mentioned above, the main object provided by numpy is a powerful
389 As mentioned above, the main object provided by numpy is a powerful
390 array. We'll start by exploring how the numpy array differs from Python
390 array. We'll start by exploring how the numpy array differs from Python
391 lists. We start by creating a simple list and an array with the same
391 lists. We start by creating a simple list and an array with the same
392 contents of the list:
392 contents of the list:
393
393
394 \begin{codecell}
394 \begin{codecell}
395 \begin{codeinput}
395 \begin{codeinput}
396 \begin{lstlisting}
396 \begin{lstlisting}
397 lst = [10, 20, 30, 40]
397 lst = [10, 20, 30, 40]
398 arr = np.array([10, 20, 30, 40])
398 arr = np.array([10, 20, 30, 40])
399 \end{lstlisting}
399 \end{lstlisting}
400 \end{codeinput}
400 \end{codeinput}
401 \end{codecell}
401 \end{codecell}
402 Elements of a one-dimensional array are accessed with the same syntax as
402 Elements of a one-dimensional array are accessed with the same syntax as
403 a list:
403 a list:
404
404
405 \begin{codecell}
405 \begin{codecell}
406 \begin{codeinput}
406 \begin{codeinput}
407 \begin{lstlisting}
407 \begin{lstlisting}
408 lst[0]
408 lst[0]
409 \end{lstlisting}
409 \end{lstlisting}
410 \end{codeinput}
410 \end{codeinput}
411 \begin{codeoutput}
411 \begin{codeoutput}
412 \begin{verbatim}
412 \begin{verbatim}
413 10
413 10
414 \end{verbatim}
414 \end{verbatim}
415 \end{codeoutput}
415 \end{codeoutput}
416 \end{codecell}
416 \end{codecell}
417 \begin{codecell}
417 \begin{codecell}
418 \begin{codeinput}
418 \begin{codeinput}
419 \begin{lstlisting}
419 \begin{lstlisting}
420 arr[0]
420 arr[0]
421 \end{lstlisting}
421 \end{lstlisting}
422 \end{codeinput}
422 \end{codeinput}
423 \begin{codeoutput}
423 \begin{codeoutput}
424 \begin{verbatim}
424 \begin{verbatim}
425 10
425 10
426 \end{verbatim}
426 \end{verbatim}
427 \end{codeoutput}
427 \end{codeoutput}
428 \end{codecell}
428 \end{codecell}
429 \begin{codecell}
429 \begin{codecell}
430 \begin{codeinput}
430 \begin{codeinput}
431 \begin{lstlisting}
431 \begin{lstlisting}
432 arr[-1]
432 arr[-1]
433 \end{lstlisting}
433 \end{lstlisting}
434 \end{codeinput}
434 \end{codeinput}
435 \begin{codeoutput}
435 \begin{codeoutput}
436 \begin{verbatim}
436 \begin{verbatim}
437 40
437 40
438 \end{verbatim}
438 \end{verbatim}
439 \end{codeoutput}
439 \end{codeoutput}
440 \end{codecell}
440 \end{codecell}
441 \begin{codecell}
441 \begin{codecell}
442 \begin{codeinput}
442 \begin{codeinput}
443 \begin{lstlisting}
443 \begin{lstlisting}
444 arr[2:]
444 arr[2:]
445 \end{lstlisting}
445 \end{lstlisting}
446 \end{codeinput}
446 \end{codeinput}
447 \begin{codeoutput}
447 \begin{codeoutput}
448 \begin{verbatim}
448 \begin{verbatim}
449 array([30, 40])
449 array([30, 40])
450 \end{verbatim}
450 \end{verbatim}
451 \end{codeoutput}
451 \end{codeoutput}
452 \end{codecell}
452 \end{codecell}
453 The first difference to note between lists and arrays is that arrays are
453 The first difference to note between lists and arrays is that arrays are
454 \emph{homogeneous}; i.e.~all elements of an array must be of the same
454 \emph{homogeneous}; i.e.~all elements of an array must be of the same
455 type. In contrast, lists can contain elements of arbitrary type. For
455 type. In contrast, lists can contain elements of arbitrary type. For
456 example, we can change the last element in our list above to be a
456 example, we can change the last element in our list above to be a
457 string:
457 string:
458
458
459 \begin{codecell}
459 \begin{codecell}
460 \begin{codeinput}
460 \begin{codeinput}
461 \begin{lstlisting}
461 \begin{lstlisting}
462 lst[-1] = 'a string inside a list'
462 lst[-1] = 'a string inside a list'
463 lst
463 lst
464 \end{lstlisting}
464 \end{lstlisting}
465 \end{codeinput}
465 \end{codeinput}
466 \begin{codeoutput}
466 \begin{codeoutput}
467 \begin{verbatim}
467 \begin{verbatim}
468 [10, 20, 30, 'a string inside a list']
468 [10, 20, 30, 'a string inside a list']
469 \end{verbatim}
469 \end{verbatim}
470 \end{codeoutput}
470 \end{codeoutput}
471 \end{codecell}
471 \end{codecell}
472 but the same can not be done with an array, as we get an error message:
472 but the same can not be done with an array, as we get an error message:
473
473
474 \begin{codecell}
474 \begin{codecell}
475 \begin{codeinput}
475 \begin{codeinput}
476 \begin{lstlisting}
476 \begin{lstlisting}
477 arr[-1] = 'a string inside an array'
477 arr[-1] = 'a string inside an array'
478 \end{lstlisting}
478 \end{lstlisting}
479 \end{codeinput}
479 \end{codeinput}
480 \begin{codeoutput}
480 \begin{codeoutput}
481 \begin{traceback}
481 \begin{traceback}
482 \begin{verbatim}
482 \begin{verbatim}
483 ---------------------------------------------------------------------------
483 ---------------------------------------------------------------------------
484 ValueError Traceback (most recent call last)
484 ValueError Traceback (most recent call last)
485 /home/fperez/teach/book-math-labtool/<ipython-input-13-29c0bfa5fa8a> in <module>()
485 /home/fperez/teach/book-math-labtool/<ipython-input-13-29c0bfa5fa8a> in <module>()
486 ----> 1 arr[-1] = 'a string inside an array'
486 ----> 1 arr[-1] = 'a string inside an array'
487
487
488 ValueError: invalid literal for long() with base 10: 'a string inside an array'
488 ValueError: invalid literal for long() with base 10: 'a string inside an array'
489 \end{verbatim}
489 \end{verbatim}
490 \end{traceback}
490 \end{traceback}
491 \end{codeoutput}
491 \end{codeoutput}
492 \end{codecell}
492 \end{codecell}
493 The information about the type of an array is contained in its
493 The information about the type of an array is contained in its
494 \emph{dtype} attribute:
494 \emph{dtype} attribute:
495
495
496 \begin{codecell}
496 \begin{codecell}
497 \begin{codeinput}
497 \begin{codeinput}
498 \begin{lstlisting}
498 \begin{lstlisting}
499 arr.dtype
499 arr.dtype
500 \end{lstlisting}
500 \end{lstlisting}
501 \end{codeinput}
501 \end{codeinput}
502 \begin{codeoutput}
502 \begin{codeoutput}
503 \begin{verbatim}
503 \begin{verbatim}
504 dtype('int32')
504 dtype('int32')
505 \end{verbatim}
505 \end{verbatim}
506 \end{codeoutput}
506 \end{codeoutput}
507 \end{codecell}
507 \end{codecell}
508 Once an array has been created, its dtype is fixed and it can only store
508 Once an array has been created, its dtype is fixed and it can only store
509 elements of the same type. For this example where the dtype is integer,
509 elements of the same type. For this example where the dtype is integer,
510 if we store a floating point number it will be automatically converted
510 if we store a floating point number it will be automatically converted
511 into an integer:
511 into an integer:
512
512
513 \begin{codecell}
513 \begin{codecell}
514 \begin{codeinput}
514 \begin{codeinput}
515 \begin{lstlisting}
515 \begin{lstlisting}
516 arr[-1] = 1.234
516 arr[-1] = 1.234
517 arr
517 arr
518 \end{lstlisting}
518 \end{lstlisting}
519 \end{codeinput}
519 \end{codeinput}
520 \begin{codeoutput}
520 \begin{codeoutput}
521 \begin{verbatim}
521 \begin{verbatim}
522 array([10, 20, 30, 1])
522 array([10, 20, 30, 1])
523 \end{verbatim}
523 \end{verbatim}
524 \end{codeoutput}
524 \end{codeoutput}
525 \end{codecell}
525 \end{codecell}
526 Above we created an array from an existing list; now let us see
526 Above we created an array from an existing list; now let us see
527 other ways in which we can create arrays, which we'll illustrate next. A
527 other ways in which we can create arrays, which we'll illustrate next. A
528 common need is to have an array initialized with a constant value, and
528 common need is to have an array initialized with a constant value, and
529 very often this value is 0 or 1 (suitable as starting value for additive
529 very often this value is 0 or 1 (suitable as starting value for additive
530 and multiplicative loops respectively); \texttt{zeros} creates arrays of
530 and multiplicative loops respectively); \texttt{zeros} creates arrays of
531 all zeros, with any desired dtype:
531 all zeros, with any desired dtype:
532
532
533 \begin{codecell}
533 \begin{codecell}
534 \begin{codeinput}
534 \begin{codeinput}
535 \begin{lstlisting}
535 \begin{lstlisting}
536 np.zeros(5, float)
536 np.zeros(5, float)
537 \end{lstlisting}
537 \end{lstlisting}
538 \end{codeinput}
538 \end{codeinput}
539 \begin{codeoutput}
539 \begin{codeoutput}
540 \begin{verbatim}
540 \begin{verbatim}
541 array([ 0., 0., 0., 0., 0.])
541 array([ 0., 0., 0., 0., 0.])
542 \end{verbatim}
542 \end{verbatim}
543 \end{codeoutput}
543 \end{codeoutput}
544 \end{codecell}
544 \end{codecell}
545 \begin{codecell}
545 \begin{codecell}
546 \begin{codeinput}
546 \begin{codeinput}
547 \begin{lstlisting}
547 \begin{lstlisting}
548 np.zeros(3, int)
548 np.zeros(3, int)
549 \end{lstlisting}
549 \end{lstlisting}
550 \end{codeinput}
550 \end{codeinput}
551 \begin{codeoutput}
551 \begin{codeoutput}
552 \begin{verbatim}
552 \begin{verbatim}
553 array([0, 0, 0])
553 array([0, 0, 0])
554 \end{verbatim}
554 \end{verbatim}
555 \end{codeoutput}
555 \end{codeoutput}
556 \end{codecell}
556 \end{codecell}
557 \begin{codecell}
557 \begin{codecell}
558 \begin{codeinput}
558 \begin{codeinput}
559 \begin{lstlisting}
559 \begin{lstlisting}
560 np.zeros(3, complex)
560 np.zeros(3, complex)
561 \end{lstlisting}
561 \end{lstlisting}
562 \end{codeinput}
562 \end{codeinput}
563 \begin{codeoutput}
563 \begin{codeoutput}
564 \begin{verbatim}
564 \begin{verbatim}
565 array([ 0.+0.j, 0.+0.j, 0.+0.j])
565 array([ 0.+0.j, 0.+0.j, 0.+0.j])
566 \end{verbatim}
566 \end{verbatim}
567 \end{codeoutput}
567 \end{codeoutput}
568 \end{codecell}
568 \end{codecell}
569 and similarly for \texttt{ones}:
569 and similarly for \texttt{ones}:
570
570
571 \begin{codecell}
571 \begin{codecell}
572 \begin{codeinput}
572 \begin{codeinput}
573 \begin{lstlisting}
573 \begin{lstlisting}
574 print '5 ones:', np.ones(5)
574 print '5 ones:', np.ones(5)
575 \end{lstlisting}
575 \end{lstlisting}
576 \end{codeinput}
576 \end{codeinput}
577 \begin{codeoutput}
577 \begin{codeoutput}
578 \begin{verbatim}
578 \begin{verbatim}
579 5 ones: [ 1. 1. 1. 1. 1.]
579 5 ones: [ 1. 1. 1. 1. 1.]
580 \end{verbatim}
580 \end{verbatim}
581 \end{codeoutput}
581 \end{codeoutput}
582 \end{codecell}
582 \end{codecell}
583 If we want an array initialized with an arbitrary value, we can create
583 If we want an array initialized with an arbitrary value, we can create
584 an empty array and then use the fill method to put the value we want
584 an empty array and then use the fill method to put the value we want
585 into the array:
585 into the array:
586
586
587 \begin{codecell}
587 \begin{codecell}
588 \begin{codeinput}
588 \begin{codeinput}
589 \begin{lstlisting}
589 \begin{lstlisting}
590 a = empty(4)
590 a = empty(4)
591 a.fill(5.5)
591 a.fill(5.5)
592 a
592 a
593 \end{lstlisting}
593 \end{lstlisting}
594 \end{codeinput}
594 \end{codeinput}
595 \begin{codeoutput}
595 \begin{codeoutput}
596 \begin{verbatim}
596 \begin{verbatim}
597 array([ 5.5, 5.5, 5.5, 5.5])
597 array([ 5.5, 5.5, 5.5, 5.5])
598 \end{verbatim}
598 \end{verbatim}
599 \end{codeoutput}
599 \end{codeoutput}
600 \end{codecell}
600 \end{codecell}
601 Numpy also offers the \texttt{arange} function, which works like the
601 Numpy also offers the \texttt{arange} function, which works like the
602 builtin \texttt{range} but returns an array instead of a list:
602 builtin \texttt{range} but returns an array instead of a list:
603
603
604 \begin{codecell}
604 \begin{codecell}
605 \begin{codeinput}
605 \begin{codeinput}
606 \begin{lstlisting}
606 \begin{lstlisting}
607 np.arange(5)
607 np.arange(5)
608 \end{lstlisting}
608 \end{lstlisting}
609 \end{codeinput}
609 \end{codeinput}
610 \begin{codeoutput}
610 \begin{codeoutput}
611 \begin{verbatim}
611 \begin{verbatim}
612 array([0, 1, 2, 3, 4])
612 array([0, 1, 2, 3, 4])
613 \end{verbatim}
613 \end{verbatim}
614 \end{codeoutput}
614 \end{codeoutput}
615 \end{codecell}
615 \end{codecell}
616 and the \texttt{linspace} and \texttt{logspace} functions to create
616 and the \texttt{linspace} and \texttt{logspace} functions to create
617 linearly and logarithmically-spaced grids respectively, with a fixed
617 linearly and logarithmically-spaced grids respectively, with a fixed
618 number of points and including both ends of the specified interval:
618 number of points and including both ends of the specified interval:
619
619
620 \begin{codecell}
620 \begin{codecell}
621 \begin{codeinput}
621 \begin{codeinput}
622 \begin{lstlisting}
622 \begin{lstlisting}
623 print "A linear grid between 0 and 1:", np.linspace(0, 1, 5)
623 print "A linear grid between 0 and 1:", np.linspace(0, 1, 5)
624 print "A logarithmic grid between 10**1 and 10**4: ", np.logspace(1, 4, 4)
624 print "A logarithmic grid between 10**1 and 10**4: ", np.logspace(1, 4, 4)
625 \end{lstlisting}
625 \end{lstlisting}
626 \end{codeinput}
626 \end{codeinput}
627 \begin{codeoutput}
627 \begin{codeoutput}
628 \begin{verbatim}
628 \begin{verbatim}
629 A linear grid between 0 and 1: [ 0. 0.25 0.5 0.75 1. ]
629 A linear grid between 0 and 1: [ 0. 0.25 0.5 0.75 1. ]
630 A logarithmic grid between 10**1 and 10**4: [ 10. 100. 1000. 10000.]
630 A logarithmic grid between 10**1 and 10**4: [ 10. 100. 1000. 10000.]
631 \end{verbatim}
631 \end{verbatim}
632 \end{codeoutput}
632 \end{codeoutput}
633 \end{codecell}
633 \end{codecell}
634 Finally, it is often useful to create arrays with random numbers that
634 Finally, it is often useful to create arrays with random numbers that
635 follow a specific distribution. The \texttt{np.random} module contains a
635 follow a specific distribution. The \texttt{np.random} module contains a
636 number of functions that can be used to this effect, for example this
636 number of functions that can be used to this effect, for example this
637 will produce an array of 5 random samples taken from a standard normal
637 will produce an array of 5 random samples taken from a standard normal
638 distribution (0 mean and variance 1):
638 distribution (0 mean and variance 1):
639
639
640 \begin{codecell}
640 \begin{codecell}
641 \begin{codeinput}
641 \begin{codeinput}
642 \begin{lstlisting}
642 \begin{lstlisting}
643 np.random.randn(5)
643 np.random.randn(5)
644 \end{lstlisting}
644 \end{lstlisting}
645 \end{codeinput}
645 \end{codeinput}
646 \begin{codeoutput}
646 \begin{codeoutput}
647 \begin{verbatim}
647 \begin{verbatim}
648 array([-0.08633343, -0.67375434, 1.00589536, 0.87081651, 1.65597822])
648 array([-0.08633343, -0.67375434, 1.00589536, 0.87081651, 1.65597822])
649 \end{verbatim}
649 \end{verbatim}
650 \end{codeoutput}
650 \end{codeoutput}
651 \end{codecell}
651 \end{codecell}
652 whereas this will also give 5 samples, but from a normal distribution
652 whereas this will also give 5 samples, but from a normal distribution
653 with a mean of 10 and a standard deviation of 3:
653 with a mean of 10 and a standard deviation of 3:
654
654
655 \begin{codecell}
655 \begin{codecell}
656 \begin{codeinput}
656 \begin{codeinput}
657 \begin{lstlisting}
657 \begin{lstlisting}
658 norm10 = np.random.normal(10, 3, 5)
658 norm10 = np.random.normal(10, 3, 5)
659 norm10
659 norm10
660 \end{lstlisting}
660 \end{lstlisting}
661 \end{codeinput}
661 \end{codeinput}
662 \begin{codeoutput}
662 \begin{codeoutput}
663 \begin{verbatim}
663 \begin{verbatim}
664 array([ 8.94879575, 5.53038269, 8.24847281, 12.14944165, 11.56209294])
664 array([ 8.94879575, 5.53038269, 8.24847281, 12.14944165, 11.56209294])
665 \end{verbatim}
665 \end{verbatim}
666 \end{codeoutput}
666 \end{codeoutput}
667 \end{codecell}
667 \end{codecell}
668 \subsection{Indexing with other arrays}
668 \subsection{Indexing with other arrays}
669 Above we saw how to index arrays with single numbers and slices, just
669 Above we saw how to index arrays with single numbers and slices, just
670 like Python lists. But arrays allow for a more sophisticated kind of
670 like Python lists. But arrays allow for a more sophisticated kind of
671 indexing which is very powerful: you can index an array with another
671 indexing which is very powerful: you can index an array with another
672 array, and in particular with an array of boolean values. This is
672 array, and in particular with an array of boolean values. This is
673 particularly useful to extract information from an array that matches a
673 particularly useful to extract information from an array that matches a
674 certain condition.
674 certain condition.
675
675
676 Consider for example that in the array \texttt{norm10} we want to
676 Consider for example that in the array \texttt{norm10} we want to
677 replace all values above 9 with the value 0. We can do so by first
677 replace all values above 9 with the value 0. We can do so by first
678 finding the \emph{mask} that indicates where this condition is true or
678 finding the \emph{mask} that indicates where this condition is true or
679 false:
679 false:
680
680
681 \begin{codecell}
681 \begin{codecell}
682 \begin{codeinput}
682 \begin{codeinput}
683 \begin{lstlisting}
683 \begin{lstlisting}
684 mask = norm10 > 9
684 mask = norm10 > 9
685 mask
685 mask
686 \end{lstlisting}
686 \end{lstlisting}
687 \end{codeinput}
687 \end{codeinput}
688 \begin{codeoutput}
688 \begin{codeoutput}
689 \begin{verbatim}
689 \begin{verbatim}
690 array([False, False, False, True, True], dtype=bool)
690 array([False, False, False, True, True], dtype=bool)
691 \end{verbatim}
691 \end{verbatim}
692 \end{codeoutput}
692 \end{codeoutput}
693 \end{codecell}
693 \end{codecell}
694 Now that we have this mask, we can use it to either read those values or
694 Now that we have this mask, we can use it to either read those values or
695 to reset them to 0:
695 to reset them to 0:
696
696
697 \begin{codecell}
697 \begin{codecell}
698 \begin{codeinput}
698 \begin{codeinput}
699 \begin{lstlisting}
699 \begin{lstlisting}
700 print 'Values above 9:', norm10[mask]
700 print 'Values above 9:', norm10[mask]
701 \end{lstlisting}
701 \end{lstlisting}
702 \end{codeinput}
702 \end{codeinput}
703 \begin{codeoutput}
703 \begin{codeoutput}
704 \begin{verbatim}
704 \begin{verbatim}
705 Values above 9: [ 12.14944165 11.56209294]
705 Values above 9: [ 12.14944165 11.56209294]
706 \end{verbatim}
706 \end{verbatim}
707 \end{codeoutput}
707 \end{codeoutput}
708 \end{codecell}
708 \end{codecell}
709 \begin{codecell}
709 \begin{codecell}
710 \begin{codeinput}
710 \begin{codeinput}
711 \begin{lstlisting}
711 \begin{lstlisting}
712 print 'Resetting all values above 9 to 0...'
712 print 'Resetting all values above 9 to 0...'
713 norm10[mask] = 0
713 norm10[mask] = 0
714 print norm10
714 print norm10
715 \end{lstlisting}
715 \end{lstlisting}
716 \end{codeinput}
716 \end{codeinput}
717 \begin{codeoutput}
717 \begin{codeoutput}
718 \begin{verbatim}
718 \begin{verbatim}
719 Resetting all values above 9 to 0...
719 Resetting all values above 9 to 0...
720 [ 8.94879575 5.53038269 8.24847281 0. 0. ]
720 [ 8.94879575 5.53038269 8.24847281 0. 0. ]
721 \end{verbatim}
721 \end{verbatim}
722 \end{codeoutput}
722 \end{codeoutput}
723 \end{codecell}
723 \end{codecell}
724 \subsection{Arrays with more than one dimension}
724 \subsection{Arrays with more than one dimension}
725 Up until now all our examples have used one-dimensional arrays. But
725 Up until now all our examples have used one-dimensional arrays. But
726 Numpy can create arrays of arbitrary dimensions, and all the methods
726 Numpy can create arrays of arbitrary dimensions, and all the methods
727 illustrated in the previous section work with more than one dimension.
727 illustrated in the previous section work with more than one dimension.
728 For example, a list of lists can be used to initialize a two dimensional
728 For example, a list of lists can be used to initialize a two dimensional
729 array:
729 array:
730
730
731 \begin{codecell}
731 \begin{codecell}
732 \begin{codeinput}
732 \begin{codeinput}
733 \begin{lstlisting}
733 \begin{lstlisting}
734 lst2 = [[1, 2], [3, 4]]
734 lst2 = [[1, 2], [3, 4]]
735 arr2 = np.array([[1, 2], [3, 4]])
735 arr2 = np.array([[1, 2], [3, 4]])
736 arr2
736 arr2
737 \end{lstlisting}
737 \end{lstlisting}
738 \end{codeinput}
738 \end{codeinput}
739 \begin{codeoutput}
739 \begin{codeoutput}
740 \begin{verbatim}
740 \begin{verbatim}
741 array([[1, 2],
741 array([[1, 2],
742 [3, 4]])
742 [3, 4]])
743 \end{verbatim}
743 \end{verbatim}
744 \end{codeoutput}
744 \end{codeoutput}
745 \end{codecell}
745 \end{codecell}
746 With two-dimensional arrays we start seeing the power of numpy: while a
746 With two-dimensional arrays we start seeing the power of numpy: while a
747 nested list can be indexed by repeatedly using the \texttt{{[} {]}}
747 nested list can be indexed by repeatedly using the \texttt{{[} {]}}
748 operator, multidimensional arrays support a much more natural indexing
748 operator, multidimensional arrays support a much more natural indexing
749 syntax with a single \texttt{{[} {]}} and a set of indices separated by
749 syntax with a single \texttt{{[} {]}} and a set of indices separated by
750 commas:
750 commas:
751
751
752 \begin{codecell}
752 \begin{codecell}
753 \begin{codeinput}
753 \begin{codeinput}
754 \begin{lstlisting}
754 \begin{lstlisting}
755 print lst2[0][1]
755 print lst2[0][1]
756 print arr2[0,1]
756 print arr2[0,1]
757 \end{lstlisting}
757 \end{lstlisting}
758 \end{codeinput}
758 \end{codeinput}
759 \begin{codeoutput}
759 \begin{codeoutput}
760 \begin{verbatim}
760 \begin{verbatim}
761 2
761 2
762 2
762 2
763 \end{verbatim}
763 \end{verbatim}
764 \end{codeoutput}
764 \end{codeoutput}
765 \end{codecell}
765 \end{codecell}
766 Most of the array creation functions listed above can be used with more
766 Most of the array creation functions listed above can be used with more
767 than one dimension, for example:
767 than one dimension, for example:
768
768
769 \begin{codecell}
769 \begin{codecell}
770 \begin{codeinput}
770 \begin{codeinput}
771 \begin{lstlisting}
771 \begin{lstlisting}
772 np.zeros((2,3))
772 np.zeros((2,3))
773 \end{lstlisting}
773 \end{lstlisting}
774 \end{codeinput}
774 \end{codeinput}
775 \begin{codeoutput}
775 \begin{codeoutput}
776 \begin{verbatim}
776 \begin{verbatim}
777 array([[ 0., 0., 0.],
777 array([[ 0., 0., 0.],
778 [ 0., 0., 0.]])
778 [ 0., 0., 0.]])
779 \end{verbatim}
779 \end{verbatim}
780 \end{codeoutput}
780 \end{codeoutput}
781 \end{codecell}
781 \end{codecell}
782 \begin{codecell}
782 \begin{codecell}
783 \begin{codeinput}
783 \begin{codeinput}
784 \begin{lstlisting}
784 \begin{lstlisting}
785 np.random.normal(10, 3, (2, 4))
785 np.random.normal(10, 3, (2, 4))
786 \end{lstlisting}
786 \end{lstlisting}
787 \end{codeinput}
787 \end{codeinput}
788 \begin{codeoutput}
788 \begin{codeoutput}
789 \begin{verbatim}
789 \begin{verbatim}
790 array([[ 11.26788826, 4.29619866, 11.09346496, 9.73861307],
790 array([[ 11.26788826, 4.29619866, 11.09346496, 9.73861307],
791 [ 10.54025996, 9.5146268 , 10.80367214, 13.62204505]])
791 [ 10.54025996, 9.5146268 , 10.80367214, 13.62204505]])
792 \end{verbatim}
792 \end{verbatim}
793 \end{codeoutput}
793 \end{codeoutput}
794 \end{codecell}
794 \end{codecell}
795 In fact, the shape of an array can be changed at any time, as long as
795 In fact, the shape of an array can be changed at any time, as long as
796 the total number of elements is unchanged. For example, if we want a 2x4
796 the total number of elements is unchanged. For example, if we want a 2x4
797 array with numbers increasing from 0, the easiest way to create it is:
797 array with numbers increasing from 0, the easiest way to create it is:
798
798
799 \begin{codecell}
799 \begin{codecell}
800 \begin{codeinput}
800 \begin{codeinput}
801 \begin{lstlisting}
801 \begin{lstlisting}
802 arr = np.arange(8).reshape(2,4)
802 arr = np.arange(8).reshape(2,4)
803 print arr
803 print arr
804 \end{lstlisting}
804 \end{lstlisting}
805 \end{codeinput}
805 \end{codeinput}
806 \begin{codeoutput}
806 \begin{codeoutput}
807 \begin{verbatim}
807 \begin{verbatim}
808 [[0 1 2 3]
808 [[0 1 2 3]
809 [4 5 6 7]]
809 [4 5 6 7]]
810 \end{verbatim}
810 \end{verbatim}
811 \end{codeoutput}
811 \end{codeoutput}
812 \end{codecell}
812 \end{codecell}
813 With multidimensional arrays, you can also use slices, and you can mix
813 With multidimensional arrays, you can also use slices, and you can mix
814 and match slices and single indices in the different dimensions (using
814 and match slices and single indices in the different dimensions (using
815 the same array as above):
815 the same array as above):
816
816
817 \begin{codecell}
817 \begin{codecell}
818 \begin{codeinput}
818 \begin{codeinput}
819 \begin{lstlisting}
819 \begin{lstlisting}
820 print 'Slicing in the second row:', arr[1, 2:4]
820 print 'Slicing in the second row:', arr[1, 2:4]
821 print 'All rows, third column :', arr[:, 2]
821 print 'All rows, third column :', arr[:, 2]
822 \end{lstlisting}
822 \end{lstlisting}
823 \end{codeinput}
823 \end{codeinput}
824 \begin{codeoutput}
824 \begin{codeoutput}
825 \begin{verbatim}
825 \begin{verbatim}
826 Slicing in the second row: [6 7]
826 Slicing in the second row: [6 7]
827 All rows, third column : [2 6]
827 All rows, third column : [2 6]
828 \end{verbatim}
828 \end{verbatim}
829 \end{codeoutput}
829 \end{codeoutput}
830 \end{codecell}
830 \end{codecell}
831 If you only provide one index, then you will get an array with one less
831 If you only provide one index, then you will get an array with one less
832 dimension containing that row:
832 dimension containing that row:
833
833
834 \begin{codecell}
834 \begin{codecell}
835 \begin{codeinput}
835 \begin{codeinput}
836 \begin{lstlisting}
836 \begin{lstlisting}
837 print 'First row: ', arr[0]
837 print 'First row: ', arr[0]
838 print 'Second row: ', arr[1]
838 print 'Second row: ', arr[1]
839 \end{lstlisting}
839 \end{lstlisting}
840 \end{codeinput}
840 \end{codeinput}
841 \begin{codeoutput}
841 \begin{codeoutput}
842 \begin{verbatim}
842 \begin{verbatim}
843 First row: [0 1 2 3]
843 First row: [0 1 2 3]
844 Second row: [4 5 6 7]
844 Second row: [4 5 6 7]
845 \end{verbatim}
845 \end{verbatim}
846 \end{codeoutput}
846 \end{codeoutput}
847 \end{codecell}
847 \end{codecell}
848 Now that we have seen how to create arrays with more than one dimension,
848 Now that we have seen how to create arrays with more than one dimension,
849 it's a good idea to look at some of the most useful properties and
849 it's a good idea to look at some of the most useful properties and
850 methods that arrays have. The following provide basic information about
850 methods that arrays have. The following provide basic information about
851 the size, shape and data in the array:
851 the size, shape and data in the array:
852
852
853 \begin{codecell}
853 \begin{codecell}
854 \begin{codeinput}
854 \begin{codeinput}
855 \begin{lstlisting}
855 \begin{lstlisting}
856 print 'Data type :', arr.dtype
856 print 'Data type :', arr.dtype
857 print 'Total number of elements :', arr.size
857 print 'Total number of elements :', arr.size
858 print 'Number of dimensions :', arr.ndim
858 print 'Number of dimensions :', arr.ndim
859 print 'Shape (dimensionality) :', arr.shape
859 print 'Shape (dimensionality) :', arr.shape
860 print 'Memory used (in bytes) :', arr.nbytes
860 print 'Memory used (in bytes) :', arr.nbytes
861 \end{lstlisting}
861 \end{lstlisting}
862 \end{codeinput}
862 \end{codeinput}
863 \begin{codeoutput}
863 \begin{codeoutput}
864 \begin{verbatim}
864 \begin{verbatim}
865 Data type : int32
865 Data type : int32
866 Total number of elements : 8
866 Total number of elements : 8
867 Number of dimensions : 2
867 Number of dimensions : 2
868 Shape (dimensionality) : (2, 4)
868 Shape (dimensionality) : (2, 4)
869 Memory used (in bytes) : 32
869 Memory used (in bytes) : 32
870 \end{verbatim}
870 \end{verbatim}
871 \end{codeoutput}
871 \end{codeoutput}
872 \end{codecell}
872 \end{codecell}
873 Arrays also have many useful methods, some especially useful ones are:
873 Arrays also have many useful methods, some especially useful ones are:
874
874
875 \begin{codecell}
875 \begin{codecell}
876 \begin{codeinput}
876 \begin{codeinput}
877 \begin{lstlisting}
877 \begin{lstlisting}
878 print 'Minimum and maximum :', arr.min(), arr.max()
878 print 'Minimum and maximum :', arr.min(), arr.max()
879 print 'Sum and product of all elements :', arr.sum(), arr.prod()
879 print 'Sum and product of all elements :', arr.sum(), arr.prod()
880 print 'Mean and standard deviation :', arr.mean(), arr.std()
880 print 'Mean and standard deviation :', arr.mean(), arr.std()
881 \end{lstlisting}
881 \end{lstlisting}
882 \end{codeinput}
882 \end{codeinput}
883 \begin{codeoutput}
883 \begin{codeoutput}
884 \begin{verbatim}
884 \begin{verbatim}
885 Minimum and maximum : 0 7
885 Minimum and maximum : 0 7
886 Sum and product of all elements : 28 0
886 Sum and product of all elements : 28 0
887 Mean and standard deviation : 3.5 2.29128784748
887 Mean and standard deviation : 3.5 2.29128784748
888 \end{verbatim}
888 \end{verbatim}
889 \end{codeoutput}
889 \end{codeoutput}
890 \end{codecell}
890 \end{codecell}
891 For these methods, the above operations are all computed on all the
891 For these methods, the above operations are all computed on all the
892 elements of the array. But for a multidimensional array, it's possible
892 elements of the array. But for a multidimensional array, it's possible
893 to do the computation along a single dimension, by passing the
893 to do the computation along a single dimension, by passing the
894 \texttt{axis} parameter; for example:
894 \texttt{axis} parameter; for example:
895
895
896 \begin{codecell}
896 \begin{codecell}
897 \begin{codeinput}
897 \begin{codeinput}
898 \begin{lstlisting}
898 \begin{lstlisting}
899 print 'For the following array:\n', arr
899 print 'For the following array:\n', arr
900 print 'The sum of elements along the rows is :', arr.sum(axis=1)
900 print 'The sum of elements along the rows is :', arr.sum(axis=1)
901 print 'The sum of elements along the columns is :', arr.sum(axis=0)
901 print 'The sum of elements along the columns is :', arr.sum(axis=0)
902 \end{lstlisting}
902 \end{lstlisting}
903 \end{codeinput}
903 \end{codeinput}
904 \begin{codeoutput}
904 \begin{codeoutput}
905 \begin{verbatim}
905 \begin{verbatim}
906 For the following array:
906 For the following array:
907 [[0 1 2 3]
907 [[0 1 2 3]
908 [4 5 6 7]]
908 [4 5 6 7]]
909 The sum of elements along the rows is : [ 6 22]
909 The sum of elements along the rows is : [ 6 22]
910 The sum of elements along the columns is : [ 4 6 8 10]
910 The sum of elements along the columns is : [ 4 6 8 10]
911 \end{verbatim}
911 \end{verbatim}
912 \end{codeoutput}
912 \end{codeoutput}
913 \end{codecell}
913 \end{codecell}
914 As you can see in this example, the value of the \texttt{axis} parameter
914 As you can see in this example, the value of the \texttt{axis} parameter
915 is the dimension which will be \emph{consumed} once the operation has
915 is the dimension which will be \emph{consumed} once the operation has
916 been carried out. This is why to sum along the rows we use
916 been carried out. This is why to sum along the rows we use
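917 \texttt{axis=1} (the column dimension is consumed, leaving one value per row).
917 \texttt{axis=1} (the column dimension is consumed, leaving one value per row).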
918
918
919 This can be easily illustrated with an example that has more dimensions;
919 This can be easily illustrated with an example that has more dimensions;
920 we create an array with 4 dimensions and shape \texttt{(3,4,5,6)} and
920 we create an array with 4 dimensions and shape \texttt{(3,4,5,6)} and
921 sum along the axis number 2 (i.e.~the \emph{third} axis, since in Python
921 sum along the axis number 2 (i.e.~the \emph{third} axis, since in Python
922 all counts are 0-based). That consumes the dimension whose length was 5,
922 all counts are 0-based). That consumes the dimension whose length was 5,
923 leaving us with a new array that has shape \texttt{(3,4,6)}:
923 leaving us with a new array that has shape \texttt{(3,4,6)}:
924
924
925 \begin{codecell}
925 \begin{codecell}
926 \begin{codeinput}
926 \begin{codeinput}
927 \begin{lstlisting}
927 \begin{lstlisting}
928 np.zeros((3,4,5,6)).sum(2).shape
928 np.zeros((3,4,5,6)).sum(2).shape
929 \end{lstlisting}
929 \end{lstlisting}
930 \end{codeinput}
930 \end{codeinput}
931 \begin{codeoutput}
931 \begin{codeoutput}
932 \begin{verbatim}
932 \begin{verbatim}
933 (3, 4, 6)
933 (3, 4, 6)
934 \end{verbatim}
934 \end{verbatim}
935 \end{codeoutput}
935 \end{codeoutput}
936 \end{codecell}
936 \end{codecell}
937 Another widely used property of arrays is the \texttt{.T} attribute,
937 Another widely used property of arrays is the \texttt{.T} attribute,
938 which allows you to access the transpose of the array:
938 which allows you to access the transpose of the array:
939
939
940 \begin{codecell}
940 \begin{codecell}
941 \begin{codeinput}
941 \begin{codeinput}
942 \begin{lstlisting}
942 \begin{lstlisting}
943 print 'Array:\n', arr
943 print 'Array:\n', arr
944 print 'Transpose:\n', arr.T
944 print 'Transpose:\n', arr.T
945 \end{lstlisting}
945 \end{lstlisting}
946 \end{codeinput}
946 \end{codeinput}
947 \begin{codeoutput}
947 \begin{codeoutput}
948 \begin{verbatim}
948 \begin{verbatim}
949 Array:
949 Array:
950 [[0 1 2 3]
950 [[0 1 2 3]
951 [4 5 6 7]]
951 [4 5 6 7]]
952 Transpose:
952 Transpose:
953 [[0 4]
953 [[0 4]
954 [1 5]
954 [1 5]
955 [2 6]
955 [2 6]
956 [3 7]]
956 [3 7]]
957 \end{verbatim}
957 \end{verbatim}
958 \end{codeoutput}
958 \end{codeoutput}
959 \end{codecell}
959 \end{codecell}
960 We don't have time here to look at all the methods and properties of
960 We don't have time here to look at all the methods and properties of
961 arrays; here's a complete list. Simply try exploring some of these
961 arrays; here's a complete list. Simply try exploring some of these
962 in IPython to learn more, or read their description in the full Numpy
962 in IPython to learn more, or read their description in the full Numpy
963 documentation:
963 documentation:
964
964
965 \begin{verbatim}
965 \begin{verbatim}
966 arr.T arr.copy arr.getfield arr.put arr.squeeze
966 arr.T arr.copy arr.getfield arr.put arr.squeeze
967 arr.all arr.ctypes arr.imag arr.ravel arr.std
967 arr.all arr.ctypes arr.imag arr.ravel arr.std
968 arr.any arr.cumprod arr.item arr.real arr.strides
968 arr.any arr.cumprod arr.item arr.real arr.strides
969 arr.argmax arr.cumsum arr.itemset arr.repeat arr.sum
969 arr.argmax arr.cumsum arr.itemset arr.repeat arr.sum
970 arr.argmin arr.data arr.itemsize arr.reshape arr.swapaxes
970 arr.argmin arr.data arr.itemsize arr.reshape arr.swapaxes
971 arr.argsort arr.diagonal arr.max arr.resize arr.take
971 arr.argsort arr.diagonal arr.max arr.resize arr.take
972 arr.astype arr.dot arr.mean arr.round arr.tofile
972 arr.astype arr.dot arr.mean arr.round arr.tofile
973 arr.base arr.dtype arr.min arr.searchsorted arr.tolist
973 arr.base arr.dtype arr.min arr.searchsorted arr.tolist
974 arr.byteswap arr.dump arr.nbytes arr.setasflat arr.tostring
974 arr.byteswap arr.dump arr.nbytes arr.setasflat arr.tostring
975 arr.choose arr.dumps arr.ndim arr.setfield arr.trace
975 arr.choose arr.dumps arr.ndim arr.setfield arr.trace
976 arr.clip arr.fill arr.newbyteorder arr.setflags arr.transpose
976 arr.clip arr.fill arr.newbyteorder arr.setflags arr.transpose
977 arr.compress arr.flags arr.nonzero arr.shape arr.var
977 arr.compress arr.flags arr.nonzero arr.shape arr.var
978 arr.conj arr.flat arr.prod arr.size arr.view
978 arr.conj arr.flat arr.prod arr.size arr.view
979 arr.conjugate arr.flatten arr.ptp arr.sort
979 arr.conjugate arr.flatten arr.ptp arr.sort
980 \end{verbatim}
980 \end{verbatim}
981
981
982
982
983 \subsection{Operating with arrays}
983 \subsection{Operating with arrays}
984 Arrays support all regular arithmetic operators, and the numpy library
984 Arrays support all regular arithmetic operators, and the numpy library
985 also contains a complete collection of basic mathematical functions that
985 also contains a complete collection of basic mathematical functions that
986 operate on arrays. It is important to remember that in general, all
986 operate on arrays. It is important to remember that in general, all
987 operations with arrays are applied \emph{element-wise}, i.e., are
987 operations with arrays are applied \emph{element-wise}, i.e., are
988 applied to all the elements of the array at the same time. Consider for
988 applied to all the elements of the array at the same time. Consider for
989 example:
989 example:
990
990
991 \begin{codecell}
991 \begin{codecell}
992 \begin{codeinput}
992 \begin{codeinput}
993 \begin{lstlisting}
993 \begin{lstlisting}
994 arr1 = np.arange(4)
994 arr1 = np.arange(4)
995 arr2 = np.arange(10, 14)
995 arr2 = np.arange(10, 14)
996 print arr1, '+', arr2, '=', arr1+arr2
996 print arr1, '+', arr2, '=', arr1+arr2
997 \end{lstlisting}
997 \end{lstlisting}
998 \end{codeinput}
998 \end{codeinput}
999 \begin{codeoutput}
999 \begin{codeoutput}
1000 \begin{verbatim}
1000 \begin{verbatim}
1001 [0 1 2 3] + [10 11 12 13] = [10 12 14 16]
1001 [0 1 2 3] + [10 11 12 13] = [10 12 14 16]
1002 \end{verbatim}
1002 \end{verbatim}
1003 \end{codeoutput}
1003 \end{codeoutput}
1004 \end{codecell}
1004 \end{codecell}
1005 Importantly, you must remember that even the multiplication operator is
1005 Importantly, you must remember that even the multiplication operator is
1006 by default applied element-wise, it is \emph{not} the matrix
1006 by default applied element-wise, it is \emph{not} the matrix
1007 multiplication from linear algebra (as is the case in Matlab, for
1007 multiplication from linear algebra (as is the case in Matlab, for
1008 example):
1008 example):
1009
1009
1010 \begin{codecell}
1010 \begin{codecell}
1011 \begin{codeinput}
1011 \begin{codeinput}
1012 \begin{lstlisting}
1012 \begin{lstlisting}
1013 print arr1, '*', arr2, '=', arr1*arr2
1013 print arr1, '*', arr2, '=', arr1*arr2
1014 \end{lstlisting}
1014 \end{lstlisting}
1015 \end{codeinput}
1015 \end{codeinput}
1016 \begin{codeoutput}
1016 \begin{codeoutput}
1017 \begin{verbatim}
1017 \begin{verbatim}
1018 [0 1 2 3] * [10 11 12 13] = [ 0 11 24 39]
1018 [0 1 2 3] * [10 11 12 13] = [ 0 11 24 39]
1019 \end{verbatim}
1019 \end{verbatim}
1020 \end{codeoutput}
1020 \end{codeoutput}
1021 \end{codecell}
1021 \end{codecell}
1022 While this means that in principle arrays must always match in their
1022 While this means that in principle arrays must always match in their
1023 dimensionality in order for an operation to be valid, numpy will
1023 dimensionality in order for an operation to be valid, numpy will
1024 \emph{broadcast} dimensions when possible. For example, suppose that you
1024 \emph{broadcast} dimensions when possible. For example, suppose that you
1025 want to add the number 1.5 to \texttt{arr1}; the following would be a
1025 want to add the number 1.5 to \texttt{arr1}; the following would be a
1026 valid way to do it:
1026 valid way to do it:
1027
1027
1028 \begin{codecell}
1028 \begin{codecell}
1029 \begin{codeinput}
1029 \begin{codeinput}
1030 \begin{lstlisting}
1030 \begin{lstlisting}
1031 arr1 + 1.5*np.ones(4)
1031 arr1 + 1.5*np.ones(4)
1032 \end{lstlisting}
1032 \end{lstlisting}
1033 \end{codeinput}
1033 \end{codeinput}
1034 \begin{codeoutput}
1034 \begin{codeoutput}
1035 \begin{verbatim}
1035 \begin{verbatim}
1036 array([ 1.5, 2.5, 3.5, 4.5])
1036 array([ 1.5, 2.5, 3.5, 4.5])
1037 \end{verbatim}
1037 \end{verbatim}
1038 \end{codeoutput}
1038 \end{codeoutput}
1039 \end{codecell}
1039 \end{codecell}
1040 But thanks to numpy's broadcasting rules, the following is equally
1040 But thanks to numpy's broadcasting rules, the following is equally
1041 valid:
1041 valid:
1042
1042
1043 \begin{codecell}
1043 \begin{codecell}
1044 \begin{codeinput}
1044 \begin{codeinput}
1045 \begin{lstlisting}
1045 \begin{lstlisting}
1046 arr1 + 1.5
1046 arr1 + 1.5
1047 \end{lstlisting}
1047 \end{lstlisting}
1048 \end{codeinput}
1048 \end{codeinput}
1049 \begin{codeoutput}
1049 \begin{codeoutput}
1050 \begin{verbatim}
1050 \begin{verbatim}
1051 array([ 1.5, 2.5, 3.5, 4.5])
1051 array([ 1.5, 2.5, 3.5, 4.5])
1052 \end{verbatim}
1052 \end{verbatim}
1053 \end{codeoutput}
1053 \end{codeoutput}
1054 \end{codecell}
1054 \end{codecell}
1055 In this case, numpy looked at both operands and saw that the first
1055 In this case, numpy looked at both operands and saw that the first
1056 (\texttt{arr1}) was a one-dimensional array of length 4 and the second
1056 (\texttt{arr1}) was a one-dimensional array of length 4 and the second
1057 was a scalar, considered a zero-dimensional object. The broadcasting
1057 was a scalar, considered a zero-dimensional object. The broadcasting
1058 rules allow numpy to:
1058 rules allow numpy to:
1059
1059
1060 \begin{itemize}
1060 \begin{itemize}
1061 \item
1061 \item
1062 \emph{create} new dimensions of length 1 (since this doesn't change
1062 \emph{create} new dimensions of length 1 (since this doesn't change
1063 the size of the array)
1063 the size of the array)
1064 \item
1064 \item
1065 `stretch' a dimension of length 1 that needs to be matched to a
1065 `stretch' a dimension of length 1 that needs to be matched to a
1066 dimension of a different size.
1066 dimension of a different size.
1067 \end{itemize}
1067 \end{itemize}
1068 So in the above example, the scalar 1.5 is effectively:
1068 So in the above example, the scalar 1.5 is effectively:
1069
1069
1070 \begin{itemize}
1070 \begin{itemize}
1071 \item
1071 \item
1072 first `promoted' to a 1-dimensional array of length 1
1072 first `promoted' to a 1-dimensional array of length 1
1073 \item
1073 \item
1074 then, this array is `stretched' to length 4 to match the dimension of
1074 then, this array is `stretched' to length 4 to match the dimension of
1075 \texttt{arr1}.
1075 \texttt{arr1}.
1076 \end{itemize}
1076 \end{itemize}
1077 After these two operations are complete, the addition can proceed as now
1077 After these two operations are complete, the addition can proceed as now
1078 both operands are one-dimensional arrays of length 4.
1078 both operands are one-dimensional arrays of length 4.
1079
1079
1080 This broadcasting behavior is in practice enormously powerful,
1080 This broadcasting behavior is in practice enormously powerful,
1081 especially because when numpy broadcasts to create new dimensions or to
1081 especially because when numpy broadcasts to create new dimensions or to
1082 `stretch' existing ones, it doesn't actually replicate the data. In the
1082 `stretch' existing ones, it doesn't actually replicate the data. In the
1083 example above the operation is carried out \emph{as if} the 1.5 was a 1-d
1083 example above the operation is carried out \emph{as if} the 1.5 was a 1-d
1084 array with 1.5 in all of its entries, but no actual array was ever
1084 array with 1.5 in all of its entries, but no actual array was ever
1085 created. This can save lots of memory in cases when the arrays in
1085 created. This can save lots of memory in cases when the arrays in
1086 question are large and can have significant performance implications.
1086 question are large and can have significant performance implications.
1087
1087
1088 The general rule is: when operating on two arrays, NumPy compares their
1088 The general rule is: when operating on two arrays, NumPy compares their
1089 shapes element-wise. It starts with the trailing dimensions, and works
1089 shapes element-wise. It starts with the trailing dimensions, and works
1090 its way forward, creating dimensions of length 1 as needed. Two
1090 its way forward, creating dimensions of length 1 as needed. Two
1091 dimensions are considered compatible when
1091 dimensions are considered compatible when
1092
1092
1093 \begin{itemize}
1093 \begin{itemize}
1094 \item
1094 \item
1095 they are equal to begin with, or
1095 they are equal to begin with, or
1096 \item
1096 \item
1097 one of them is 1; in this case numpy will do the `stretching' to make
1097 one of them is 1; in this case numpy will do the `stretching' to make
1098 them equal.
1098 them equal.
1099 \end{itemize}
1099 \end{itemize}
1100 If these conditions are not met, a
1100 If these conditions are not met, a
1101 \texttt{ValueError: operands could not be broadcast together} exception is thrown,
1101 \texttt{ValueError: operands could not be broadcast together} exception is thrown,
1102 indicating that the arrays have incompatible shapes. The size of the
1102 indicating that the arrays have incompatible shapes. The size of the
1103 resulting array is the maximum size along each dimension of the input
1103 resulting array is the maximum size along each dimension of the input
1104 arrays.
1104 arrays.
1105
1105
1106 This shows how the broadcasting rules work in several dimensions:
1106 This shows how the broadcasting rules work in several dimensions:
1107
1107
1108 \begin{codecell}
1108 \begin{codecell}
1109 \begin{codeinput}
1109 \begin{codeinput}
1110 \begin{lstlisting}
1110 \begin{lstlisting}
1111 b = np.array([2, 3, 4, 5])
1111 b = np.array([2, 3, 4, 5])
1112 print arr, '\n\n+', b , '\n----------------\n', arr + b
1112 print arr, '\n\n+', b , '\n----------------\n', arr + b
1113 \end{lstlisting}
1113 \end{lstlisting}
1114 \end{codeinput}
1114 \end{codeinput}
1115 \begin{codeoutput}
1115 \begin{codeoutput}
1116 \begin{verbatim}
1116 \begin{verbatim}
1117 [[0 1 2 3]
1117 [[0 1 2 3]
1118 [4 5 6 7]]
1118 [4 5 6 7]]
1119
1119
1120 + [2 3 4 5]
1120 + [2 3 4 5]
1121 ----------------
1121 ----------------
1122 [[ 2 4 6 8]
1122 [[ 2 4 6 8]
1123 [ 6 8 10 12]]
1123 [ 6 8 10 12]]
1124 \end{verbatim}
1124 \end{verbatim}
1125 \end{codeoutput}
1125 \end{codeoutput}
1126 \end{codecell}
1126 \end{codecell}
1127 Now, how could you use broadcasting to say add \texttt{{[}4, 6{]}} along
1127 Now, how could you use broadcasting to say add \texttt{{[}4, 6{]}} along
1128 the rows to \texttt{arr} above? Simply performing the direct addition
1128 the rows to \texttt{arr} above? Simply performing the direct addition
1129 will produce the error we previously mentioned:
1129 will produce the error we previously mentioned:
1130
1130
1131 \begin{codecell}
1131 \begin{codecell}
1132 \begin{codeinput}
1132 \begin{codeinput}
1133 \begin{lstlisting}
1133 \begin{lstlisting}
1134 c = np.array([4, 6])
1134 c = np.array([4, 6])
1135 arr + c
1135 arr + c
1136 \end{lstlisting}
1136 \end{lstlisting}
1137 \end{codeinput}
1137 \end{codeinput}
1138 \begin{codeoutput}
1138 \begin{codeoutput}
1139 \begin{traceback}
1139 \begin{traceback}
1140 \begin{verbatim}
1140 \begin{verbatim}
1141 ---------------------------------------------------------------------------
1141 ---------------------------------------------------------------------------
1142 ValueError Traceback (most recent call last)
1142 ValueError Traceback (most recent call last)
1143 /home/fperez/teach/book-math-labtool/<ipython-input-45-62aa20ac1980> in <module>()
1143 /home/fperez/teach/book-math-labtool/<ipython-input-45-62aa20ac1980> in <module>()
1144 1 c = np.array([4, 6])
1144 1 c = np.array([4, 6])
1145 ----> 2 arr + c
1145 ----> 2 arr + c
1146
1146
1147 ValueError: operands could not be broadcast together with shapes (2,4) (2)
1147 ValueError: operands could not be broadcast together with shapes (2,4) (2)
1148 \end{verbatim}
1148 \end{verbatim}
1149 \end{traceback}
1149 \end{traceback}
1150 \end{codeoutput}
1150 \end{codeoutput}
1151 \end{codecell}
1151 \end{codecell}
1152 According to the rules above, the array \texttt{c} would need to have a
1152 According to the rules above, the array \texttt{c} would need to have a
1153 \emph{trailing} dimension of 1 for the broadcasting to work. It turns
1153 \emph{trailing} dimension of 1 for the broadcasting to work. It turns
1154 out that numpy allows you to `inject' new dimensions anywhere into an
1154 out that numpy allows you to `inject' new dimensions anywhere into an
1155 array on the fly, by indexing it with the special object
1155 array on the fly, by indexing it with the special object
1156 \texttt{np.newaxis}:
1156 \texttt{np.newaxis}:
1157
1157
1158 \begin{codecell}
1158 \begin{codecell}
1159 \begin{codeinput}
1159 \begin{codeinput}
1160 \begin{lstlisting}
1160 \begin{lstlisting}
1161 (c[:, np.newaxis]).shape
1161 (c[:, np.newaxis]).shape
1162 \end{lstlisting}
1162 \end{lstlisting}
1163 \end{codeinput}
1163 \end{codeinput}
1164 \begin{codeoutput}
1164 \begin{codeoutput}
1165 \begin{verbatim}
1165 \begin{verbatim}
1166 (2, 1)
1166 (2, 1)
1167 \end{verbatim}
1167 \end{verbatim}
1168 \end{codeoutput}
1168 \end{codeoutput}
1169 \end{codecell}
1169 \end{codecell}
1170 This is exactly what we need, and indeed it works:
1170 This is exactly what we need, and indeed it works:
1171
1171
1172 \begin{codecell}
1172 \begin{codecell}
1173 \begin{codeinput}
1173 \begin{codeinput}
1174 \begin{lstlisting}
1174 \begin{lstlisting}
1175 arr + c[:, np.newaxis]
1175 arr + c[:, np.newaxis]
1176 \end{lstlisting}
1176 \end{lstlisting}
1177 \end{codeinput}
1177 \end{codeinput}
1178 \begin{codeoutput}
1178 \begin{codeoutput}
1179 \begin{verbatim}
1179 \begin{verbatim}
1180 array([[ 4, 5, 6, 7],
1180 array([[ 4, 5, 6, 7],
1181 [10, 11, 12, 13]])
1181 [10, 11, 12, 13]])
1182 \end{verbatim}
1182 \end{verbatim}
1183 \end{codeoutput}
1183 \end{codeoutput}
1184 \end{codecell}
1184 \end{codecell}
1185 For the full broadcasting rules, please see the official Numpy docs,
1185 For the full broadcasting rules, please see the official Numpy docs,
1186 which describe them in detail and with more complex examples.
1186 which describe them in detail and with more complex examples.
1187
1187
1188 As we mentioned before, Numpy ships with a full complement of
1188 As we mentioned before, Numpy ships with a full complement of
1189 mathematical functions that work on entire arrays, including logarithms,
1189 mathematical functions that work on entire arrays, including logarithms,
1190 exponentials, trigonometric and hyperbolic trigonometric functions, etc.
1190 exponentials, trigonometric and hyperbolic trigonometric functions, etc.
1191 Furthermore, scipy ships a rich special function library in the
1191 Furthermore, scipy ships a rich special function library in the
1192 \texttt{scipy.special} module that includes Bessel, Airy, Fresnel,
1192 \texttt{scipy.special} module that includes Bessel, Airy, Fresnel,
1193 Laguerre and other classical special functions. For example, sampling
1193 Laguerre and other classical special functions. For example, sampling
1194 the sine function at 100 points between $0$ and $2\pi$ is as simple as:
1194 the sine function at 100 points between $0$ and $2\pi$ is as simple as:
1195
1195
1196 \begin{codecell}
1196 \begin{codecell}
1197 \begin{codeinput}
1197 \begin{codeinput}
1198 \begin{lstlisting}
1198 \begin{lstlisting}
1199 x = np.linspace(0, 2*np.pi, 100)
1199 x = np.linspace(0, 2*np.pi, 100)
1200 y = np.sin(x)
1200 y = np.sin(x)
1201 \end{lstlisting}
1201 \end{lstlisting}
1202 \end{codeinput}
1202 \end{codeinput}
1203 \end{codecell}
1203 \end{codecell}
1204 \subsection{Linear algebra in numpy}
1204 \subsection{Linear algebra in numpy}
1205 Numpy ships with a basic linear algebra library, and all arrays have a
1205 Numpy ships with a basic linear algebra library, and all arrays have a
1206 \texttt{dot} method whose behavior is that of the scalar dot product
1206 \texttt{dot} method whose behavior is that of the scalar dot product
1207 when its arguments are vectors (one-dimensional arrays) and the
1207 when its arguments are vectors (one-dimensional arrays) and the
1208 traditional matrix multiplication when one or both of its arguments are
1208 traditional matrix multiplication when one or both of its arguments are
1209 two-dimensional arrays:
1209 two-dimensional arrays:
1210
1210
1211 \begin{codecell}
1211 \begin{codecell}
1212 \begin{codeinput}
1212 \begin{codeinput}
1213 \begin{lstlisting}
1213 \begin{lstlisting}
1214 v1 = np.array([2, 3, 4])
1214 v1 = np.array([2, 3, 4])
1215 v2 = np.array([1, 0, 1])
1215 v2 = np.array([1, 0, 1])
1216 print v1, '.', v2, '=', v1.dot(v2)
1216 print v1, '.', v2, '=', v1.dot(v2)
1217 \end{lstlisting}
1217 \end{lstlisting}
1218 \end{codeinput}
1218 \end{codeinput}
1219 \begin{codeoutput}
1219 \begin{codeoutput}
1220 \begin{verbatim}
1220 \begin{verbatim}
1221 [2 3 4] . [1 0 1] = 6
1221 [2 3 4] . [1 0 1] = 6
1222 \end{verbatim}
1222 \end{verbatim}
1223 \end{codeoutput}
1223 \end{codeoutput}
1224 \end{codecell}
1224 \end{codecell}
1225 Here is a regular matrix-vector multiplication; note that the array
1225 Here is a regular matrix-vector multiplication; note that the array
1226 \texttt{v1} should be viewed as a \emph{column} vector in traditional
1226 \texttt{v1} should be viewed as a \emph{column} vector in traditional
1227 linear algebra notation; numpy makes no distinction between row and
1227 linear algebra notation; numpy makes no distinction between row and
1228 column vectors and simply verifies that the dimensions match the
1228 column vectors and simply verifies that the dimensions match the
1229 required rules of matrix multiplication; in this case we have a
1229 required rules of matrix multiplication; in this case we have a
1230 $2 \times 3$ matrix multiplied by a 3-vector, which produces a 2-vector:
1230 $2 \times 3$ matrix multiplied by a 3-vector, which produces a 2-vector:
1231
1231
1232 \begin{codecell}
1232 \begin{codecell}
1233 \begin{codeinput}
1233 \begin{codeinput}
1234 \begin{lstlisting}
1234 \begin{lstlisting}
1235 A = np.arange(6).reshape(2, 3)
1235 A = np.arange(6).reshape(2, 3)
1236 print A, 'x', v1, '=', A.dot(v1)
1236 print A, 'x', v1, '=', A.dot(v1)
1237 \end{lstlisting}
1237 \end{lstlisting}
1238 \end{codeinput}
1238 \end{codeinput}
1239 \begin{codeoutput}
1239 \begin{codeoutput}
1240 \begin{verbatim}
1240 \begin{verbatim}
1241 [[0 1 2]
1241 [[0 1 2]
1242 [3 4 5]] x [2 3 4] = [11 38]
1242 [3 4 5]] x [2 3 4] = [11 38]
1243 \end{verbatim}
1243 \end{verbatim}
1244 \end{codeoutput}
1244 \end{codeoutput}
1245 \end{codecell}
1245 \end{codecell}
1246 For matrix-matrix multiplication, the same dimension-matching rules must
1246 For matrix-matrix multiplication, the same dimension-matching rules must
1247 be satisfied, e.g.~consider the difference between $A \times A^T$:
1247 be satisfied, e.g.~consider the difference between $A \times A^T$:
1248
1248
1249 \begin{codecell}
1249 \begin{codecell}
1250 \begin{codeinput}
1250 \begin{codeinput}
1251 \begin{lstlisting}
1251 \begin{lstlisting}
1252 print A.dot(A.T)
1252 print A.dot(A.T)
1253 \end{lstlisting}
1253 \end{lstlisting}
1254 \end{codeinput}
1254 \end{codeinput}
1255 \begin{codeoutput}
1255 \begin{codeoutput}
1256 \begin{verbatim}
1256 \begin{verbatim}
1257 [[ 5 14]
1257 [[ 5 14]
1258 [14 50]]
1258 [14 50]]
1259 \end{verbatim}
1259 \end{verbatim}
1260 \end{codeoutput}
1260 \end{codeoutput}
1261 \end{codecell}
1261 \end{codecell}
1262 and $A^T \times A$:
1262 and $A^T \times A$:
1263
1263
1264 \begin{codecell}
1264 \begin{codecell}
1265 \begin{codeinput}
1265 \begin{codeinput}
1266 \begin{lstlisting}
1266 \begin{lstlisting}
1267 print A.T.dot(A)
1267 print A.T.dot(A)
1268 \end{lstlisting}
1268 \end{lstlisting}
1269 \end{codeinput}
1269 \end{codeinput}
1270 \begin{codeoutput}
1270 \begin{codeoutput}
1271 \begin{verbatim}
1271 \begin{verbatim}
1272 [[ 9 12 15]
1272 [[ 9 12 15]
1273 [12 17 22]
1273 [12 17 22]
1274 [15 22 29]]
1274 [15 22 29]]
1275 \end{verbatim}
1275 \end{verbatim}
1276 \end{codeoutput}
1276 \end{codeoutput}
1277 \end{codecell}
1277 \end{codecell}
1278 Furthermore, the \texttt{numpy.linalg} module includes additional
1278 Furthermore, the \texttt{numpy.linalg} module includes additional
1279 functionality such as determinants, matrix norms, Cholesky, eigenvalue
1279 functionality such as determinants, matrix norms, Cholesky, eigenvalue
1280 and singular value decompositions, etc. For even more linear algebra
1280 and singular value decompositions, etc. For even more linear algebra
1281 tools, \texttt{scipy.linalg} contains the majority of the tools in the
1281 tools, \texttt{scipy.linalg} contains the majority of the tools in the
1282 classic LAPACK libraries as well as functions to operate on sparse
1282 classic LAPACK libraries as well as functions to operate on sparse
1283 matrices. We refer the reader to the Numpy and Scipy documentation for
1283 matrices. We refer the reader to the Numpy and Scipy documentation for
1284 additional details on these.
1284 additional details on these.
1285
1285
1286 \subsection{Reading and writing arrays to disk}
1286 \subsection{Reading and writing arrays to disk}
1287 Numpy lets you read and write arrays into files in a number of ways. In
1287 Numpy lets you read and write arrays into files in a number of ways. In
1288 order to use these tools well, it is critical to understand the
1288 order to use these tools well, it is critical to understand the
1289 difference between a \emph{text} and a \emph{binary} file containing
1289 difference between a \emph{text} and a \emph{binary} file containing
1290 numerical data. In a text file, the number $\pi$ could be written as
1290 numerical data. In a text file, the number $\pi$ could be written as
1291 ``3.141592653589793'', for example: a string of digits that a human can
1291 ``3.141592653589793'', for example: a string of digits that a human can
1292 read, with in this case 15 decimal digits. In contrast, that same number
1292 read, with in this case 15 decimal digits. In contrast, that same number
1293 written to a binary file would be encoded as 8 characters (bytes) that
1293 written to a binary file would be encoded as 8 characters (bytes) that
1294 are not readable by a human but which contain the exact same data that
1294 are not readable by a human but which contain the exact same data that
1295 the variable \texttt{pi} had in the computer's memory.
1295 the variable \texttt{pi} had in the computer's memory.
1296
1296
1297 The tradeoffs between the two modes are thus:
1297 The tradeoffs between the two modes are thus:
1298
1298
1299 \begin{itemize}
1299 \begin{itemize}
1300 \item
1300 \item
1301 Text mode: occupies more space, precision can be lost (if not all
1301 Text mode: occupies more space, precision can be lost (if not all
1302 digits are written to disk), but is readable and editable by hand with
1302 digits are written to disk), but is readable and editable by hand with
1303 a text editor. Can \emph{only} be used for one- and two-dimensional
1303 a text editor. Can \emph{only} be used for one- and two-dimensional
1304 arrays.
1304 arrays.
1305 \item
1305 \item
1306 Binary mode: compact and exact representation of the data in memory,
1306 Binary mode: compact and exact representation of the data in memory,
1307 can't be read or edited by hand. Arrays of any size and dimensionality
1307 can't be read or edited by hand. Arrays of any size and dimensionality
1308 can be saved and read without loss of information.
1308 can be saved and read without loss of information.
1309 \end{itemize}
1309 \end{itemize}
1310 First, let's see how to read and write arrays in text mode. The
1310 First, let's see how to read and write arrays in text mode. The
1311 \texttt{np.savetxt} function saves an array to a text file, with options
1311 \texttt{np.savetxt} function saves an array to a text file, with options
1312 to control the precision, separators and even adding a header:
1312 to control the precision, separators and even adding a header:
1313
1313
1314 \begin{codecell}
1314 \begin{codecell}
1315 \begin{codeinput}
1315 \begin{codeinput}
1316 \begin{lstlisting}
1316 \begin{lstlisting}
1317 arr = np.arange(10).reshape(2, 5)
1317 arr = np.arange(10).reshape(2, 5)
1318 np.savetxt('test.out', arr, fmt='%.2e', header="My dataset")
1318 np.savetxt('test.out', arr, fmt='%.2e', header="My dataset")
1319 !cat test.out
1319 !cat test.out
1320 \end{lstlisting}
1320 \end{lstlisting}
1321 \end{codeinput}
1321 \end{codeinput}
1322 \begin{codeoutput}
1322 \begin{codeoutput}
1323 \begin{verbatim}
1323 \begin{verbatim}
1324 # My dataset
1324 # My dataset
1325 0.00e+00 1.00e+00 2.00e+00 3.00e+00 4.00e+00
1325 0.00e+00 1.00e+00 2.00e+00 3.00e+00 4.00e+00
1326 5.00e+00 6.00e+00 7.00e+00 8.00e+00 9.00e+00
1326 5.00e+00 6.00e+00 7.00e+00 8.00e+00 9.00e+00
1327 \end{verbatim}
1327 \end{verbatim}
1328 \end{codeoutput}
1328 \end{codeoutput}
1329 \end{codecell}
1329 \end{codecell}
1330 And this same type of file can then be read with the matching
1330 And this same type of file can then be read with the matching
1331 \texttt{np.loadtxt} function:
1331 \texttt{np.loadtxt} function:
1332
1332
1333 \begin{codecell}
1333 \begin{codecell}
1334 \begin{codeinput}
1334 \begin{codeinput}
1335 \begin{lstlisting}
1335 \begin{lstlisting}
1336 arr2 = np.loadtxt('test.out')
1336 arr2 = np.loadtxt('test.out')
1337 print arr2
1337 print arr2
1338 \end{lstlisting}
1338 \end{lstlisting}
1339 \end{codeinput}
1339 \end{codeinput}
1340 \begin{codeoutput}
1340 \begin{codeoutput}
1341 \begin{verbatim}
1341 \begin{verbatim}
1342 [[ 0. 1. 2. 3. 4.]
1342 [[ 0. 1. 2. 3. 4.]
1343 [ 5. 6. 7. 8. 9.]]
1343 [ 5. 6. 7. 8. 9.]]
1344 \end{verbatim}
1344 \end{verbatim}
1345 \end{codeoutput}
1345 \end{codeoutput}
1346 \end{codecell}
1346 \end{codecell}
1347 For binary data, Numpy provides the \texttt{np.save} and
1347 For binary data, Numpy provides the \texttt{np.save} and
1348 \texttt{np.savez} routines. The first saves a single array to a file
1348 \texttt{np.savez} routines. The first saves a single array to a file
1349 with \texttt{.npy} extension, while the latter can be used to save a
1349 with \texttt{.npy} extension, while the latter can be used to save a
1350 \emph{group} of arrays into a single file with \texttt{.npz} extension.
1350 \emph{group} of arrays into a single file with \texttt{.npz} extension.
1351 The files created with these routines can then be read with the
1351 The files created with these routines can then be read with the
1352 \texttt{np.load} function.
1352 \texttt{np.load} function.
1353
1353
1354 Let us first see how to use the simpler \texttt{np.save} function to
1354 Let us first see how to use the simpler \texttt{np.save} function to
1355 save a single array:
1355 save a single array:
1356
1356
1357 \begin{codecell}
1357 \begin{codecell}
1358 \begin{codeinput}
1358 \begin{codeinput}
1359 \begin{lstlisting}
1359 \begin{lstlisting}
1360 np.save('test.npy', arr2)
1360 np.save('test.npy', arr2)
1361 # Now we read this back
1361 # Now we read this back
1362 arr2n = np.load('test.npy')
1362 arr2n = np.load('test.npy')
1363 # Let's see if any element is non-zero in the difference.
1363 # Let's see if any element is non-zero in the difference.
1364 # A value of True would be a problem.
1364 # A value of True would be a problem.
1365 print 'Any differences?', np.any(arr2-arr2n)
1365 print 'Any differences?', np.any(arr2-arr2n)
1366 \end{lstlisting}
1366 \end{lstlisting}
1367 \end{codeinput}
1367 \end{codeinput}
1368 \begin{codeoutput}
1368 \begin{codeoutput}
1369 \begin{verbatim}
1369 \begin{verbatim}
1370 Any differences? False
1370 Any differences? False
1371 \end{verbatim}
1371 \end{verbatim}
1372 \end{codeoutput}
1372 \end{codeoutput}
1373 \end{codecell}
1373 \end{codecell}
1374 Now let us see how the \texttt{np.savez} function works. You give it a
1374 Now let us see how the \texttt{np.savez} function works. You give it a
1375 filename and either a sequence of arrays or a set of keywords. In the
1375 filename and either a sequence of arrays or a set of keywords. In the
1376 first mode, the function will automatically name the saved arrays in the
1376 first mode, the function will automatically name the saved arrays in the
1377 archive as \texttt{arr\_0}, \texttt{arr\_1}, etc:
1377 archive as \texttt{arr\_0}, \texttt{arr\_1}, etc:
1378
1378
1379 \begin{codecell}
1379 \begin{codecell}
1380 \begin{codeinput}
1380 \begin{codeinput}
1381 \begin{lstlisting}
1381 \begin{lstlisting}
1382 np.savez('test.npz', arr, arr2)
1382 np.savez('test.npz', arr, arr2)
1383 arrays = np.load('test.npz')
1383 arrays = np.load('test.npz')
1384 arrays.files
1384 arrays.files
1385 \end{lstlisting}
1385 \end{lstlisting}
1386 \end{codeinput}
1386 \end{codeinput}
1387 \begin{codeoutput}
1387 \begin{codeoutput}
1388 \begin{verbatim}
1388 \begin{verbatim}
1389 ['arr_1', 'arr_0']
1389 ['arr_1', 'arr_0']
1390 \end{verbatim}
1390 \end{verbatim}
1391 \end{codeoutput}
1391 \end{codeoutput}
1392 \end{codecell}
1392 \end{codecell}
1393 Alternatively, we can explicitly choose how to name the arrays we save:
1393 Alternatively, we can explicitly choose how to name the arrays we save:
1394
1394
1395 \begin{codecell}
1395 \begin{codecell}
1396 \begin{codeinput}
1396 \begin{codeinput}
1397 \begin{lstlisting}
1397 \begin{lstlisting}
1398 np.savez('test.npz', array1=arr, array2=arr2)
1398 np.savez('test.npz', array1=arr, array2=arr2)
1399 arrays = np.load('test.npz')
1399 arrays = np.load('test.npz')
1400 arrays.files
1400 arrays.files
1401 \end{lstlisting}
1401 \end{lstlisting}
1402 \end{codeinput}
1402 \end{codeinput}
1403 \begin{codeoutput}
1403 \begin{codeoutput}
1404 \begin{verbatim}
1404 \begin{verbatim}
1405 ['array2', 'array1']
1405 ['array2', 'array1']
1406 \end{verbatim}
1406 \end{verbatim}
1407 \end{codeoutput}
1407 \end{codeoutput}
1408 \end{codecell}
1408 \end{codecell}
1409 The object returned by \texttt{np.load} from an \texttt{.npz} file works
1409 The object returned by \texttt{np.load} from an \texttt{.npz} file works
1410 like a dictionary, though you can also access its constituent files by
1410 like a dictionary, though you can also access its constituent files by
1411 attribute using its special \texttt{.f} field; this is best illustrated
1411 attribute using its special \texttt{.f} field; this is best illustrated
1412 with an example with the \texttt{arrays} object from above:
1412 with an example with the \texttt{arrays} object from above:
1413
1413
1414 \begin{codecell}
1414 \begin{codecell}
1415 \begin{codeinput}
1415 \begin{codeinput}
1416 \begin{lstlisting}
1416 \begin{lstlisting}
1417 print 'First row of first array:', arrays['array1'][0]
1417 print 'First row of first array:', arrays['array1'][0]
1418 # This is an equivalent way to get the same field
1418 # This is an equivalent way to get the same field
1419 print 'First row of first array:', arrays.f.array1[0]
1419 print 'First row of first array:', arrays.f.array1[0]
1420 \end{lstlisting}
1420 \end{lstlisting}
1421 \end{codeinput}
1421 \end{codeinput}
1422 \begin{codeoutput}
1422 \begin{codeoutput}
1423 \begin{verbatim}
1423 \begin{verbatim}
1424 First row of first array: [0 1 2 3 4]
1424 First row of first array: [0 1 2 3 4]
1425 First row of first array: [0 1 2 3 4]
1425 First row of first array: [0 1 2 3 4]
1426 \end{verbatim}
1426 \end{verbatim}
1427 \end{codeoutput}
1427 \end{codeoutput}
1428 \end{codecell}
1428 \end{codecell}
1429 This \texttt{.npz} format is a very convenient way to package compactly
1429 This \texttt{.npz} format is a very convenient way to package compactly
1430 and without loss of information, into a single file, a group of related
1430 and without loss of information, into a single file, a group of related
1431 arrays that pertain to a specific problem. At some point, however, the
1431 arrays that pertain to a specific problem. At some point, however, the
1432 complexity of your dataset may be such that the optimal approach is to
1432 complexity of your dataset may be such that the optimal approach is to
1433 use one of the standard formats in scientific data processing that have
1433 use one of the standard formats in scientific data processing that have
1434 been designed to handle complex datasets, such as NetCDF or HDF5.
1434 been designed to handle complex datasets, such as NetCDF or HDF5.
1435
1435
1436 Fortunately, there are tools for manipulating these formats in Python,
1436 Fortunately, there are tools for manipulating these formats in Python,
1437 and for storing data in other ways such as databases. A complete
1437 and for storing data in other ways such as databases. A complete
1438 discussion of the possibilities is beyond the scope of this discussion,
1438 discussion of the possibilities is beyond the scope of this discussion,
1439 but of particular interest for scientific users we at least mention the
1439 but of particular interest for scientific users we at least mention the
1440 following:
1440 following:
1441
1441
1442 \begin{itemize}
1442 \begin{itemize}
1443 \item
1443 \item
1444 The \texttt{scipy.io} module contains routines to read and write
1444 The \texttt{scipy.io} module contains routines to read and write
1445 Matlab files in \texttt{.mat} format and files in the NetCDF format
1445 Matlab files in \texttt{.mat} format and files in the NetCDF format
1446 that is widely used in certain scientific disciplines.
1446 that is widely used in certain scientific disciplines.
1447 \item
1447 \item
1448 For manipulating files in the HDF5 format, there are two excellent
1448 For manipulating files in the HDF5 format, there are two excellent
1449 options in Python: The PyTables project offers a high-level, object
1449 options in Python: The PyTables project offers a high-level, object
1450 oriented approach to manipulating HDF5 datasets, while the h5py
1450 oriented approach to manipulating HDF5 datasets, while the h5py
1451 project offers a more direct mapping to the standard HDF5 library
1451 project offers a more direct mapping to the standard HDF5 library
1452 interface. Both are excellent tools; if you need to work with HDF5
1452 interface. Both are excellent tools; if you need to work with HDF5
1453 datasets you should read some of their documentation and examples and
1453 datasets you should read some of their documentation and examples and
1454 decide which approach is a better match for your needs.
1454 decide which approach is a better match for your needs.
1455 \end{itemize}
1455 \end{itemize}
1456
1456
1457 \section{High quality data visualization with Matplotlib}
1457 \section{High quality data visualization with Matplotlib}
1458 The \href{http://matplotlib.sf.net}{matplotlib} library is a powerful
1458 The \href{http://matplotlib.sf.net}{matplotlib} library is a powerful
1459 tool capable of producing complex publication-quality figures with fine
1459 tool capable of producing complex publication-quality figures with fine
1460 layout control in two and three dimensions; here we will only provide a
1460 layout control in two and three dimensions; here we will only provide a
1461 minimal self-contained introduction to its usage that covers the
1461 minimal self-contained introduction to its usage that covers the
1462 functionality needed for the rest of the book. We encourage the reader
1462 functionality needed for the rest of the book. We encourage the reader
1463 to read the tutorials included with the matplotlib documentation as well
1463 to read the tutorials included with the matplotlib documentation as well
1464 as to browse its extensive gallery of examples that include source code.
1464 as to browse its extensive gallery of examples that include source code.
1465
1465
1466 Just as we typically use the shorthand \texttt{np} for Numpy, we will
1466 Just as we typically use the shorthand \texttt{np} for Numpy, we will
1467 use \texttt{plt} for the \texttt{matplotlib.pyplot} module where the
1467 use \texttt{plt} for the \texttt{matplotlib.pyplot} module where the
1468 easy-to-use plotting functions reside (the library contains a rich
1468 easy-to-use plotting functions reside (the library contains a rich
1469 object-oriented architecture that we don't have the space to discuss
1469 object-oriented architecture that we don't have the space to discuss
1470 here):
1470 here):
1471
1471
1472 \begin{codecell}
1472 \begin{codecell}
1473 \begin{codeinput}
1473 \begin{codeinput}
1474 \begin{lstlisting}
1474 \begin{lstlisting}
1475 import matplotlib.pyplot as plt
1475 import matplotlib.pyplot as plt
1476 \end{lstlisting}
1476 \end{lstlisting}
1477 \end{codeinput}
1477 \end{codeinput}
1478 \end{codecell}
1478 \end{codecell}
1479 The most frequently used function is simply called \texttt{plot}; here
1479 The most frequently used function is simply called \texttt{plot}; here
1480 is how you can make a simple plot of $\sin(x)$ for $x \in [0, 2\pi]$
1480 is how you can make a simple plot of $\sin(x)$ for $x \in [0, 2\pi]$
1481 with labels and a grid (we use the semicolon in the last line to
1481 with labels and a grid (we use the semicolon in the last line to
1482 suppress the display of some information that is unnecessary right now):
1482 suppress the display of some information that is unnecessary right now):
1483
1483
1484 \begin{codecell}
1484 \begin{codecell}
1485 \begin{codeinput}
1485 \begin{codeinput}
1486 \begin{lstlisting}
1486 \begin{lstlisting}
1487 x = np.linspace(0, 2*np.pi)
1487 x = np.linspace(0, 2*np.pi)
1488 y = np.sin(x)
1488 y = np.sin(x)
1489 plt.plot(x,y, label='sin(x)')
1489 plt.plot(x,y, label='sin(x)')
1490 plt.legend()
1490 plt.legend()
1491 plt.grid()
1491 plt.grid()
1492 plt.title('Harmonic')
1492 plt.title('Harmonic')
1493 plt.xlabel('x')
1493 plt.xlabel('x')
1494 plt.ylabel('y');
1494 plt.ylabel('y');
1495 \end{lstlisting}
1495 \end{lstlisting}
1496 \end{codeinput}
1496 \end{codeinput}
1497 \begin{codeoutput}
1497 \begin{codeoutput}
1498 \begin{center}
1498 \begin{center}
1499 \includegraphics[width=6in]{/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_01.pdf}
1499 \includegraphics[width=6in]{tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_01.pdf}
1500 \par
1500 \par
1501 \end{center}
1501 \end{center}
1502 \end{codeoutput}
1502 \end{codeoutput}
1503 \end{codecell}
1503 \end{codecell}
1504 You can control the style, color and other properties of the markers,
1504 You can control the style, color and other properties of the markers,
1505 for example:
1505 for example:
1506
1506
1507 \begin{codecell}
1507 \begin{codecell}
1508 \begin{codeinput}
1508 \begin{codeinput}
1509 \begin{lstlisting}
1509 \begin{lstlisting}
1510 plt.plot(x, y, linewidth=2);
1510 plt.plot(x, y, linewidth=2);
1511 \end{lstlisting}
1511 \end{lstlisting}
1512 \end{codeinput}
1512 \end{codeinput}
1513 \begin{codeoutput}
1513 \begin{codeoutput}
1514 \begin{center}
1514 \begin{center}
1515 \includegraphics[width=6in]{/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_02.pdf}
1515 \includegraphics[width=6in]{tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_02.pdf}
1516 \par
1516 \par
1517 \end{center}
1517 \end{center}
1518 \end{codeoutput}
1518 \end{codeoutput}
1519 \end{codecell}
1519 \end{codecell}
1520 \begin{codecell}
1520 \begin{codecell}
1521 \begin{codeinput}
1521 \begin{codeinput}
1522 \begin{lstlisting}
1522 \begin{lstlisting}
1523 plt.plot(x, y, 'o', markersize=5, color='r');
1523 plt.plot(x, y, 'o', markersize=5, color='r');
1524 \end{lstlisting}
1524 \end{lstlisting}
1525 \end{codeinput}
1525 \end{codeinput}
1526 \begin{codeoutput}
1526 \begin{codeoutput}
1527 \begin{center}
1527 \begin{center}
1528 \includegraphics[width=6in]{/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_03.pdf}
1528 \includegraphics[width=6in]{tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_03.pdf}
1529 \par
1529 \par
1530 \end{center}
1530 \end{center}
1531 \end{codeoutput}
1531 \end{codeoutput}
1532 \end{codecell}
1532 \end{codecell}
1533 We will now see how to create a few other common plot types, such as a
1533 We will now see how to create a few other common plot types, such as a
1534 simple error plot:
1534 simple error plot:
1535
1535
1536 \begin{codecell}
1536 \begin{codecell}
1537 \begin{codeinput}
1537 \begin{codeinput}
1538 \begin{lstlisting}
1538 \begin{lstlisting}
1539 # example data
1539 # example data
1540 x = np.arange(0.1, 4, 0.5)
1540 x = np.arange(0.1, 4, 0.5)
1541 y = np.exp(-x)
1541 y = np.exp(-x)
1542
1542
1543 # example variable error bar values
1543 # example variable error bar values
1544 yerr = 0.1 + 0.2*np.sqrt(x)
1544 yerr = 0.1 + 0.2*np.sqrt(x)
1545 xerr = 0.1 + yerr
1545 xerr = 0.1 + yerr
1546
1546
1547 # First illustrate basic pyplot interface, using defaults where possible.
1547 # First illustrate basic pyplot interface, using defaults where possible.
1548 plt.figure()
1548 plt.figure()
1549 plt.errorbar(x, y, xerr=0.2, yerr=0.4)
1549 plt.errorbar(x, y, xerr=0.2, yerr=0.4)
1550 plt.title("Simplest errorbars, 0.2 in x, 0.4 in y");
1550 plt.title("Simplest errorbars, 0.2 in x, 0.4 in y");
1551 \end{lstlisting}
1551 \end{lstlisting}
1552 \end{codeinput}
1552 \end{codeinput}
1553 \begin{codeoutput}
1553 \begin{codeoutput}
1554 \begin{center}
1554 \begin{center}
1555 \includegraphics[width=6in]{/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_04.pdf}
1555 \includegraphics[width=6in]{tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_04.pdf}
1556 \par
1556 \par
1557 \end{center}
1557 \end{center}
1558 \end{codeoutput}
1558 \end{codeoutput}
1559 \end{codecell}
1559 \end{codecell}
1560 A simple log plot
1560 A simple log plot
1561
1561
1562 \begin{codecell}
1562 \begin{codecell}
1563 \begin{codeinput}
1563 \begin{codeinput}
1564 \begin{lstlisting}
1564 \begin{lstlisting}
1565 x = np.linspace(-5, 5)
1565 x = np.linspace(-5, 5)
1566 y = np.exp(-x**2)
1566 y = np.exp(-x**2)
1567 plt.semilogy(x, y);
1567 plt.semilogy(x, y);
1568 \end{lstlisting}
1568 \end{lstlisting}
1569 \end{codeinput}
1569 \end{codeinput}
1570 \begin{codeoutput}
1570 \begin{codeoutput}
1571 \begin{center}
1571 \begin{center}
1572 \includegraphics[width=6in]{/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_05.pdf}
1572 \includegraphics[width=6in]{tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_05.pdf}
1573 \par
1573 \par
1574 \end{center}
1574 \end{center}
1575 \end{codeoutput}
1575 \end{codeoutput}
1576 \end{codecell}
1576 \end{codecell}
1577 A histogram annotated with text inside the plot, using the \texttt{text}
1577 A histogram annotated with text inside the plot, using the \texttt{text}
1578 function:
1578 function:
1579
1579
1580 \begin{codecell}
1580 \begin{codecell}
1581 \begin{codeinput}
1581 \begin{codeinput}
1582 \begin{lstlisting}
1582 \begin{lstlisting}
1583 mu, sigma = 100, 15
1583 mu, sigma = 100, 15
1584 x = mu + sigma * np.random.randn(10000)
1584 x = mu + sigma * np.random.randn(10000)
1585
1585
1586 # the histogram of the data
1586 # the histogram of the data
1587 n, bins, patches = plt.hist(x, 50, normed=1, facecolor='g', alpha=0.75)
1587 n, bins, patches = plt.hist(x, 50, normed=1, facecolor='g', alpha=0.75)
1588
1588
1589 plt.xlabel('Smarts')
1589 plt.xlabel('Smarts')
1590 plt.ylabel('Probability')
1590 plt.ylabel('Probability')
1591 plt.title('Histogram of IQ')
1591 plt.title('Histogram of IQ')
1592 # This will put a text fragment at the position given:
1592 # This will put a text fragment at the position given:
1593 plt.text(55, .027, r'$\mu=100,\ \sigma=15$', fontsize=14)
1593 plt.text(55, .027, r'$\mu=100,\ \sigma=15$', fontsize=14)
1594 plt.axis([40, 160, 0, 0.03])
1594 plt.axis([40, 160, 0, 0.03])
1595 plt.grid(True)
1595 plt.grid(True)
1596 \end{lstlisting}
1596 \end{lstlisting}
1597 \end{codeinput}
1597 \end{codeinput}
1598 \begin{codeoutput}
1598 \begin{codeoutput}
1599 \begin{center}
1599 \begin{center}
1600 \includegraphics[width=6in]{/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_06.pdf}
1600 \includegraphics[width=6in]{tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_06.pdf}
1601 \par
1601 \par
1602 \end{center}
1602 \end{center}
1603 \end{codeoutput}
1603 \end{codeoutput}
1604 \end{codecell}
1604 \end{codecell}
1605 \subsection{Image display}
1605 \subsection{Image display}
1606 The \texttt{imshow} command can display single or multi-channel images.
1606 The \texttt{imshow} command can display single or multi-channel images.
1607 A simple array of random numbers, plotted in grayscale:
1607 A simple array of random numbers, plotted in grayscale:
1608
1608
1609 \begin{codecell}
1609 \begin{codecell}
1610 \begin{codeinput}
1610 \begin{codeinput}
1611 \begin{lstlisting}
1611 \begin{lstlisting}
1612 from matplotlib import cm
1612 from matplotlib import cm
1613 plt.imshow(np.random.rand(5, 10), cmap=cm.gray, interpolation='nearest');
1613 plt.imshow(np.random.rand(5, 10), cmap=cm.gray, interpolation='nearest');
1614 \end{lstlisting}
1614 \end{lstlisting}
1615 \end{codeinput}
1615 \end{codeinput}
1616 \begin{codeoutput}
1616 \begin{codeoutput}
1617 \begin{center}
1617 \begin{center}
1618 \includegraphics[width=6in]{/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_07.pdf}
1618 \includegraphics[width=6in]{tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_07.pdf}
1619 \par
1619 \par
1620 \end{center}
1620 \end{center}
1621 \end{codeoutput}
1621 \end{codeoutput}
1622 \end{codecell}
1622 \end{codecell}
1623 A real photograph is a multichannel image, \texttt{imshow} interprets it
1623 A real photograph is a multichannel image, \texttt{imshow} interprets it
1624 correctly:
1624 correctly:
1625
1625
1626 \begin{codecell}
1626 \begin{codecell}
1627 \begin{codeinput}
1627 \begin{codeinput}
1628 \begin{lstlisting}
1628 \begin{lstlisting}
1629 img = plt.imread('stinkbug.png')
1629 img = plt.imread('stinkbug.png')
1630 print 'Dimensions of the array img:', img.shape
1630 print 'Dimensions of the array img:', img.shape
1631 plt.imshow(img);
1631 plt.imshow(img);
1632 \end{lstlisting}
1632 \end{lstlisting}
1633 \end{codeinput}
1633 \end{codeinput}
1634 \begin{codeoutput}
1634 \begin{codeoutput}
1635 \begin{verbatim}
1635 \begin{verbatim}
1636 Dimensions of the array img: (375, 500, 3)
1636 Dimensions of the array img: (375, 500, 3)
1637 \end{verbatim}
1637 \end{verbatim}
1638 \begin{center}
1638 \begin{center}
1639 \includegraphics[width=6in]{/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_08.pdf}
1639 \includegraphics[width=6in]{tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_08.pdf}
1640 \par
1640 \par
1641 \end{center}
1641 \end{center}
1642 \end{codeoutput}
1642 \end{codeoutput}
1643 \end{codecell}
1643 \end{codecell}
1644 \subsection{Simple 3d plotting with matplotlib}
1644 \subsection{Simple 3d plotting with matplotlib}
1645 Note that you must execute at least once in your session:
1645 Note that you must execute at least once in your session:
1646
1646
1647 \begin{codecell}
1647 \begin{codecell}
1648 \begin{codeinput}
1648 \begin{codeinput}
1649 \begin{lstlisting}
1649 \begin{lstlisting}
1650 from mpl_toolkits.mplot3d import Axes3D
1650 from mpl_toolkits.mplot3d import Axes3D
1651 \end{lstlisting}
1651 \end{lstlisting}
1652 \end{codeinput}
1652 \end{codeinput}
1653 \end{codecell}
1653 \end{codecell}
1654 Once this has been done, you can create 3d axes with the
1654 Once this has been done, you can create 3d axes with the
1655 \texttt{projection='3d'} keyword to \texttt{add\_subplot}:
1655 \texttt{projection='3d'} keyword to \texttt{add\_subplot}:
1656
1656
1657 \begin{verbatim}
1657 \begin{verbatim}
1658 fig = plt.figure()
1658 fig = plt.figure()
1659 fig.add_subplot(<other arguments here>, projection='3d')
1659 fig.add_subplot(<other arguments here>, projection='3d')
1660 \end{verbatim}
1660 \end{verbatim}
1661
1661
1662
1662
1663 A simple surface plot:
1663 A simple surface plot:
1664
1664
1665 \begin{codecell}
1665 \begin{codecell}
1666 \begin{codeinput}
1666 \begin{codeinput}
1667 \begin{lstlisting}
1667 \begin{lstlisting}
1668 from mpl_toolkits.mplot3d.axes3d import Axes3D
1668 from mpl_toolkits.mplot3d.axes3d import Axes3D
1669 from matplotlib import cm
1669 from matplotlib import cm
1670
1670
1671 fig = plt.figure()
1671 fig = plt.figure()
1672 ax = fig.add_subplot(1, 1, 1, projection='3d')
1672 ax = fig.add_subplot(1, 1, 1, projection='3d')
1673 X = np.arange(-5, 5, 0.25)
1673 X = np.arange(-5, 5, 0.25)
1674 Y = np.arange(-5, 5, 0.25)
1674 Y = np.arange(-5, 5, 0.25)
1675 X, Y = np.meshgrid(X, Y)
1675 X, Y = np.meshgrid(X, Y)
1676 R = np.sqrt(X**2 + Y**2)
1676 R = np.sqrt(X**2 + Y**2)
1677 Z = np.sin(R)
1677 Z = np.sin(R)
1678 surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet,
1678 surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.jet,
1679 linewidth=0, antialiased=False)
1679 linewidth=0, antialiased=False)
1680 ax.set_zlim3d(-1.01, 1.01);
1680 ax.set_zlim3d(-1.01, 1.01);
1681 \end{lstlisting}
1681 \end{lstlisting}
1682 \end{codeinput}
1682 \end{codeinput}
1683 \begin{codeoutput}
1683 \begin{codeoutput}
1684 \begin{center}
1684 \begin{center}
1685 \includegraphics[width=6in]{/Users/bussonniermatthias/nbconvert/tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_09.pdf}
1685 \includegraphics[width=6in]{tests/ipynbref/IntroNumPy.orig_files/IntroNumPy.orig_fig_09.pdf}
1686 \par
1686 \par
1687 \end{center}
1687 \end{center}
1688 \end{codeoutput}
1688 \end{codeoutput}
1689 \end{codecell}
1689 \end{codecell}
1690 \section{IPython: a powerful interactive environment}
1690 \section{IPython: a powerful interactive environment}
1691 A key component of the everyday workflow of most scientific computing
1691 A key component of the everyday workflow of most scientific computing
1692 environments is a good interactive environment, that is, a system in
1692 environments is a good interactive environment, that is, a system in
1693 which you can execute small amounts of code and view the results
1693 which you can execute small amounts of code and view the results
1694 immediately, combining both printing out data and opening graphical
1694 immediately, combining both printing out data and opening graphical
1695 visualizations. All modern systems for scientific computing, commercial
1695 visualizations. All modern systems for scientific computing, commercial
1696 and open source, include such functionality.
1696 and open source, include such functionality.
1697
1697
1698 Out of the box, Python also offers a simple interactive shell with very
1698 Out of the box, Python also offers a simple interactive shell with very
1699 limited capabilities. But just like the scientific community built Numpy
1699 limited capabilities. But just like the scientific community built Numpy
1700 to provide arrays suited for scientific work (since Python's lists
1700 to provide arrays suited for scientific work (since Python's lists
1701 aren't optimal for this task), it has also developed an interactive
1701 aren't optimal for this task), it has also developed an interactive
1702 environment much more sophisticated than the built-in one. The
1702 environment much more sophisticated than the built-in one. The
1703 \href{http://ipython.org}{IPython project} offers a set of tools to make
1703 \href{http://ipython.org}{IPython project} offers a set of tools to make
1704 productive use of the Python language, all the while working
1704 productive use of the Python language, all the while working
1705 interactively and with immediate feedback on your results. The basic
1705 interactively and with immediate feedback on your results. The basic
1706 tools that IPython provides are:
1706 tools that IPython provides are:
1707
1707
1708 \begin{enumerate}[1.]
1708 \begin{enumerate}[1.]
1709 \item
1709 \item
1710 A powerful terminal shell, with many features designed to increase the
1710 A powerful terminal shell, with many features designed to increase the
1711 fluidity and productivity of everyday scientific workflows, including:
1711 fluidity and productivity of everyday scientific workflows, including:
1712
1712
1713 \begin{itemize}
1713 \begin{itemize}
1714 \item
1714 \item
1715 rich introspection of all objects and variables including easy
1715 rich introspection of all objects and variables including easy
1716 access to the source code of any function
1716 access to the source code of any function
1717 \item
1717 \item
1718 powerful and extensible tab completion of variables and filenames,
1718 powerful and extensible tab completion of variables and filenames,
1719 \item
1719 \item
1720 tight integration with matplotlib, supporting interactive figures
1720 tight integration with matplotlib, supporting interactive figures
1721 that don't block the terminal,
1721 that don't block the terminal,
1722 \item
1722 \item
1723 direct access to the filesystem and underlying operating system,
1723 direct access to the filesystem and underlying operating system,
1724 \item
1724 \item
1725 an extensible system for shell-like commands called `magics' that
1725 an extensible system for shell-like commands called `magics' that
1726 reduce the work needed to perform many common tasks,
1726 reduce the work needed to perform many common tasks,
1727 \item
1727 \item
1728 tools for easily running, timing, profiling and debugging your
1728 tools for easily running, timing, profiling and debugging your
1729 codes,
1729 codes,
1730 \item
1730 \item
1731 syntax highlighted error messages with much more detail than the
1731 syntax highlighted error messages with much more detail than the
1732 default Python ones,
1732 default Python ones,
1733 \item
1733 \item
1734 logging and access to all previous history of inputs, including
1734 logging and access to all previous history of inputs, including
1735 across sessions
1735 across sessions
1736 \end{itemize}
1736 \end{itemize}
1737 \item
1737 \item
1738 A Qt console that provides the look and feel of a terminal, but adds
1738 A Qt console that provides the look and feel of a terminal, but adds
1739 support for inline figures, graphical calltips, a persistent session
1739 support for inline figures, graphical calltips, a persistent session
1740 that can survive crashes (even segfaults) of the kernel process, and
1740 that can survive crashes (even segfaults) of the kernel process, and
1741 more.
1741 more.
1742 \item
1742 \item
1743 A web-based notebook that can execute code and also contain rich text
1743 A web-based notebook that can execute code and also contain rich text
1744 and figures, mathematical equations and arbitrary HTML. This notebook
1744 and figures, mathematical equations and arbitrary HTML. This notebook
1745 presents a document-like view with cells where code is executed but
1745 presents a document-like view with cells where code is executed but
1746 that can be edited in-place, reordered, mixed with explanatory text
1746 that can be edited in-place, reordered, mixed with explanatory text
1747 and figures, etc.
1747 and figures, etc.
1748 \item
1748 \item
1749 A high-performance, low-latency system for parallel computing that
1749 A high-performance, low-latency system for parallel computing that
1750 supports the control of a cluster of IPython engines communicating
1750 supports the control of a cluster of IPython engines communicating
1751 over a network, with optimizations that minimize unnecessary copying
1751 over a network, with optimizations that minimize unnecessary copying
1752 of large objects (especially numpy arrays).
1752 of large objects (especially numpy arrays).
1753 \end{enumerate}
1753 \end{enumerate}
1754 We will now discuss the highlights of the tools 1-3 above so that you
1754 We will now discuss the highlights of the tools 1-3 above so that you
1755 can make them an effective part of your workflow. The topic of parallel
1755 can make them an effective part of your workflow. The topic of parallel
1756 computing is beyond the scope of this document, but we encourage you to
1756 computing is beyond the scope of this document, but we encourage you to
1757 read the extensive
1757 read the extensive
1758 \href{http://ipython.org/ipython-doc/rel-0.12.1/parallel/index.html}{documentation}
1758 \href{http://ipython.org/ipython-doc/rel-0.12.1/parallel/index.html}{documentation}
1759 and \href{http://minrk.github.com/scipy-tutorial-2011/}{tutorials} on
1759 and \href{http://minrk.github.com/scipy-tutorial-2011/}{tutorials} on
1760 this available on the IPython website.
1760 this available on the IPython website.
1761
1761
1762 \subsection{The IPython terminal}
1762 \subsection{The IPython terminal}
1763 You can start IPython at the terminal simply by typing:
1763 You can start IPython at the terminal simply by typing:
1764
1764
1765 \begin{verbatim}
1765 \begin{verbatim}
1766 $ ipython
1766 $ ipython
1767 \end{verbatim}
1767 \end{verbatim}
1768 which will provide you some basic information about how to get started
1768 which will provide you some basic information about how to get started
1769 and will then open a prompt labeled \texttt{In {[}1{]}:} for you to
1769 and will then open a prompt labeled \texttt{In {[}1{]}:} for you to
1770 start typing. Here we type $2^{64}$ and Python computes the result for
1770 start typing. Here we type $2^{64}$ and Python computes the result for
1771 us in exact arithmetic, returning it as \texttt{Out{[}1{]}}:
1771 us in exact arithmetic, returning it as \texttt{Out{[}1{]}}:
1772
1772
1773 \begin{verbatim}
1773 \begin{verbatim}
1774 $ ipython
1774 $ ipython
1775 Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
1775 Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
1776 Type "copyright", "credits" or "license" for more information.
1776 Type "copyright", "credits" or "license" for more information.
1777
1777
1778 IPython 0.13.dev -- An enhanced Interactive Python.
1778 IPython 0.13.dev -- An enhanced Interactive Python.
1779 ? -> Introduction and overview of IPython's features.
1779 ? -> Introduction and overview of IPython's features.
1780 %quickref -> Quick reference.
1780 %quickref -> Quick reference.
1781 help -> Python's own help system.
1781 help -> Python's own help system.
1782 object? -> Details about 'object', use 'object??' for extra details.
1782 object? -> Details about 'object', use 'object??' for extra details.
1783
1783
1784 In [1]: 2**64
1784 In [1]: 2**64
1785 Out[1]: 18446744073709551616L
1785 Out[1]: 18446744073709551616L
1786 \end{verbatim}
1786 \end{verbatim}
1787 The first thing you should know about IPython is that all your inputs
1787 The first thing you should know about IPython is that all your inputs
1788 and outputs are saved. There are two variables named \texttt{In} and
1788 and outputs are saved. There are two variables named \texttt{In} and
1789 \texttt{Out} which are filled as you work with your results.
1789 \texttt{Out} which are filled as you work with your results.
1790 Furthermore, all outputs are also saved to auto-created variables of the
1790 Furthermore, all outputs are also saved to auto-created variables of the
1791 form \texttt{\_NN} where \texttt{NN} is the prompt number, and inputs to
1791 form \texttt{\_NN} where \texttt{NN} is the prompt number, and inputs to
1792 \texttt{\_iNN}. This allows you to recover quickly the result of a prior
1792 \texttt{\_iNN}. This allows you to recover quickly the result of a prior
1793 computation by referring to its number even if you forgot to store it as
1793 computation by referring to its number even if you forgot to store it as
1794 a variable. For example, later on in the above session you can do:
1794 a variable. For example, later on in the above session you can do:
1795
1795
1796 \begin{verbatim}
1796 \begin{verbatim}
1797 In [6]: print _1
1797 In [6]: print _1
1798 18446744073709551616
1798 18446744073709551616
1799 \end{verbatim}
1799 \end{verbatim}
1800
1800
1801
1801
1802 We strongly recommend that you take a few minutes to read at least the
1802 We strongly recommend that you take a few minutes to read at least the
1803 basic introduction provided by the \texttt{?} command, and keep in mind
1803 basic introduction provided by the \texttt{?} command, and keep in mind
1804 that the \texttt{\%quickref} command at all times can be used as a quick
1804 that the \texttt{\%quickref} command at all times can be used as a quick
1805 reference ``cheat sheet'' of the most frequently used features of
1805 reference ``cheat sheet'' of the most frequently used features of
1806 IPython.
1806 IPython.
1807
1807
1808 At the IPython prompt, any valid Python code that you type will be
1808 At the IPython prompt, any valid Python code that you type will be
1809 executed similarly to the default Python shell (though often with more
1809 executed similarly to the default Python shell (though often with more
1810 informative feedback). But since IPython is a \emph{superset} of the
1810 informative feedback). But since IPython is a \emph{superset} of the
1811 default Python shell, let's have a brief look at some of its additional
1811 default Python shell, let's have a brief look at some of its additional
1812 functionality.
1812 functionality.
1813
1813
1814 \textbf{Object introspection}
1814 \textbf{Object introspection}
1815
1815
1816 A simple \texttt{?} command provides a general introduction to IPython,
1816 A simple \texttt{?} command provides a general introduction to IPython,
1817 but as indicated in the banner above, you can use the \texttt{?} syntax
1817 but as indicated in the banner above, you can use the \texttt{?} syntax
1818 to ask for details about any object. For example, if we type
1818 to ask for details about any object. For example, if we type
1819 \texttt{\_1?}, IPython will print the following details about this
1819 \texttt{\_1?}, IPython will print the following details about this
1820 variable:
1820 variable:
1821
1821
1822 \begin{verbatim}
1822 \begin{verbatim}
1823 In [14]: _1?
1823 In [14]: _1?
1824 Type: long
1824 Type: long
1825 Base Class: <type 'long'>
1825 Base Class: <type 'long'>
1826 String Form:18446744073709551616
1826 String Form:18446744073709551616
1827 Namespace: Interactive
1827 Namespace: Interactive
1828 Docstring:
1828 Docstring:
1829 long(x[, base]) -> integer
1829 long(x[, base]) -> integer
1830
1830
1831 Convert a string or number to a long integer, if possible. A floating
1831 Convert a string or number to a long integer, if possible. A floating
1832
1832
1833 [etc... snipped for brevity]
1833 [etc... snipped for brevity]
1834 \end{verbatim}
1834 \end{verbatim}
1835 If you add a second \texttt{?} and for any object \texttt{x} type
1835 If you add a second \texttt{?} and for any object \texttt{x} type
1836 \texttt{x??}, IPython will try to provide an even more detailed analysis
1836 \texttt{x??}, IPython will try to provide an even more detailed analysis
1837 of the object, including its syntax-highlighted source code when it can
1837 of the object, including its syntax-highlighted source code when it can
1838 be found. It's possible that \texttt{x??} returns the same information
1838 be found. It's possible that \texttt{x??} returns the same information
1839 as \texttt{x?}, but in many cases \texttt{x??} will indeed provide
1839 as \texttt{x?}, but in many cases \texttt{x??} will indeed provide
1840 additional details.
1840 additional details.
1841
1841
1842 Finally, the \texttt{?} syntax is also useful to search
1842 Finally, the \texttt{?} syntax is also useful to search
1843 \emph{namespaces} with wildcards. Suppose you are wondering if there is
1843 \emph{namespaces} with wildcards. Suppose you are wondering if there is
1844 any function in Numpy that may do text-related things; with
1844 any function in Numpy that may do text-related things; with
1845 \texttt{np.*txt*?}, IPython will print all the names in the \texttt{np}
1845 \texttt{np.*txt*?}, IPython will print all the names in the \texttt{np}
1846 namespace (our Numpy shorthand) that have `txt' anywhere in their name:
1846 namespace (our Numpy shorthand) that have `txt' anywhere in their name:
1847
1847
1848 \begin{verbatim}
1848 \begin{verbatim}
1849 In [17]: np.*txt*?
1849 In [17]: np.*txt*?
1850 np.genfromtxt
1850 np.genfromtxt
1851 np.loadtxt
1851 np.loadtxt
1852 np.mafromtxt
1852 np.mafromtxt
1853 np.ndfromtxt
1853 np.ndfromtxt
1854 np.recfromtxt
1854 np.recfromtxt
1855 np.savetxt
1855 np.savetxt
1856 \end{verbatim}
1856 \end{verbatim}
1857
1857
1858
1858
1859 \textbf{Tab completion}
1859 \textbf{Tab completion}
1860
1860
1861 IPython makes the tab key work extra hard for you as a way to rapidly
1861 IPython makes the tab key work extra hard for you as a way to rapidly
1862 inspect objects and libraries. Whenever you have typed something at the
1862 inspect objects and libraries. Whenever you have typed something at the
1863 prompt, by hitting the \texttt{\textless{}tab\textgreater{}} key IPython
1863 prompt, by hitting the \texttt{\textless{}tab\textgreater{}} key IPython
1864 will try to complete the rest of the line. For this, IPython will
1864 will try to complete the rest of the line. For this, IPython will
1865 analyze the text you had so far and try to search for Python data or
1865 analyze the text you had so far and try to search for Python data or
1866 files that may match the context you have already provided.
1866 files that may match the context you have already provided.
1867
1867
1868 For example, if you type \texttt{np.load} and hit the key, you'll see:
1868 For example, if you type \texttt{np.load} and hit the key, you'll see:
1869
1869
1870 \begin{verbatim}
1870 \begin{verbatim}
1871 In [21]: np.load<TAB HERE>
1871 In [21]: np.load<TAB HERE>
1872 np.load np.loads np.loadtxt
1872 np.load np.loads np.loadtxt
1873 \end{verbatim}
1873 \end{verbatim}
1874 so you can quickly find all the load-related functionality in numpy. Tab
1874 so you can quickly find all the load-related functionality in numpy. Tab
1875 completion works even for function arguments, for example consider this
1875 completion works even for function arguments, for example consider this
1876 function definition:
1876 function definition:
1877
1877
1878 \begin{verbatim}
1878 \begin{verbatim}
1879 In [20]: def f(x, frobinate=False):
1879 In [20]: def f(x, frobinate=False):
1880 ....: if frobinate:
1880 ....: if frobinate:
1881 ....: return x**2
1881 ....: return x**2
1882 ....:
1882 ....:
1883 \end{verbatim}
1883 \end{verbatim}
1884 If you now use the \texttt{\textless{}tab\textgreater{}} key after
1884 If you now use the \texttt{\textless{}tab\textgreater{}} key after
1885 having typed `fro' you'll get all valid Python completions, but those
1885 having typed `fro' you'll get all valid Python completions, but those
1886 marked with \texttt{=} at the end are known to be keywords of your
1886 marked with \texttt{=} at the end are known to be keywords of your
1887 function:
1887 function:
1888
1888
1889 \begin{verbatim}
1889 \begin{verbatim}
1890 In [21]: f(2, fro<TAB HERE>
1890 In [21]: f(2, fro<TAB HERE>
1891 frobinate= frombuffer fromfunction frompyfunc fromstring
1891 frobinate= frombuffer fromfunction frompyfunc fromstring
1892 from fromfile fromiter fromregex frozenset
1892 from fromfile fromiter fromregex frozenset
1893 \end{verbatim}
1893 \end{verbatim}
1894 at this point you can add the \texttt{b} letter and hit
1894 at this point you can add the \texttt{b} letter and hit
1895 \texttt{\textless{}tab\textgreater{}} once more, and IPython will finish
1895 \texttt{\textless{}tab\textgreater{}} once more, and IPython will finish
1896 the line for you:
1896 the line for you:
1897
1897
1898 \begin{verbatim}
1898 \begin{verbatim}
1899 In [21]: f(2, frobinate=
1899 In [21]: f(2, frobinate=
1900 \end{verbatim}
1900 \end{verbatim}
1901 As a beginner, simply get into the habit of using
1901 As a beginner, simply get into the habit of using
1902 \texttt{\textless{}tab\textgreater{}} after most objects; it should
1902 \texttt{\textless{}tab\textgreater{}} after most objects; it should
1903 quickly become second nature as you will see how it helps keep a fluid
1903 quickly become second nature as you will see how it helps keep a fluid
1904 workflow and discover useful information. Later on you can also
1904 workflow and discover useful information. Later on you can also
1905 customize this behavior by writing your own completion code, if you so
1905 customize this behavior by writing your own completion code, if you so
1906 desire.
1906 desire.
1907
1907
1908 \textbf{Matplotlib integration}
1908 \textbf{Matplotlib integration}
1909
1909
1910 One of the most useful features of IPython for scientists is its tight
1910 One of the most useful features of IPython for scientists is its tight
1911 integration with matplotlib: at the terminal IPython lets you open
1911 integration with matplotlib: at the terminal IPython lets you open
1912 matplotlib figures without blocking your typing (which is what happens
1912 matplotlib figures without blocking your typing (which is what happens
1913 if you try to do the same thing at the default Python shell), and in the
1913 if you try to do the same thing at the default Python shell), and in the
1914 Qt console and notebook you can even view your figures embedded in your
1914 Qt console and notebook you can even view your figures embedded in your
1915 workspace next to the code that created them.
1915 workspace next to the code that created them.
1916
1916
1917 The matplotlib support can be either activated when you start IPython by
1917 The matplotlib support can be either activated when you start IPython by
1918 passing the \texttt{-{}-pylab} flag, or at any point later in your
1918 passing the \texttt{-{}-pylab} flag, or at any point later in your
1919 session by using the \texttt{\%pylab} command. If you start IPython with
1919 session by using the \texttt{\%pylab} command. If you start IPython with
1920 \texttt{-{}-pylab}, you'll see something like this (note the extra
1920 \texttt{-{}-pylab}, you'll see something like this (note the extra
1921 message about pylab):
1921 message about pylab):
1922
1922
1923 \begin{verbatim}
1923 \begin{verbatim}
1924 $ ipython --pylab
1924 $ ipython --pylab
1925 Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
1925 Python 2.7.2+ (default, Oct 4 2011, 20:03:08)
1926 Type "copyright", "credits" or "license" for more information.
1926 Type "copyright", "credits" or "license" for more information.
1927
1927
1928 IPython 0.13.dev -- An enhanced Interactive Python.
1928 IPython 0.13.dev -- An enhanced Interactive Python.
1929 ? -> Introduction and overview of IPython's features.
1929 ? -> Introduction and overview of IPython's features.
1930 %quickref -> Quick reference.
1930 %quickref -> Quick reference.
1931 help -> Python's own help system.
1931 help -> Python's own help system.
1932 object? -> Details about 'object', use 'object??' for extra details.
1932 object? -> Details about 'object', use 'object??' for extra details.
1933
1933
1934 Welcome to pylab, a matplotlib-based Python environment [backend: Qt4Agg].
1934 Welcome to pylab, a matplotlib-based Python environment [backend: Qt4Agg].
1935 For more information, type 'help(pylab)'.
1935 For more information, type 'help(pylab)'.
1936
1936
1937 In [1]:
1937 In [1]:
1938 \end{verbatim}
1938 \end{verbatim}
1939 Furthermore, IPython will import \texttt{numpy} with the \texttt{np}
1939 Furthermore, IPython will import \texttt{numpy} with the \texttt{np}
1940 shorthand, \texttt{matplotlib.pyplot} as \texttt{plt}, and it will also
1940 shorthand, \texttt{matplotlib.pyplot} as \texttt{plt}, and it will also
1941 load all of the numpy and pyplot top-level names so that you can
1941 load all of the numpy and pyplot top-level names so that you can
1942 directly type something like:
1942 directly type something like:
1943
1943
1944 \begin{verbatim}
1944 \begin{verbatim}
1945 In [1]: x = linspace(0, 2*pi, 200)
1945 In [1]: x = linspace(0, 2*pi, 200)
1946
1946
1947 In [2]: plot(x, sin(x))
1947 In [2]: plot(x, sin(x))
1948 Out[2]: [<matplotlib.lines.Line2D at 0x9e7c16c>]
1948 Out[2]: [<matplotlib.lines.Line2D at 0x9e7c16c>]
1949 \end{verbatim}
1949 \end{verbatim}
1950 instead of having to prefix each call with its full signature (as we
1950 instead of having to prefix each call with its full signature (as we
1951 have been doing in the examples thus far):
1951 have been doing in the examples thus far):
1952
1952
1953 \begin{verbatim}
1953 \begin{verbatim}
1954 In [3]: x = np.linspace(0, 2*np.pi, 200)
1954 In [3]: x = np.linspace(0, 2*np.pi, 200)
1955
1955
1956 In [4]: plt.plot(x, np.sin(x))
1956 In [4]: plt.plot(x, np.sin(x))
1957 Out[4]: [<matplotlib.lines.Line2D at 0x9e900ac>]
1957 Out[4]: [<matplotlib.lines.Line2D at 0x9e900ac>]
1958 \end{verbatim}
1958 \end{verbatim}
1959 This shorthand notation can be a huge time-saver when working
1959 This shorthand notation can be a huge time-saver when working
1960 interactively (it's a few characters but you are likely to type them
1960 interactively (it's a few characters but you are likely to type them
1961 hundreds of times in a session). But we should note that as you develop
1961 hundreds of times in a session). But we should note that as you develop
1962 persistent scripts and notebooks meant for reuse, it's best to get in
1962 persistent scripts and notebooks meant for reuse, it's best to get in
1963 the habit of using the longer notation (known as \emph{fully qualified
1963 the habit of using the longer notation (known as \emph{fully qualified
1964 names}), as it's clearer where things come from and it makes for more
1964 names}), as it's clearer where things come from and it makes for more
1965 robust, readable and maintainable code in the long run.
1965 robust, readable and maintainable code in the long run.
1966
1966
1967 \textbf{Access to the operating system and files}
1967 \textbf{Access to the operating system and files}
1968
1968
1969 In IPython, you can type \texttt{ls} to see your files or \texttt{cd} to
1969 In IPython, you can type \texttt{ls} to see your files or \texttt{cd} to
1970 change directories, just like you would at a regular system prompt:
1970 change directories, just like you would at a regular system prompt:
1971
1971
1972 \begin{verbatim}
1972 \begin{verbatim}
1973 In [2]: cd tests
1973 In [2]: cd tests
1974 /home/fperez/ipython/nbconvert/tests
1974 /home/fperez/ipython/nbconvert/tests
1975
1975
1976 In [3]: ls test.*
1976 In [3]: ls test.*
1977 test.aux test.html test.ipynb test.log test.out test.pdf test.rst test.tex
1977 test.aux test.html test.ipynb test.log test.out test.pdf test.rst test.tex
1978 \end{verbatim}
1978 \end{verbatim}
1979 Furthermore, if you use the \texttt{!} at the beginning of a line, any
1979 Furthermore, if you use the \texttt{!} at the beginning of a line, any
1980 commands you pass afterwards go directly to the operating system:
1980 commands you pass afterwards go directly to the operating system:
1981
1981
1982 \begin{verbatim}
1982 \begin{verbatim}
1983 In [4]: !echo "Hello IPython"
1983 In [4]: !echo "Hello IPython"
1984 Hello IPython
1984 Hello IPython
1985 \end{verbatim}
1985 \end{verbatim}
1986 IPython offers a useful twist in this feature: it will substitute in the
1986 IPython offers a useful twist in this feature: it will substitute in the
1987 command the value of any \emph{Python} variable you may have if you
1987 command the value of any \emph{Python} variable you may have if you
1988 prepend it with a \texttt{\$} sign:
1988 prepend it with a \texttt{\$} sign:
1989
1989
1990 \begin{verbatim}
1990 \begin{verbatim}
1991 In [5]: message = 'IPython interpolates from Python to the shell'
1991 In [5]: message = 'IPython interpolates from Python to the shell'
1992
1992
1993 In [6]: !echo $message
1993 In [6]: !echo $message
1994 IPython interpolates from Python to the shell
1994 IPython interpolates from Python to the shell
1995 \end{verbatim}
1995 \end{verbatim}
1996 This feature can be extremely useful, as it lets you combine the power
1996 This feature can be extremely useful, as it lets you combine the power
1997 and clarity of Python for complex logic with the immediacy and
1997 and clarity of Python for complex logic with the immediacy and
1998 familiarity of many shell commands. Additionally, if you start the line
1998 familiarity of many shell commands. Additionally, if you start the line
1999 with \emph{two} \texttt{!!} signs, the output of the command will be
1999 with \emph{two} \texttt{!!} signs, the output of the command will be
2000 automatically captured as a list of lines, e.g.:
2000 automatically captured as a list of lines, e.g.:
2001
2001
2002 \begin{verbatim}
2002 \begin{verbatim}
2003 In [10]: !!ls test.*
2003 In [10]: !!ls test.*
2004 Out[10]:
2004 Out[10]:
2005 ['test.aux',
2005 ['test.aux',
2006 'test.html',
2006 'test.html',
2007 'test.ipynb',
2007 'test.ipynb',
2008 'test.log',
2008 'test.log',
2009 'test.out',
2009 'test.out',
2010 'test.pdf',
2010 'test.pdf',
2011 'test.rst',
2011 'test.rst',
2012 'test.tex']
2012 'test.tex']
2013 \end{verbatim}
2013 \end{verbatim}
2014 As explained above, you can now use this as the variable \texttt{\_10}.
2014 As explained above, you can now use this as the variable \texttt{\_10}.
2015 If you directly want to capture the output of a system command to a
2015 If you directly want to capture the output of a system command to a
2016 Python variable, you can use the syntax \texttt{=!}:
2016 Python variable, you can use the syntax \texttt{=!}:
2017
2017
2018 \begin{verbatim}
2018 \begin{verbatim}
2019 In [11]: testfiles =! ls test.*
2019 In [11]: testfiles =! ls test.*
2020
2020
2021 In [12]: print testfiles
2021 In [12]: print testfiles
2022 ['test.aux', 'test.html', 'test.ipynb', 'test.log', 'test.out', 'test.pdf', 'test.rst', 'test.tex']
2022 ['test.aux', 'test.html', 'test.ipynb', 'test.log', 'test.out', 'test.pdf', 'test.rst', 'test.tex']
2023 \end{verbatim}
2023 \end{verbatim}
2024 Finally, the special \texttt{\%alias} command lets you define names that
2024 Finally, the special \texttt{\%alias} command lets you define names that
2025 are shorthands for system commands, so that you can type them without
2025 are shorthands for system commands, so that you can type them without
2026 having to prefix them via \texttt{!} explicitly (for example,
2026 having to prefix them via \texttt{!} explicitly (for example,
2027 \texttt{ls} is an alias that has been predefined for you at startup).
2027 \texttt{ls} is an alias that has been predefined for you at startup).
2028
2028
2029 \textbf{Magic commands}
2029 \textbf{Magic commands}
2030
2030
2031 IPython has a system for special commands, called `magics', that let you
2031 IPython has a system for special commands, called `magics', that let you
2032 control IPython itself and perform many common tasks with a more
2032 control IPython itself and perform many common tasks with a more
2033 shell-like syntax: it uses spaces for delimiting arguments, flags can be
2033 shell-like syntax: it uses spaces for delimiting arguments, flags can be
2034 set with dashes and all arguments are treated as strings, so no
2034 set with dashes and all arguments are treated as strings, so no
2035 additional quoting is required. This kind of syntax is invalid in the
2035 additional quoting is required. This kind of syntax is invalid in the
2036 Python language but very convenient for interactive typing (fewer
2036 Python language but very convenient for interactive typing (fewer
2037 parentheses, commas and quoting everywhere); IPython distinguishes the
2037 parentheses, commas and quoting everywhere); IPython distinguishes the
2038 two by detecting lines that start with the \texttt{\%} character.
2038 two by detecting lines that start with the \texttt{\%} character.
2039
2039
2040 You can learn more about the magic system by simply typing
2040 You can learn more about the magic system by simply typing
2041 \texttt{\%magic} at the prompt, which will give you a short description
2041 \texttt{\%magic} at the prompt, which will give you a short description
2042 plus the documentation on \emph{all} available magics. If you want to
2042 plus the documentation on \emph{all} available magics. If you want to
2043 see only a listing of existing magics, you can use \texttt{\%lsmagic}:
2043 see only a listing of existing magics, you can use \texttt{\%lsmagic}:
2044
2044
2045 \begin{verbatim}
2045 \begin{verbatim}
2046 In [4]: lsmagic
2046 In [4]: lsmagic
2047 Available magic functions:
2047 Available magic functions:
2048 %alias %autocall %autoindent %automagic %bookmark %c %cd %colors %config %cpaste
2048 %alias %autocall %autoindent %automagic %bookmark %c %cd %colors %config %cpaste
2049 %debug %dhist %dirs %doctest_mode %ds %ed %edit %env %gui %hist %history
2049 %debug %dhist %dirs %doctest_mode %ds %ed %edit %env %gui %hist %history
2050 %install_default_config %install_ext %install_profiles %load_ext %loadpy %logoff %logon
2050 %install_default_config %install_ext %install_profiles %load_ext %loadpy %logoff %logon
2051 %logstart %logstate %logstop %lsmagic %macro %magic %notebook %page %paste %pastebin
2051 %logstart %logstate %logstop %lsmagic %macro %magic %notebook %page %paste %pastebin
2052 %pd %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %pop %popd %pprint %precision %profile
2052 %pd %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %pop %popd %pprint %precision %profile
2053 %prun %psearch %psource %pushd %pwd %pycat %pylab %quickref %recall %rehashx
2053 %prun %psearch %psource %pushd %pwd %pycat %pylab %quickref %recall %rehashx
2054 %reload_ext %rep %rerun %reset %reset_selective %run %save %sc %stop %store %sx %tb
2054 %reload_ext %rep %rerun %reset %reset_selective %run %save %sc %stop %store %sx %tb
2055 %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode
2055 %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode
2056
2056
2057 Automagic is ON, % prefix NOT needed for magic functions.
2057 Automagic is ON, % prefix NOT needed for magic functions.
2058 \end{verbatim}
2058 \end{verbatim}
2059 Note how the example above omitted the explicit \texttt{\%} marker and
2059 Note how the example above omitted the explicit \texttt{\%} marker and
2060 simply uses \texttt{lsmagic}. As long as the `automagic' feature is on
2060 simply uses \texttt{lsmagic}. As long as the `automagic' feature is on
2061 (which it is by default), you can omit the \texttt{\%} marker as long as
2061 (which it is by default), you can omit the \texttt{\%} marker as long as
2062 there is no ambiguity with a Python variable of the same name.
2062 there is no ambiguity with a Python variable of the same name.
2063
2063
2064 \textbf{Running your code}
2064 \textbf{Running your code}
2065
2065
2066 While it's easy to type a few lines of code in IPython, for any
2066 While it's easy to type a few lines of code in IPython, for any
2067 long-lived work you should keep your codes in Python scripts (or in
2067 long-lived work you should keep your codes in Python scripts (or in
2068 IPython notebooks, see below). Consider that you have a script, in this
2068 IPython notebooks, see below). Consider that you have a script, in this
2069 case trivially simple for the sake of brevity, named \texttt{simple.py}:
2069 case trivially simple for the sake of brevity, named \texttt{simple.py}:
2070
2070
2071 \begin{verbatim}
2071 \begin{verbatim}
2072 In [12]: !cat simple.py
2072 In [12]: !cat simple.py
2073 import numpy as np
2073 import numpy as np
2074
2074
2075 x = np.random.normal(size=100)
2075 x = np.random.normal(size=100)
2076
2076
2077 print 'First elment of x:', x[0]
2077 print 'First elment of x:', x[0]
2078 \end{verbatim}
2078 \end{verbatim}
2079 The typical workflow with IPython is to use the \texttt{\%run} magic to
2079 The typical workflow with IPython is to use the \texttt{\%run} magic to
2080 execute your script (you can omit the .py extension if you want). When
2080 execute your script (you can omit the .py extension if you want). When
2081 you run it, the script will execute just as if it had been run at the
2081 you run it, the script will execute just as if it had been run at the
2082 system prompt with \texttt{python simple.py} (though since modules don't
2082 system prompt with \texttt{python simple.py} (though since modules don't
2083 get re-executed on new imports by Python, all system initialization is
2083 get re-executed on new imports by Python, all system initialization is
2084 essentially free, which can have a significant run time impact in some
2084 essentially free, which can have a significant run time impact in some
2085 cases):
2085 cases):
2086
2086
2087 \begin{verbatim}
2087 \begin{verbatim}
2088 In [13]: run simple
2088 In [13]: run simple
2089 First elment of x: -1.55872256289
2089 First elment of x: -1.55872256289
2090 \end{verbatim}
2090 \end{verbatim}
2091 Once it completes, all variables defined in it become available for you
2091 Once it completes, all variables defined in it become available for you
2092 to use interactively:
2092 to use interactively:
2093
2093
2094 \begin{verbatim}
2094 \begin{verbatim}
2095 In [14]: x.shape
2095 In [14]: x.shape
2096 Out[14]: (100,)
2096 Out[14]: (100,)
2097 \end{verbatim}
2097 \end{verbatim}
2098 This allows you to plot data, try out ideas, etc, in a
2098 This allows you to plot data, try out ideas, etc, in a
2099 \texttt{\%run}/interact/edit cycle that can be very productive. As you
2099 \texttt{\%run}/interact/edit cycle that can be very productive. As you
2100 start understanding your problem better you can refine your script
2100 start understanding your problem better you can refine your script
2101 further, incrementally improving it based on the work you do at the
2101 further, incrementally improving it based on the work you do at the
2102 IPython prompt. At any point you can use the \texttt{\%hist} magic to
2102 IPython prompt. At any point you can use the \texttt{\%hist} magic to
2103 print out your history without prompts, so that you can copy useful
2103 print out your history without prompts, so that you can copy useful
2104 fragments back into the script.
2104 fragments back into the script.
2105
2105
2106 By default, \texttt{\%run} executes scripts in a completely empty
2106 By default, \texttt{\%run} executes scripts in a completely empty
2107 namespace, to better mimic how they would execute at the system prompt
2107 namespace, to better mimic how they would execute at the system prompt
2108 with plain Python. But if you use the \texttt{-i} flag, the script will
2108 with plain Python. But if you use the \texttt{-i} flag, the script will
2109 also see your interactively defined variables. This lets you edit in a
2109 also see your interactively defined variables. This lets you edit in a
2110 script larger amounts of code that still behave as if you had typed them
2110 script larger amounts of code that still behave as if you had typed them
2111 at the IPython prompt.
2111 at the IPython prompt.
2112
2112
2113 You can also get a summary of the time taken by your script with the
2113 You can also get a summary of the time taken by your script with the
2114 \texttt{-t} flag; consider a different script \texttt{randsvd.py} that
2114 \texttt{-t} flag; consider a different script \texttt{randsvd.py} that
2115 takes a bit longer to run:
2115 takes a bit longer to run:
2116
2116
2117 \begin{verbatim}
2117 \begin{verbatim}
2118 In [21]: run -t randsvd.py
2118 In [21]: run -t randsvd.py
2119
2119
2120 IPython CPU timings (estimated):
2120 IPython CPU timings (estimated):
2121 User : 0.38 s.
2121 User : 0.38 s.
2122 System : 0.04 s.
2122 System : 0.04 s.
2123 Wall time: 0.34 s.
2123 Wall time: 0.34 s.
2124 \end{verbatim}
2124 \end{verbatim}
2125 \texttt{User} is the time spent by the computer executing your code,
2125 \texttt{User} is the time spent by the computer executing your code,
2126 while \texttt{System} is the time the operating system had to work on
2126 while \texttt{System} is the time the operating system had to work on
2127 your behalf, doing things like memory allocation that are needed by your
2127 your behalf, doing things like memory allocation that are needed by your
2128 code but that you didn't explicitly program and that happen inside the
2128 code but that you didn't explicitly program and that happen inside the
2129 kernel. The \texttt{Wall time} is the time on a `clock on the wall'
2129 kernel. The \texttt{Wall time} is the time on a `clock on the wall'
2130 between the start and end of your program.
2130 between the start and end of your program.
2131
2131
2132 If \texttt{Wall \textgreater{} User+System}, your code is most likely
2132 If \texttt{Wall \textgreater{} User+System}, your code is most likely
2133 waiting idle for certain periods. That could be waiting for data to
2133 waiting idle for certain periods. That could be waiting for data to
2134 arrive from a remote source or perhaps because the operating system has
2134 arrive from a remote source or perhaps because the operating system has
2135 to swap large amounts of virtual memory. If you know that your code
2135 to swap large amounts of virtual memory. If you know that your code
2136 doesn't explicitly wait for remote data to arrive, you should
2136 doesn't explicitly wait for remote data to arrive, you should
2137 investigate further to identify possible ways of improving the
2137 investigate further to identify possible ways of improving the
2138 performance profile.
2138 performance profile.
2139
2139
2140 If you only want to time how long a single statement takes, you don't
2140 If you only want to time how long a single statement takes, you don't
2141 need to put it into a script as you can use the \texttt{\%timeit} magic,
2141 need to put it into a script as you can use the \texttt{\%timeit} magic,
2142 which uses Python's \texttt{timeit} module to very carefully measure
2142 which uses Python's \texttt{timeit} module to very carefully measure
2143 timing data; \texttt{timeit} can measure even short statements that
2143 timing data; \texttt{timeit} can measure even short statements that
2144 execute extremely fast:
2144 execute extremely fast:
2145
2145
2146 \begin{verbatim}
2146 \begin{verbatim}
2147 In [27]: %timeit a=1
2147 In [27]: %timeit a=1
2148 10000000 loops, best of 3: 23 ns per loop
2148 10000000 loops, best of 3: 23 ns per loop
2149 \end{verbatim}
2149 \end{verbatim}
2150 and for code that runs longer, it automatically adjusts so the overall
2150 and for code that runs longer, it automatically adjusts so the overall
2151 measurement doesn't take too long:
2151 measurement doesn't take too long:
2152
2152
2153 \begin{verbatim}
2153 \begin{verbatim}
2154 In [28]: %timeit np.linalg.svd(x)
2154 In [28]: %timeit np.linalg.svd(x)
2155 1 loops, best of 3: 310 ms per loop
2155 1 loops, best of 3: 310 ms per loop
2156 \end{verbatim}
2156 \end{verbatim}
2157 The \texttt{\%run} magic still has more options for debugging and
2157 The \texttt{\%run} magic still has more options for debugging and
2158 profiling data; you should read its documentation for many useful
2158 profiling data; you should read its documentation for many useful
2159 details (as always, just type \texttt{\%run?}).
2159 details (as always, just type \texttt{\%run?}).
2160
2160
2161 \subsection{The graphical Qt console}
2161 \subsection{The graphical Qt console}
2162 If you type at the system prompt (see the IPython website for
2162 If you type at the system prompt (see the IPython website for
2163 installation details, as this requires some additional libraries):
2163 installation details, as this requires some additional libraries):
2164
2164
2165 \begin{verbatim}
2165 \begin{verbatim}
2166 $ ipython qtconsole
2166 $ ipython qtconsole
2167 \end{verbatim}
2167 \end{verbatim}
2168 instead of opening in a terminal as before, IPython will start a
2168 instead of opening in a terminal as before, IPython will start a
2169 graphical console that at first sight appears just like a terminal, but
2169 graphical console that at first sight appears just like a terminal, but
2170 which is in fact much more capable than a text-only terminal. This is a
2170 which is in fact much more capable than a text-only terminal. This is a
2171 specialized terminal designed for interactive scientific work, and it
2171 specialized terminal designed for interactive scientific work, and it
2172 supports full multi-line editing with color highlighting and graphical
2172 supports full multi-line editing with color highlighting and graphical
2173 calltips for functions, it can keep multiple IPython sessions open
2173 calltips for functions, it can keep multiple IPython sessions open
2174 simultaneously in tabs, and when scripts run it can display the figures
2174 simultaneously in tabs, and when scripts run it can display the figures
2175 inline directly in the work area.
2175 inline directly in the work area.
2176
2176
2177 % This cell is for the pdflatex output only
2177 % This cell is for the pdflatex output only
2178 \begin{figure}[htbp]
2178 \begin{figure}[htbp]
2179 \centering
2179 \centering
2180 \includegraphics[width=3in]{ipython_qtconsole2.png}
2180 \includegraphics[width=3in]{ipython_qtconsole2.png}
2181 \caption{The IPython Qt console: a lightweight terminal for scientific exploration, with code, results and graphics in a single environment.}
2181 \caption{The IPython Qt console: a lightweight terminal for scientific exploration, with code, results and graphics in a single environment.}
2182 \end{figure}
2182 \end{figure}
2183 The Qt console accepts the same \texttt{-{}-pylab} startup flags as the
2183 The Qt console accepts the same \texttt{-{}-pylab} startup flags as the
2184 terminal, but you can additionally supply the value
2184 terminal, but you can additionally supply the value
2185 \texttt{-{}-pylab inline}, which enables the support for inline graphics
2185 \texttt{-{}-pylab inline}, which enables the support for inline graphics
2186 shown in the figure. This is ideal for keeping all the code and figures
2186 shown in the figure. This is ideal for keeping all the code and figures
2187 in the same session, given that the console can save the output of your
2187 in the same session, given that the console can save the output of your
2188 entire session to HTML or PDF.
2188 entire session to HTML or PDF.
2189
2189
2190 Since the Qt console makes it far more convenient than the terminal to
2190 Since the Qt console makes it far more convenient than the terminal to
2191 edit blocks of code with multiple lines, in this environment it's worth
2191 edit blocks of code with multiple lines, in this environment it's worth
2192 knowing about the \texttt{\%loadpy} magic function. \texttt{\%loadpy}
2192 knowing about the \texttt{\%loadpy} magic function. \texttt{\%loadpy}
2193 takes a path to a local file or remote URL, fetches its contents, and
2193 takes a path to a local file or remote URL, fetches its contents, and
2194 puts it in the work area for you to further edit and execute. It can be
2194 puts it in the work area for you to further edit and execute. It can be
2195 an extremely fast and convenient way of loading code from local disk or
2195 an extremely fast and convenient way of loading code from local disk or
2196 remote examples from sites such as the
2196 remote examples from sites such as the
2197 \href{http://matplotlib.sourceforge.net/gallery.html}{Matplotlib
2197 \href{http://matplotlib.sourceforge.net/gallery.html}{Matplotlib
2198 gallery}.
2198 gallery}.
2199
2199
2200 Other than its enhanced capabilities for code and graphics, all of the
2200 Other than its enhanced capabilities for code and graphics, all of the
2201 features of IPython we've explained before remain functional in this
2201 features of IPython we've explained before remain functional in this
2202 graphical console.
2202 graphical console.
2203
2203
2204 \subsection{The IPython Notebook}
2204 \subsection{The IPython Notebook}
2205 The third way to interact with IPython, in addition to the terminal and
2205 The third way to interact with IPython, in addition to the terminal and
2206 graphical Qt console, is a powerful web interface called the ``IPython
2206 graphical Qt console, is a powerful web interface called the ``IPython
2207 Notebook''. If you run at the system console (you can omit the
2207 Notebook''. If you run at the system console (you can omit the
2208 \texttt{pylab} flags if you don't need plotting support):
2208 \texttt{pylab} flags if you don't need plotting support):
2209
2209
2210 \begin{verbatim}
2210 \begin{verbatim}
2211 $ ipython notebook --pylab inline
2211 $ ipython notebook --pylab inline
2212 \end{verbatim}
2212 \end{verbatim}
2213 IPython will start a process that runs a web server in your local
2213 IPython will start a process that runs a web server in your local
2214 machine and to which a web browser can connect. The Notebook is a
2214 machine and to which a web browser can connect. The Notebook is a
2215 workspace that lets you execute code in blocks called `cells' and
2215 workspace that lets you execute code in blocks called `cells' and
2216 displays any results and figures, but which can also contain arbitrary
2216 displays any results and figures, but which can also contain arbitrary
2217 text (including LaTeX-formatted mathematical expressions) and any rich
2217 text (including LaTeX-formatted mathematical expressions) and any rich
2218 media that a modern web browser is capable of displaying.
2218 media that a modern web browser is capable of displaying.
2219
2219
2220 % This cell is for the pdflatex output only
2220 % This cell is for the pdflatex output only
2221 \begin{figure}[htbp]
2221 \begin{figure}[htbp]
2222 \centering
2222 \centering
2223 \includegraphics[width=3in]{ipython-notebook-specgram-2.png}
2223 \includegraphics[width=3in]{ipython-notebook-specgram-2.png}
2224 \caption{The IPython Notebook: text, equations, code, results, graphics and other multimedia in an open format for scientific exploration and collaboration}
2224 \caption{The IPython Notebook: text, equations, code, results, graphics and other multimedia in an open format for scientific exploration and collaboration}
2225 \end{figure}
2225 \end{figure}
2226 In fact, this document was written as a Notebook, and only exported to
2226 In fact, this document was written as a Notebook, and only exported to
2227 LaTeX for printing. Inside of each cell, all the features of IPython
2227 LaTeX for printing. Inside of each cell, all the features of IPython
2228 that we have discussed before remain functional, since ultimately this
2228 that we have discussed before remain functional, since ultimately this
2229 web client is communicating with the same IPython code that runs in the
2229 web client is communicating with the same IPython code that runs in the
2230 terminal. But this interface is a much richer and more powerful
2230 terminal. But this interface is a much richer and more powerful
2231 environment for maintaining long-term ``live and executable'' scientific
2231 environment for maintaining long-term ``live and executable'' scientific
2232 documents.
2232 documents.
2233
2233
2234 Notebook environments have existed in commercial systems like
2234 Notebook environments have existed in commercial systems like
2235 Mathematica(TM) and Maple(TM) for a long time; in the open source world
2235 Mathematica(TM) and Maple(TM) for a long time; in the open source world
2236 the \href{http://sagemath.org}{Sage} project blazed this particular
2236 the \href{http://sagemath.org}{Sage} project blazed this particular
2237 trail starting in 2006, and now we bring all the features that have made
2237 trail starting in 2006, and now we bring all the features that have made
2238 IPython such a widely used tool to a Notebook model.
2238 IPython such a widely used tool to a Notebook model.
2239
2239
2240 Since the Notebook runs as a web application, it is possible to
2240 Since the Notebook runs as a web application, it is possible to
2241 configure it for remote access, letting you run your computations on a
2241 configure it for remote access, letting you run your computations on a
2242 persistent server close to your data, which you can then access remotely
2242 persistent server close to your data, which you can then access remotely
2243 from any browser-equipped computer. We encourage you to read the
2243 from any browser-equipped computer. We encourage you to read the
2244 extensive documentation provided by the IPython project for details on
2244 extensive documentation provided by the IPython project for details on
2245 how to do this and many more features of the notebook.
2245 how to do this and many more features of the notebook.
2246
2246
2247 Finally, as we said earlier, IPython also has a high-level and easy to
2247 Finally, as we said earlier, IPython also has a high-level and easy to
2248 use set of libraries for parallel computing, that let you control
2248 use set of libraries for parallel computing, that let you control
2249 (interactively if desired) not just one IPython but an entire cluster of
2249 (interactively if desired) not just one IPython but an entire cluster of
2250 `IPython engines'. Unfortunately a detailed discussion of these tools is
2250 `IPython engines'. Unfortunately a detailed discussion of these tools is
2251 beyond the scope of this text, but should you need to parallelize your
2251 beyond the scope of this text, but should you need to parallelize your
2252 analysis codes, a quick read of the tutorials and examples provided at
2252 analysis codes, a quick read of the tutorials and examples provided at
2253 the IPython site may prove fruitful.
2253 the IPython site may prove fruitful.
2254
2254
2255 \end{document}
2255 \end{document}
General Comments 0
You need to be logged in to leave comments. Login now