upstream/ipython Commit - r14208:4f12e03d

proper python exception handling

stonebig <stonebig> -

r14208:4f12e03d

parent child

examples/parallel/davinci/pwordfreq.py

0 +2 -2

              #!/usr/bin/env python
              """Parallel word frequency counter.
              This only works for a local cluster, because the filenames are local paths.
              """
              import os
              import time
              import urllib
              from itertools import repeat
              from wordfreq import print_wordfreq, wordfreq
              from IPython.parallel import Client, Reference
              from __future__ import division
-             try : #python2
                  from urllib import urlretrieve
-             except : #python3
+             except ImportError: #python3
                  from urllib.request import urlretrieve
              davinci_url = "http://www.gutenberg.org/cache/epub/5000/pg5000.txt"
              def pwordfreq(view, fnames):
                  """Count word frequencies in parallel and merge the results.

                  view - An IPython DirectView
                  fnames - The filenames containing the split data, one per target.

                  Each filename is scattered to the view as 'fname', wordfreq is
                  applied remotely, and the per-engine dictionaries are summed into a
                  single word -> count dictionary, which is returned.
                  """
                  assert len(fnames) == len(view.targets)
                  view.scatter('fname', fnames, flatten=True)
                  per_engine = view.apply(wordfreq, Reference('fname')).get()
                  # gather every word seen by any engine
                  vocabulary = set()
                  for partial in per_engine:
                      vocabulary.update(partial.keys())
                  # start every word at zero, then add up the partial counts
                  totals = dict.fromkeys(vocabulary, 0)
                  for partial in per_engine:
                      for word in partial:
                          totals[word] += partial[word]
                  return totals
              if __name__ == '__main__':
                  # Create a Client and View
                  rc = Client()
                  view = rc[:]
                  if not os.path.exists('davinci.txt'):
                      # download from project gutenberg
                      print("Downloading Da Vinci's notebooks from Project Gutenberg")
                      urlretrieve(davinci_url, 'davinci.txt')
                  # Run the serial version
                  print("Serial word frequency count:")
                  # read through a context manager so the file handle is closed promptly
                  with open('davinci.txt') as f:
                      text = f.read()
                  tic = time.time()
                  freqs = wordfreq(text)
                  toc = time.time()
                  print_wordfreq(freqs, 10)
                  print("Took %.3f s to calculate"%(toc-tic))
                  # The parallel version
                  print("\nParallel word frequency count:")
                  # split the davinci.txt into one file per engine:
                  lines = text.splitlines()
                  nlines = len(lines)
                  n = len(rc)
                  block = nlines//n
                  for i in range(n):
                      # file i holds lines [i*block, (i+1)*block); the last file also
                      # takes the remainder so that no line of the text is dropped
                      start = i*block
                      stop = nlines if i == n-1 else start+block
                      chunk = lines[start:stop]
                      with open('davinci%i.txt'%i, 'w') as f:
                          f.write('\n'.join(chunk))
-                 try : #python2
                      cwd = os.path.abspath(os.getcwdu())
-                 except : #python3
+                 except AttributeError: #python3
                      cwd = os.path.abspath(os.getcwd())
                  # absolute paths of the per-engine split files
                  fnames = [os.path.join(cwd, 'davinci%i.txt'%i) for i in range(n)]
                  tic = time.time()
                  pfreqs = pwordfreq(view, fnames)
                  toc = time.time()
                  # report the parallel result (pfreqs) rather than the serial freqs,
                  # with the same number of entries as the serial report
                  print_wordfreq(pfreqs, 10)
                  print("Took %.3f s to calculate on %i engines"%(toc-tic, len(view.targets)))
                  # cleanup split files; an explicit loop is used because map() is
                  # lazy on Python 3 and would never call os.remove
                  for fname in fnames:
                      os.remove(fname)

examples/parallel/pi/pidigits.py

0 +1 -1

              """Compute statistics on the digits of pi.
              This uses precomputed digits of pi from the website
              of Professor Yasumasa Kanada at the University of
              Tokyo: http://www.super-computing.org/
              Currently, there are only functions to read the
              .txt (non-compressed, non-binary) files, but adding
              support for compression and binary files would be
              straightforward.
              This focuses on computing the number of times that
              all 1-, 2-, ..., n-digit sequences occur in the digits of pi.
              If the digits of pi are truly random, these frequencies
              should be equal.
              """
              # Import statements
              from __future__ import division, with_statement
              import numpy as np
              from matplotlib import pyplot as plt
              try : #python2
                  from urllib import urlretrieve
-             except : #python3
+             except ImportError : #python3
                  from urllib.request import urlretrieve
              	# Top-level functions
              def fetch_pi_file(filename):
                  """Make sure a segment of pi is available as a local file.

                  Nothing happens when ``filename`` already exists; otherwise the file
                  of that name is retrieved from the super-computing.org FTP directory
                  and saved under the same name.
                  """
                  import os
                  import urllib
                  if not os.path.exists(filename):
                      # not present locally yet: download it
                      ftpdir = "ftp://pi.super-computing.org/.2/pi200m/"
                      urlretrieve(ftpdir + filename, filename)
              def compute_one_digit_freqs(filename):
                  """
                  Compute the 1 digit frequencies of the digits of pi stored in a file.
                  """
                  # stream the digits straight from the file into the counter
                  return one_digit_freqs(txt_file_to_digits(filename))
              def compute_two_digit_freqs(filename):
                  """
                  Compute the 2 digit frequencies of the digits of pi stored in a file.
                  """
                  # stream the digits straight from the file into the counter
                  return two_digit_freqs(txt_file_to_digits(filename))
              def reduce_freqs(freqlist):
                  """
                  Sum a list of frequency count arrays into one array of total counts.

                  The accumulator takes its shape and dtype from the first entry of
                  ``freqlist``; every entry is then added into it in place.
                  """
                  total = np.zeros_like(freqlist[0])
                  for counts in freqlist:
                      total += counts
                  return total
              def compute_n_digit_freqs(filename, n):
                  """
                  Compute the n digit frequencies of the digits of pi stored in a file.
                  """
                  # stream the digits straight from the file into the counter
                  return n_digit_freqs(txt_file_to_digits(filename), n)
              # Read digits from a txt file
              def txt_file_to_digits(filename, the_type=str):
                  """
                  Yield the digits of pi read from a .txt file.

                  Newline and space characters are skipped; every other character is
                  converted with ``the_type`` (``str`` by default) before being yielded.
                  The file is iterated line by line instead of being loaded with
                  ``readlines()``, so large digit files are not held in memory at once.
                  """
                  with open(filename, 'r') as f:
                      for line in f:
                          for c in line:
                              if c not in '\n ':
                                  yield the_type(c)
              # Actual counting functions
              def one_digit_freqs(digits, normalize=False):
                  """
                  Consume an iterable of digits and count how often each digit occurs.

                  Returns a length-10 array of counts, or the counts divided by their
                  sum when ``normalize`` is true.
                  """
                  counts = np.zeros(10, dtype='i4')
                  for digit in digits:
                      counts[int(digit)] += 1
                  if not normalize:
                      return counts
                  return counts/counts.sum()
              def two_digit_freqs(digits, normalize=False):
                  """
                  Consume digits of pi and compute 2 digits freq. counts.

                  ``digits`` may be any iterable of single digits, given either as
                  characters or as integers. Every adjacent pair is counted, including
                  the final one, at index ``10*first + second`` of the returned
                  length-100 array. With fewer than two digits all counts are zero.
                  When ``normalize`` is true the counts are divided by their sum.
                  """
                  freqs = np.zeros(100, dtype='i4')
                  previous = None
                  for d in digits:
                      current = int(d)
                      if previous is not None:
                          # arithmetic index works for both str and int digits
                          freqs[10*previous + current] += 1
                      previous = current
                  if normalize:
                      freqs = freqs/freqs.sum()
                  return freqs
              def n_digit_freqs(digits, n, normalize=False):
                  """
                  Consume digits of pi and compute n digits freq. counts.
                  This should only be used for 1-6 digits.

                  ``digits`` may be any iterable of single digits (characters or
                  integers). Every window of ``n`` consecutive digits is counted,
                  including the final one, at the index equal to the window read as a
                  decimal number. The returned array has ``10**n`` entries; when
                  ``normalize`` is true the counts are divided by their sum.

                  Raises ValueError if ``n`` is smaller than 1.
                  """
                  if n < 1:
                      raise ValueError("n must be at least 1")
                  size = pow(10, n)
                  freqs = np.zeros(size, dtype='i4')
                  window = 0
                  for seen, d in enumerate(digits, 1):
                      # rolling update: shift the window left one decimal place, drop
                      # the oldest digit with the modulo, and append the new digit
                      window = (window*10 + int(d)) % size
                      if seen >= n:
                          freqs[window] += 1
                  if normalize:
                      freqs = freqs/freqs.sum()
                  return freqs
              # Plotting functions
              def plot_two_digit_freqs(f2):
                  """
                  Draw the two digit frequency counts as a 10x10 matrix with matplotlib.

                  A copy of ``f2`` is reshaped to (10, 10), so the input is left
                  untouched. Each cell is annotated with a two digit label, and the
                  object returned by ``plt.matshow`` is handed back to the caller.
                  """
                  grid = f2.copy().reshape(10, 10)
                  ax = plt.matshow(grid)
                  plt.colorbar()
                  for col in range(10):
                      for row in range(10):
                          label = str(row) + str(col)
                          plt.text(col-0.2, row+0.2, label)
                  plt.ylabel('First digit')
                  plt.xlabel('Second digit')
                  return ax
              def plot_one_digit_freqs(f1):
                  """
                  Draw the one digit frequency counts as a line plot with matplotlib.

                  Returns whatever ``plt.plot`` returns for the plotted counts.
                  """
                  plotted = plt.plot(f1, 'bo-')
                  plt.title('Single digit counts in pi')
                  plt.xlabel('Digit')
                  plt.ylabel('Count')
                  return plotted

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages