##// END OF EJS Templates
receive tasks, even when no engines are registered...
receive tasks, even when no engines are registered Previously, tasks submitted when no engines were registered were left in the upstream ZMQ queue. This prevented the tasks being entered into the Hub's database.

File last commit:

r4910:0dc49390
r6092:11fd67e3
Show More
pwordfreq.py
80 lines | 2.2 KiB | text/x-python | PythonLexer
MinRK
updates to docs and examples
r3670 #!/usr/bin/env python
"""Parallel word frequency counter.
This only works for a local cluster, because the filenames are local paths.
"""
import os
MinRK
update a few parallel examples...
r4184 import time
MinRK
updates to docs and examples
r3670 import urllib
from itertools import repeat
from wordfreq import print_wordfreq, wordfreq
from IPython.parallel import Client, Reference
davinci_url = "http://www.gutenberg.org/cache/epub/5000/pg5000.txt"


def pwordfreq(view, fnames):
    """Parallel word frequency counter.

    Each engine computes the word counts for exactly one file; the
    per-engine dicts are then merged locally into a single result.

    view   - An IPython DirectView
    fnames - The filenames containing the split data, one per engine.

    Returns a dict mapping word -> total count across all files.
    Raises ValueError if the number of files does not match the
    number of engines (an ``assert`` would be stripped under ``-O``).
    """
    if len(fnames) != len(view.targets):
        raise ValueError("need one filename per engine "
                         "(%i filenames, %i engines)" % (len(fnames), len(view.targets)))
    # flatten=True assigns exactly one filename to each target
    view.scatter('fname', fnames, flatten=True)
    ar = view.apply(wordfreq, Reference('fname'))
    freqs_list = ar.get()
    # Merge the per-engine dicts in one pass; a missing word counts as zero.
    freqs = {}
    for f in freqs_list:
        for word, count in f.iteritems():
            freqs[word] = freqs.get(word, 0) + count
    return freqs
if __name__ == '__main__':
# Create a Client and View
rc = Client()
view = rc[:]
if not os.path.exists('davinci.txt'):
# download from project gutenberg
print "Downloading Da Vinci's notebooks from Project Gutenberg"
urllib.urlretrieve(davinci_url, 'davinci.txt')
# Run the serial version
print "Serial word frequency count:"
text = open('davinci.txt').read()
MinRK
update a few parallel examples...
r4184 tic = time.time()
MinRK
updates to docs and examples
r3670 freqs = wordfreq(text)
MinRK
update a few parallel examples...
r4184 toc = time.time()
MinRK
updates to docs and examples
r3670 print_wordfreq(freqs, 10)
MinRK
update a few parallel examples...
r4184 print "Took %.3f s to calcluate"%(toc-tic)
MinRK
updates to docs and examples
r3670
# The parallel version
print "\nParallel word frequency count:"
# split the davinci.txt into one file per engine:
lines = text.splitlines()
nlines = len(lines)
n = len(rc)
block = nlines/n
for i in range(n):
chunk = lines[i*block:i*(block+1)]
with open('davinci%i.txt'%i, 'w') as f:
f.write('\n'.join(chunk))
Jörgen Stenarson
Search of getcwd and replace with getcwdu. Ignoring core/prompts.py
r4208 cwd = os.path.abspath(os.getcwdu())
MinRK
updates to docs and examples
r3670 fnames = [ os.path.join(cwd, 'davinci%i.txt'%i) for i in range(n)]
MinRK
update a few parallel examples...
r4184 tic = time.time()
MinRK
updates to docs and examples
r3670 pfreqs = pwordfreq(view,fnames)
MinRK
update a few parallel examples...
r4184 toc = time.time()
MinRK
updates to docs and examples
r3670 print_wordfreq(freqs)
MinRK
update a few parallel examples...
r4184 print "Took %.3f s to calcluate on %i engines"%(toc-tic, len(view.targets))
MinRK
updates to docs and examples
r3670 # cleanup split files
map(os.remove, fnames)