upstream/ipython Files · examples/Parallel Computing/daVinci Word Count/wordfreq.py

Initial messing around....

Initial messing around. LaTeX tab completion will have to be done outside the normal completer logic, as the completer's line-splitting logic uses \\ as a special character to split lines on. I probably want to put the LaTeX completions first and, if it finds any matches, not do any other completion logic. The only issue is that this might short-circuit dir/path matching on Windows. Hmmm.

Brian E. Granger - - Load All Authors

File last commit:

r16120:24b93a1d


                r17700:7b6d94ef

Download file

             wordfreq.py
        
                    69 lines
            
             | 2.0 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / examples / Parallel Computing / daVinci Word Count / wordfreq.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        MinRK
    
updates to docs and examples

              r3670
            
      """Count the frequencies of words in a string"""

      from __future__ import division

        Thomas Kluyver
    
Update print syntax in parallel examples.

              r6455
            
      from __future__ import print_function

        MinRK
    
updates to docs and examples

              r3670
            
      import cmath as math

        MinRK
    
remove kernel examples already ported to newparallel

              r3675
            
      def wordfreq(text, is_filename=False):
          """Return a dictionary of words and word counts in a string.

          Words are the whitespace-delimited tokens produced by ``str.split()``;
          they are lower-cased before counting, and punctuation is left attached.

          Parameters
          ----------
          text : str
              The text to analyse, or the path of a file holding the text when
              ``is_filename`` is true.
          is_filename : bool, optional
              If true, ``text`` is opened as a file and its contents are counted.

          Returns
          -------
          dict
              Maps each lower-cased word to the number of times it occurs.
          """
          if is_filename:
              with open(text) as f:
                  text = f.read()
          freqs = {}
          for word in text.split():
              lword = word.lower()
              freqs[lword] = freqs.get(lword, 0) + 1
          return freqs

      def print_wordfreq(freqs, n=10):
          """Print the n most common words and counts in the freqs dict.

          Parameters
          ----------
          freqs : dict
              Maps words to their counts, as returned by ``wordfreq``.
          n : int, optional
              Maximum number of entries to print.

          Entries are printed one per line as ``word count``, highest count
          first; words with equal counts appear in reverse alphabetical order.
          """
          # sorted() accepts any iterable, so this works whether the pairs come
          # from a list (Python 2 zip) or a lazy iterator (Python 3 zip), unlike
          # calling .sort() on the zip result.
          items = sorted(
              ((count, word) for word, count in freqs.items()), reverse=True
          )
          for count, word in items[:n]:
              print(word, count)

        MinRK
    
updates to docs and examples

              r3670
            
      def wordfreq_to_weightsize(worddict, minsize=25, maxsize=50, minalpha=0.5, maxalpha=1.0):
          """Map word counts to (alpha, size) pairs for drawing a tag cloud.

          Each count is rescaled linearly so that the smallest count in
          ``worddict`` gets ``(minalpha, minsize)`` and the largest gets
          ``(maxalpha, maxsize)``.

          Parameters
          ----------
          worddict : dict
              Maps words to their counts.
          minsize, maxsize : number, optional
              Font sizes assigned to the least and most frequent words.
          minalpha, maxalpha : number, optional
              Opacities assigned to the least and most frequent words.

          Returns
          -------
          dict
              Maps each word to an ``(alpha, size)`` tuple. Empty if
              ``worddict`` is empty.
          """
          if not worddict:
              # min()/max() raise ValueError on an empty sequence.
              return {}
          mincount = min(worddict.values())
          maxcount = max(worddict.values())
          # float() keeps the division below a true division even if the caller's
          # module does not enable it on Python 2.
          span = float(maxcount - mincount)
          weights = {}
          for k, v in worddict.items():
              # When every word has the same count there is no range to scale
              # over; treat all words as maximally weighted instead of dividing
              # by zero.
              w = (v - mincount) / span if span else 1.0
              alpha = minalpha + (maxalpha - minalpha) * w
              size = minsize + (maxsize - minsize) * w
              weights[k] = (alpha, size)
          return weights

      def tagcloud(worddict, n=10, minsize=25, maxsize=50, minalpha=0.5, maxalpha=1.0):
          """Draw the n most heavily weighted words as a tag cloud.

          The counts in ``worddict`` are converted to (alpha, size) pairs with
          ``wordfreq_to_weightsize``; the top ``n`` words are then drawn in a new
          matplotlib figure at uniformly random positions, using the pair as
          text opacity and font size.

          Parameters
          ----------
          worddict : dict
              Maps words to their counts.
          n : int, optional
              Maximum number of words to draw.
          minsize, maxsize, minalpha, maxalpha : number, optional
              Passed through to ``wordfreq_to_weightsize``.

          Returns
          -------
          The matplotlib axes the words were drawn on.
          """
          from matplotlib import pyplot as plt
          import random

          weights = wordfreq_to_weightsize(worddict, minsize, maxsize, minalpha, maxalpha)

          fig = plt.figure()
          ax = fig.add_subplot(111)
          # Let the axes fill the whole figure and hide the tick marks.
          ax.set_position([0.0, 0.0, 1.0, 1.0])
          plt.xticks([])
          plt.yticks([])

          # sorted() works on any iterable, so this does not depend on zip() or
          # dict views being lists (they are not on Python 3).
          items = sorted(
              ((alpha, size, word) for word, (alpha, size) in weights.items()),
              reverse=True,
          )
          for alpha, size, word in items[:n]:
              # Positions are uniform over the unit square.
              xpos = random.uniform(0.0, 1.0)
              ypos = random.uniform(0.0, 1.0)
              ax.text(xpos, ypos, word.lower(), alpha=alpha, fontsize=size)
          ax.autoscale_view()
          return ax

        Thomas Kluyver
    
Update print syntax in parallel examples.

              r6455

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

MinRK updates to docs and examples	r3670	"""Count the frequencies of words in a string"""

		from __future__ import division
Thomas Kluyver Update print syntax in parallel examples.	r6455	from __future__ import print_function
MinRK updates to docs and examples	r3670
		import cmath as math


MinRK remove kernel examples already ported to newparallel	r3675	def wordfreq(text, is_filename=False):
MinRK updates to docs and examples	r3670	"""Return a dictionary of words and word counts in a string."""
MinRK remove kernel examples already ported to newparallel	r3675	if is_filename:
		with open(text) as f:
		text = f.read()
MinRK updates to docs and examples	r3670	freqs = {}
		for word in text.split():
		lword = word.lower()
		freqs[lword] = freqs.get(lword, 0) + 1
		return freqs


		def print_wordfreq(freqs, n=10):
		"""Print the n most common words and counts in the freqs dict."""

		words, counts = freqs.keys(), freqs.values()
		items = zip(counts, words)
		items.sort(reverse=True)
		for (count, word) in items[:n]:
Thomas Kluyver Update print syntax in parallel examples.	r6455	print(word, count)
MinRK updates to docs and examples	r3670

		def wordfreq_to_weightsize(worddict, minsize=25, maxsize=50, minalpha=0.5, maxalpha=1.0):
		mincount = min(worddict.itervalues())
		maxcount = max(worddict.itervalues())
		weights = {}
		for k, v in worddict.iteritems():
		w = (v-mincount)/(maxcount-mincount)
		alpha = minalpha + (maxalpha-minalpha)*w
		size = minsize + (maxsize-minsize)*w
		weights[k] = (alpha, size)
		return weights


		def tagcloud(worddict, n=10, minsize=25, maxsize=50, minalpha=0.5, maxalpha=1.0):
		from matplotlib import pyplot as plt
		import random

		worddict = wordfreq_to_weightsize(worddict, minsize, maxsize, minalpha, maxalpha)

		fig = plt.figure()
		ax = fig.add_subplot(111)
		ax.set_position([0.0,0.0,1.0,1.0])
		plt.xticks([])
		plt.yticks([])

		words = worddict.keys()
		alphas = [v[0] for v in worddict.values()]
		sizes = [v[1] for v in worddict.values()]
		items = zip(alphas, sizes, words)
		items.sort(reverse=True)
		for alpha, size, word in items[:n]:
		# xpos = random.normalvariate(0.5, 0.3)
		# ypos = random.normalvariate(0.5, 0.3)
		xpos = random.uniform(0.0,1.0)
		ypos = random.uniform(0.0,1.0)
		ax.text(xpos, ypos, word.lower(), alpha=alpha, fontsize=size)
		ax.autoscale_view()
		return ax

Thomas Kluyver Update print syntax in parallel examples.	r6455