|
|
"""Count the frequencies of words in a string"""
|
|
|
|
|
|
from __future__ import division
|
|
|
|
|
|
import cmath as math
|
|
|
|
|
|
|
|
|
def wordfreq(text, is_filename=False):
|
|
|
"""Return a dictionary of words and word counts in a string."""
|
|
|
if is_filename:
|
|
|
with open(text) as f:
|
|
|
text = f.read()
|
|
|
freqs = {}
|
|
|
for word in text.split():
|
|
|
lword = word.lower()
|
|
|
freqs[lword] = freqs.get(lword, 0) + 1
|
|
|
return freqs
|
|
|
|
|
|
|
|
|
def print_wordfreq(freqs, n=10):
|
|
|
"""Print the n most common words and counts in the freqs dict."""
|
|
|
|
|
|
words, counts = freqs.keys(), freqs.values()
|
|
|
items = zip(counts, words)
|
|
|
items.sort(reverse=True)
|
|
|
for (count, word) in items[:n]:
|
|
|
print word, count
|
|
|
|
|
|
|
|
|
def wordfreq_to_weightsize(worddict, minsize=25, maxsize=50, minalpha=0.5, maxalpha=1.0):
|
|
|
mincount = min(worddict.itervalues())
|
|
|
maxcount = max(worddict.itervalues())
|
|
|
weights = {}
|
|
|
for k, v in worddict.iteritems():
|
|
|
w = (v-mincount)/(maxcount-mincount)
|
|
|
alpha = minalpha + (maxalpha-minalpha)*w
|
|
|
size = minsize + (maxsize-minsize)*w
|
|
|
weights[k] = (alpha, size)
|
|
|
return weights
|
|
|
|
|
|
|
|
|
def tagcloud(worddict, n=10, minsize=25, maxsize=50, minalpha=0.5, maxalpha=1.0):
|
|
|
from matplotlib import pyplot as plt
|
|
|
import random
|
|
|
|
|
|
worddict = wordfreq_to_weightsize(worddict, minsize, maxsize, minalpha, maxalpha)
|
|
|
|
|
|
fig = plt.figure()
|
|
|
ax = fig.add_subplot(111)
|
|
|
ax.set_position([0.0,0.0,1.0,1.0])
|
|
|
plt.xticks([])
|
|
|
plt.yticks([])
|
|
|
|
|
|
words = worddict.keys()
|
|
|
alphas = [v[0] for v in worddict.values()]
|
|
|
sizes = [v[1] for v in worddict.values()]
|
|
|
items = zip(alphas, sizes, words)
|
|
|
items.sort(reverse=True)
|
|
|
for alpha, size, word in items[:n]:
|
|
|
# xpos = random.normalvariate(0.5, 0.3)
|
|
|
# ypos = random.normalvariate(0.5, 0.3)
|
|
|
xpos = random.uniform(0.0,1.0)
|
|
|
ypos = random.uniform(0.0,1.0)
|
|
|
ax.text(xpos, ypos, word.lower(), alpha=alpha, fontsize=size)
|
|
|
ax.autoscale_view()
|
|
|
return ax
|
|
|
|
|
|
|