wordfreq.py
65 lines
| 1.9 KiB
| text/x-python
|
PythonLexer
Brian E Granger
|
"""Count the frequencies of words in a string."""
Brian Granger
|
r2314 | from __future__ import division | |
import cmath as math | |||
Brian E Granger
|
def wordfreq(text):
    """Return a dictionary of words and word counts in a string.

    The text is split on runs of whitespace and every word is lowercased
    before counting, so ``"The"`` and ``"the"`` share one entry.
    Punctuation is not stripped: ``"cat"`` and ``"cat,"`` are counted as
    different words.

    Parameters
    ----------
    text : str
        The text to analyse.

    Returns
    -------
    dict
        Maps each lowercased word to the number of times it occurs.
        Empty (or whitespace-only) input yields an empty dict.
    """
    freqs = {}
    for word in text.split():
        lword = word.lower()
        freqs[lword] = freqs.get(lword, 0) + 1
    return freqs
Brian Granger
|
r2314 | ||
Brian E Granger
|
def print_wordfreq(freqs, n=10):
    """Print the n most common words and counts in the freqs dict.

    One ``word count`` pair is written per line to standard output, most
    frequent first. Words with equal counts are ordered by the word
    itself, in descending order.

    Parameters
    ----------
    freqs : dict
        Maps word -> count, e.g. as returned by ``wordfreq``.
    n : int, optional
        Maximum number of entries to print (default 10).
    """
    # Sort (count, word) pairs so the count is the primary key. sorted()
    # accepts any iterable, whereas zip(...).sort() only works where zip
    # returns a list (Python 2).
    ranked = sorted(((count, word) for word, count in freqs.items()),
                    reverse=True)
    for count, word in ranked[:n]:
        # A single pre-formatted argument prints the same text whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print("%s %s" % (word, count))
Brian Granger
|
r2314 | ||
bgranger
|
def wordfreq_to_weightsize(worddict, minsize=25, maxsize=50, minalpha=0.5, maxalpha=1.0):
    """Convert word counts into (alpha, size) pairs.

    Counts are rescaled linearly: the smallest count maps to
    ``(minalpha, minsize)`` and the largest to ``(maxalpha, maxsize)``.

    Parameters
    ----------
    worddict : dict
        Maps word -> count.
    minsize, maxsize : number, optional
        Size assigned to the least / most frequent word.
    minalpha, maxalpha : number, optional
        Alpha assigned to the least / most frequent word.

    Returns
    -------
    dict
        Maps each word to an ``(alpha, size)`` tuple. An empty
        ``worddict`` yields an empty dict.
    """
    if not worddict:
        # min()/max() raise ValueError on an empty sequence.
        return {}

    mincount = min(worddict.values())
    maxcount = max(worddict.values())
    span = maxcount - mincount

    weights = {}
    for word, count in worddict.items():
        if span:
            w = (count - mincount) / span
        else:
            # Every word has the same count, so there is no range to
            # interpolate over (and dividing would raise
            # ZeroDivisionError). Treat each word as the most frequent.
            w = 1.0
        alpha = minalpha + (maxalpha - minalpha) * w
        size = minsize + (maxsize - minsize) * w
        weights[word] = (alpha, size)
    return weights
bgranger
|
def tagcloud(worddict, n=10, minsize=25, maxsize=50, minalpha=0.5, maxalpha=1.0):
    """Draw the top-weighted words of ``worddict`` at random positions.

    The counts are converted to ``(alpha, size)`` pairs with
    ``wordfreq_to_weightsize``. The ``n`` entries with the largest
    ``(alpha, size, word)`` tuples are then drawn, lowercased, on a new
    matplotlib figure whose axes fill the whole figure and have no ticks.

    Parameters
    ----------
    worddict : dict
        Maps word -> count.
    n : int, optional
        Maximum number of words to draw (default 10).
    minsize, maxsize, minalpha, maxalpha : number, optional
        Forwarded unchanged to ``wordfreq_to_weightsize``.

    Returns
    -------
    The matplotlib axes object the words were drawn on.
    """
    # Imported here so the module can be used without matplotlib as long
    # as this function is never called.
    from matplotlib import pyplot as plt
    import random

    weights = wordfreq_to_weightsize(worddict, minsize, maxsize, minalpha, maxalpha)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_position([0.0, 0.0, 1.0, 1.0])
    plt.xticks([])
    plt.yticks([])

    # Order by alpha, then size, then word, largest first. sorted()
    # accepts any iterable, whereas zip(...).sort() only works where zip
    # returns a list (Python 2).
    ranked = sorted(
        ((alpha, size, word) for word, (alpha, size) in weights.items()),
        reverse=True,
    )
    for alpha, size, word in ranked[:n]:
        # Each coordinate is drawn uniformly from [0.0, 1.0]; x is drawn
        # before y for every word.
        xpos = random.uniform(0.0, 1.0)
        ypos = random.uniform(0.0, 1.0)
        ax.text(xpos, ypos, word.lower(), alpha=alpha, fontsize=size)

    ax.autoscale_view()
    return ax