Adding new notebook examples....
Brian Granger


@@ -0,0 +1,1 b''
1 {"cells":[{"cell_type":"text","text":"<h1>Basic Symbolic Quantum Mechanics</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"phi, psi = Ket('phi'), Ket('psi')\nalpha, beta = symbols('alpha beta', complex=True)","cell_type":"code","prompt_number":3},{"code":"state = alpha*psi + beta*phi; state\n","cell_type":"code","prompt_number":4},{"code":"ip = Dagger(state)*state; ip\n","cell_type":"code","prompt_number":5},{"code":"qapply(expand(ip))\n","cell_type":"code","prompt_number":6},{"code":"A = Operator('A')\nB = Operator('B')\nC = Operator('C')","cell_type":"code","prompt_number":7},{"code":"A*B == B*A\n","cell_type":"code","prompt_number":8},{"code":"expand((A+B)**2)","cell_type":"code","prompt_number":9},{"code":"comm = Commutator(A,B); comm\n","cell_type":"code","prompt_number":10},{"code":"comm.doit()","cell_type":"code","prompt_number":11},{"code":"comm = Commutator(A*B,B+C); comm","cell_type":"code","prompt_number":12},{"code":"comm.expand(commutator=True)","cell_type":"code","prompt_number":13},{"code":"_.doit().expand()\n","cell_type":"code","prompt_number":14},{"code":"Dagger(_)","cell_type":"code","prompt_number":15},{"code":"%notebook save basic_quantum.ipynb","cell_type":"code","prompt_number":16}]} No newline at end of file
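For readability, the cells of basic_quantum.ipynb amount to roughly the following plain script (a sketch, not part of the commit; it assumes a SymPy install with sympy.physics.quantum and drops the notebook-only %load_ext/%notebook magics):

```python
# Readable sketch of the cells in basic_quantum.ipynb (assumes SymPy with
# sympy.physics.quantum; notebook-only %-magics omitted).
from sympy import symbols, expand
from sympy.physics.quantum import Ket, Dagger, Operator, Commutator, qapply

# A general superposition of two kets with complex coefficients.
phi, psi = Ket('phi'), Ket('psi')
alpha, beta = symbols('alpha beta', complex=True)
state = alpha*psi + beta*phi

# The norm <state|state>, expanded and evaluated with qapply.
ip = Dagger(state)*state
print(qapply(expand(ip)))

# Symbolic, noncommuting operators and their commutators.
A, B, C = Operator('A'), Operator('B'), Operator('C')
print(A*B == B*A)                        # False: operator order matters
comm = Commutator(A*B, B + C)
expanded = comm.expand(commutator=True)  # expand via commutator identities
evaluated = expanded.doit().expand()     # evaluate the elementary commutators
print(evaluated)
print(Dagger(evaluated))                 # adjoint of the result
```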
@@ -0,0 +1,1 b''
1 {"cells":[{"cell_type":"text","text":"<h1>Gate Decomposition</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"CY10 = CGate(1, Y(0)); CY10\n","cell_type":"code","prompt_number":3},{"code":"CY10.decompose()\n","cell_type":"code","prompt_number":4},{"code":"circuit_plot(CY10.decompose(), nqubits=2)","cell_type":"code","prompt_number":5},{"code":"CZ01 = CGate(0, Z(1)); CZ01\n","cell_type":"code","prompt_number":6},{"code":"CZ01.decompose()\n","cell_type":"code","prompt_number":7},{"code":"circuit_plot(CZ01.decompose(), nqubits=2)","cell_type":"code","prompt_number":8},{"code":"SWAP10 = SWAP(1, 0); SWAP10\n","cell_type":"code","prompt_number":9},{"code":"SWAP10.decompose()","cell_type":"code","prompt_number":10},{"code":"circuit_plot(SWAP10.decompose(), nqubits=2)","cell_type":"code","prompt_number":11},{"code":"gates = [CGate(1,Y(0)), CGate(0,Z(1)), SWAP(1, 0)]","cell_type":"code","prompt_number":12},{"code":"for g in gates:\n dg = g.decompose()\n display(Eq(g, dg))\n circuit_plot(g, nqubits=2)\n circuit_plot(dg, nqubits=2) ","cell_type":"code","prompt_number":16},{"code":"%notebook save decomposition.ipynb","cell_type":"code","prompt_number":30}]} No newline at end of file
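A readable sketch of what decomposition.ipynb does (same assumptions; the circuit_plot and display calls, which need matplotlib and the notebook frontend, are omitted):

```python
# Decompose controlled-Y, controlled-Z and SWAP into elementary gates,
# mirroring the cells of decomposition.ipynb (sketch; plotting omitted).
from sympy.physics.quantum.gate import CGate, SWAP, Y, Z

CY10 = CGate(1, Y(0))     # Y on qubit 0, controlled by qubit 1
CZ01 = CGate(0, Z(1))     # Z on qubit 1, controlled by qubit 0
SWAP10 = SWAP(1, 0)       # swap qubits 1 and 0

for g in (CY10, CZ01, SWAP10):
    # decompose() rewrites the gate as a product of one- and two-qubit gates;
    # the notebook then draws both forms with circuit_plot(..., nqubits=2).
    print(g.decompose())
```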
@@ -0,0 +1,1 b''
1 {"cells":[{"cell_type":"text","text":"<h1>Dense Coding\n</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":2},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":3},{"code":"psi = Qubit('00')/sqrt(2) + Qubit('11')/sqrt(2); psi\n","cell_type":"code","prompt_number":4},{"code":"circuits = [H(1)*CNOT(1,0), H(1)*CNOT(1,0)*X(1), H(1)*CNOT(1,0)*Z(1), H(1)*CNOT(1,0)*Z(1)*X(1)]","cell_type":"code","prompt_number":20},{"code":"for circuit in circuits:\n circuit_plot(circuit, nqubits=2)\n display(Eq(circuit*psi,qapply(circuit*psi)))","cell_type":"code","prompt_number":21},{"code":"%notebook save dense_coding.ipynb","cell_type":"code","prompt_number":28}]} No newline at end of file
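Dense coding in the same readable form (a sketch under the same assumptions; plotting omitted):

```python
# Sketch of dense_coding.ipynb: starting from the Bell state
# (|00> + |11>)/sqrt(2), each circuit applies an encoding gate followed by
# the CNOT/H decoder, mapping the pair onto a distinct basis state and so
# carrying two classical bits in one transmitted qubit.
from sympy import sqrt
from sympy.physics.quantum import qapply
from sympy.physics.quantum.qubit import Qubit
from sympy.physics.quantum.gate import H, X, Z, CNOT

psi = (Qubit('00') + Qubit('11'))/sqrt(2)

circuits = [H(1)*CNOT(1, 0),             # no encoding gate -> |00>
            H(1)*CNOT(1, 0)*X(1),        # encode with X(1) -> |01>
            H(1)*CNOT(1, 0)*Z(1),        # encode with Z(1) -> |10>
            H(1)*CNOT(1, 0)*Z(1)*X(1)]   # encode with Z*X  -> |11>

for circuit in circuits:
    print(qapply(circuit*psi))           # each yields a single basis Qubit
```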
@@ -0,0 +1,1 b''
1 {"cells":[{"cell_type":"text","text":"<h1>Grover's Algorithm</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"nqubits = 3\n","cell_type":"code","prompt_number":4},{"code":"def black_box(qubits):\n return True if qubits == IntQubit(1, qubits.nqubits) else False\n","cell_type":"code","prompt_number":3},{"code":"psi = superposition_basis(nqubits); psi\n","cell_type":"code","prompt_number":5},{"code":"v = OracleGate(nqubits, black_box)\n","cell_type":"code","prompt_number":6},{"code":"iter1 = qapply(grover_iteration(psi, v)); iter1\n","cell_type":"code","prompt_number":7},{"code":"iter2 = qapply(grover_iteration(iter1, v)); iter2\n","cell_type":"code","prompt_number":8},{"code":"measure_all_oneshot(iter2)\n","cell_type":"code","prompt_number":12},{"code":"%notebook save grovers.ipynb","cell_type":"code","prompt_number":28}]} No newline at end of file
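The Grover notebook, sketched as a plain script (assumes sympy.physics.quantum.grover as imported above):

```python
# Sketch of grovers.ipynb: two Grover iterations on 3 qubits searching for
# the basis state |1>, then a simulated single-shot measurement.
from sympy.physics.quantum import qapply
from sympy.physics.quantum.qubit import IntQubit, measure_all_oneshot
from sympy.physics.quantum.grover import (OracleGate, superposition_basis,
                                          grover_iteration)

nqubits = 3

def black_box(qubits):
    # Oracle: marks the single basis state |1> on this register.
    return qubits == IntQubit(1, qubits.nqubits)

psi = superposition_basis(nqubits)          # uniform superposition
v = OracleGate(nqubits, black_box)          # oracle gate built from black_box
iter1 = qapply(grover_iteration(psi, v))    # first Grover iteration
iter2 = qapply(grover_iteration(iter1, v))  # second iteration
print(measure_all_oneshot(iter2))           # simulated one-shot measurement
```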
@@ -0,0 +1,1 b''
1 {"cells":[{"cell_type":"text","text":"<h1>Quantum Error Correction</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"M0 = Z(1)*X(2)*X(3)*Z(4); M0\n","cell_type":"code","prompt_number":3},{"code":"M1 = Z(2)*X(3)*X(4)*Z(0); M1\n","cell_type":"code","prompt_number":4},{"code":"M2 = Z(3)*X(4)*X(0)*Z(1); M2\n","cell_type":"code","prompt_number":5},{"code":"M3 = Z(4)*X(0)*X(1)*Z(2); M3\n","cell_type":"code","prompt_number":6},{"code":"gate_simp(Commutator(M0,M1).doit())\n","cell_type":"code","prompt_number":7},{"code":"for o in [M0,M1,M2,M3]:\n display(gate_simp(o*o))\n","cell_type":"code","prompt_number":8},{"code":"zero = Rational(1,4)*(1+M0)*(1+M1)*(1+M2)*(1+M3)*IntQubit(0, 5); zero\n","cell_type":"code","prompt_number":9},{"code":"qapply(4*zero)\n","cell_type":"code","prompt_number":10},{"code":"one = Rational(1,4)*(1+M0)*(1+M1)*(1+M2)*(1+M3)*IntQubit(2**5-1, 5); one\n","cell_type":"code","prompt_number":11},{"code":"qapply(4*one)\n","cell_type":"code","prompt_number":12},{"code":"encoding_circuit = H(3)*H(4)*CNOT(2,0)*CNOT(3,0)*CNOT(4,0)*H(1)*H(4)*\\\n CNOT(2,1)*CNOT(4,1)*H(2)*CNOT(3,2)*CNOT(4,2)*H(3)*\\\n H(4)*CNOT(4, 3)*Z(4)*H(4)*Z(4)\n","cell_type":"code","prompt_number":13},{"code":"circuit_plot(encoding_circuit, nqubits=5, scale=0.5)","cell_type":"code","prompt_number":14},{"code":"represent(4*encoding_circuit, nqubits=5)","cell_type":"code","prompt_number":16},{"code":"%notebook save qerror.ipynb","cell_type":"code","prompt_number":23},{"code":"","cell_type":"code","prompt_number":23}]} No newline at end of file
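A sketch of the stabilizer portion of qerror.ipynb (same assumptions; the long encoding circuit and its plot are left out here):

```python
# Four stabilizers of the five-qubit code: they commute, square to the
# identity, and the associated projectors build the logical codewords.
from sympy import Rational
from sympy.physics.quantum import Commutator, qapply
from sympy.physics.quantum.gate import X, Z, gate_simp
from sympy.physics.quantum.qubit import IntQubit

M0 = Z(1)*X(2)*X(3)*Z(4)
M1 = Z(2)*X(3)*X(4)*Z(0)
M2 = Z(3)*X(4)*X(0)*Z(1)
M3 = Z(4)*X(0)*X(1)*Z(2)

print(gate_simp(Commutator(M0, M1).doit()))   # stabilizers commute
print(gate_simp(M0*M0))                       # each squares to the identity

# Logical codewords as projections of |00000> and |11111>.
zero = Rational(1, 4)*(1 + M0)*(1 + M1)*(1 + M2)*(1 + M3)*IntQubit(0, 5)
one = Rational(1, 4)*(1 + M0)*(1 + M1)*(1 + M2)*(1 + M3)*IntQubit(2**5 - 1, 5)
print(qapply(4*zero))
print(qapply(4*one))
```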
@@ -0,0 +1,1 b''
1 {"cells":[{"cell_type":"text","text":"<h1>Teleportation</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"fourier = QFT(0,3).decompose(); fourier\n","cell_type":"code","prompt_number":3},{"code":"circuit_plot(fourier, nqubits=3)","cell_type":"code","prompt_number":4},{"code":"m = represent(fourier, nqubits=3)","cell_type":"code","prompt_number":12},{"code":"m","cell_type":"code","prompt_number":13},{"code":"represent(Fourier(0,3), nqubits=3)*4/sqrt(2)\n","cell_type":"code","prompt_number":5},{"code":"state = (Qubit('000') + Qubit('010') + Qubit('100') + Qubit('110'))/sqrt(4); state\n","cell_type":"code","prompt_number":6},{"code":"qapply(fourier*state)\n","cell_type":"code","prompt_number":7},{"code":"%notebook save qft.ipynb","cell_type":"code","prompt_number":23}]} No newline at end of file
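The cells saved as qft.ipynb, sketched as a plain script (same assumptions; circuit_plot omitted):

```python
# Sketch of qft.ipynb: a 3-qubit quantum Fourier transform, decomposed into
# elementary gates and applied to a simple superposition state.
from sympy import sqrt
from sympy.physics.quantum import qapply, represent
from sympy.physics.quantum.qubit import Qubit
from sympy.physics.quantum.qft import QFT

fourier = QFT(0, 3).decompose()     # QFT on qubits 0..2 as elementary gates
m = represent(fourier, nqubits=3)   # its 8x8 matrix representation
print(m)

state = (Qubit('000') + Qubit('010') + Qubit('100') + Qubit('110'))/sqrt(4)
print(qapply(fourier*state))        # Fourier transform of the state
```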
@@ -0,0 +1,1 b''
1 {"cells":[{"cell_type":"text","text":"<h1>Symbolic Quantum Computing</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":2},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":3},{"code":"alpha, beta = symbols('alpha beta',real=True)","cell_type":"code","prompt_number":4},{"code":"psi = alpha*Qubit('00') + beta*Qubit('11'); psi\n","cell_type":"code","prompt_number":5},{"code":"Dagger(psi)\n","cell_type":"code","prompt_number":6},{"code":"qapply(Dagger(Qubit('00'))*psi)\n","cell_type":"code","prompt_number":7},{"code":"for state, prob in measure_all(psi):\n display(state)\n display(prob)\n","cell_type":"code","prompt_number":8},{"code":"represent(psi, nqubits=2)\n","cell_type":"code","prompt_number":9},{"code":"g = X(0); g\n","cell_type":"code","prompt_number":10},{"code":"represent(g, nqubits=2)\n","cell_type":"code","prompt_number":11},{"code":"c = H(0)*Qubit('00'); c\n","cell_type":"code","prompt_number":12},{"code":"qapply(c)\n","cell_type":"code","prompt_number":13},{"code":"for g1 in (Y,Z,H):\n for g2 in (Y,Z,H):\n e = Commutator(g1(0),g2(0))\n if g1 != g2:\n display(Eq(e,e.doit()))\n","cell_type":"code","prompt_number":14},{"code":"c = H(0)*X(1)*H(0)**2*CNOT(0,1)*X(1)**3*X(0)*Z(2)**2*S(3)**3; c\n","cell_type":"code","prompt_number":24},{"code":"circuit_plot(c, nqubits=4)","cell_type":"code","prompt_number":25},{"code":"gate_simp(c)\n","cell_type":"code","prompt_number":16},{"code":"circuit_plot(gate_simp(c),nqubits=5)","cell_type":"code","prompt_number":23},{"code":"%notebook save quantum_computing.ipynb","cell_type":"code","prompt_number":35}]} No newline at end of file
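A condensed sketch of quantum_computing.ipynb (same assumptions; the plotting cells are omitted):

```python
# Qubit states, measurement predictions, matrix representations and gate
# simplification, mirroring quantum_computing.ipynb.
from sympy import symbols
from sympy.physics.quantum import Dagger, qapply, represent
from sympy.physics.quantum.qubit import Qubit, measure_all
from sympy.physics.quantum.gate import H, X, Z, S, CNOT, gate_simp

alpha, beta = symbols('alpha beta', real=True)
psi = alpha*Qubit('00') + beta*Qubit('11')

print(qapply(Dagger(Qubit('00'))*psi))   # amplitude of |00>
print(measure_all(psi))                  # list of (state, probability) pairs
print(represent(psi, nqubits=2))         # column-vector representation

# Gate arithmetic: powers of self-inverse gates collapse under gate_simp.
c = H(0)*X(1)*H(0)**2*CNOT(0, 1)*X(1)**3*X(0)*Z(2)**2*S(3)**3
print(gate_simp(c))
```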
@@ -0,0 +1,1 b''
1 {"cells":[{"cell_type":"text","text":"<h1>Teleportation</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"a,b = symbols('ab', real=True)\nstate = Qubit('000')*a + Qubit('001')*b; state","cell_type":"code","prompt_number":3},{"code":"entangle1_2 = CNOT(1,2)*HadamardGate(1); entangle1_2\n","cell_type":"code","prompt_number":4},{"code":"state = qapply(entangle1_2*state); state\n","cell_type":"code","prompt_number":5},{"code":"entangle0_1 = HadamardGate(0)*CNOT(0,1); entangle0_1\n","cell_type":"code","prompt_number":6},{"code":"circuit_plot(entangle0_1*entangle1_2, nqubits=3)\n","cell_type":"code","prompt_number":7},{"code":"state = qapply(entangle0_1*state); state\n","cell_type":"code","prompt_number":8},{"code":"result = measure_partial(state, (0,1))\n","cell_type":"code","prompt_number":10},{"code":"state = (result[2][0]*2).expand(); state","cell_type":"code","prompt_number":11},{"code":"state = qapply(XGate(2)*state); state\n","cell_type":"code","prompt_number":12},{"code":"%notebook save teleportation.ipynb","cell_type":"code","prompt_number":13},{"code":"","cell_type":"code","prompt_number":18}]} No newline at end of file
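The teleportation notebook in the same readable form (a sketch; the notebook's symbols('ab') is written here as symbols('a b'), and the measurement-branch index simply mirrors the notebook's choice):

```python
# Sketch of teleportation.ipynb: teleport the state a|0> + b|1> held in
# qubit 0 onto qubit 2 (plotting omitted).
from sympy import symbols
from sympy.physics.quantum import qapply
from sympy.physics.quantum.qubit import Qubit, measure_partial
from sympy.physics.quantum.gate import HadamardGate, CNOT, XGate

a, b = symbols('a b', real=True)
state = a*Qubit('000') + b*Qubit('001')          # unknown amplitudes on qubit 0

# Share a Bell pair between qubits 1 and 2, then entangle qubit 0 with qubit 1.
state = qapply(CNOT(1, 2)*HadamardGate(1)*state)
state = qapply(HadamardGate(0)*CNOT(0, 1)*state)

# Measure qubits 0 and 1; pick the third measurement branch (as the notebook
# does) and renormalize its amplitude.
result = measure_partial(state, (0, 1))
state = (result[2][0]*2).expand()

# Conditioned on that outcome, an X correction on qubit 2 recovers a|0> + b|1>.
print(qapply(XGate(2)*state))
```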
@@ -0,0 +1,1 b''
1 {"cells":[{"cell_type":"text","text":"<h1>Text Analysis Using NetworkX</h1>"},{"cell_type":"text","text":"<p>This notebook will analyze a plain text file treating it as a list of\nnewline-separated sentences (e.g. a list of paper titles).</p>\n<br>\n<p>It computes word frequencies (after doing some naive normalization by\nlowercasing and throwing away a few overly common words). It also computes,\nfrom the most common words, a weighted graph of word co-occurrences and\ndisplays it, as well as summarizing the graph structure by ranking its nodes in\ndescending order of eigenvector centrality.</p>\n<br>\n<p>This is meant as an illustration of text processing in Python, using matplotlib\nfor visualization and NetworkX for graph-theoretical manipulation. It should\nnot be considered production-strength code for serious text analysis.</p>\n<br>\n<p>Author: Fernando Perez</p>"},{"code":"%run text_analysis.py","cell_type":"code","prompt_number":3},{"code":"default_url = \"http://bibserver.berkeley.edu/tmp/titles.txt\"\nn_words = 15\nn_nodes = 15\nurl = default_url\n ","cell_type":"code","prompt_number":4},{"cell_type":"text","text":"Fetch text and do basic preprocessing."},{"code":"text = get_text_from_url(url).lower()\nlines = text.splitlines()\nwords = text_cleanup(text)","cell_type":"code","prompt_number":5},{"cell_type":"text","text":"Compute frequency histogram."},{"code":"wf = word_freq(words)\nsorted_wf = sort_freqs(wf)","cell_type":"code","prompt_number":6},{"cell_type":"text","text":"Build a graph from the n_nodes most frequent words."},{"code":"popular = sorted_wf[-n_nodes:]\npop_words = [wc[0] for wc in popular]\nco_occur = co_occurrences(lines, pop_words)\nwgraph = co_occurrences_graph(popular, co_occur, cutoff=1)\ncentrality = nx.eigenvector_centrality_numpy(wgraph)\n","cell_type":"code","prompt_number":7},{"cell_type":"text","text":"Print summaries of single-word frequencies and graph structure."},{"code":"summarize_freq_hist(sorted_wf)\nsummarize_centrality(centrality)","cell_type":"code","prompt_number":8},{"cell_type":"text","text":"Plot histogram and graph."},{"code":"plot_word_histogram(sorted_wf, n_words,\"Frequencies for %s most frequent words\" % n_words)","cell_type":"code","prompt_number":9},{"code":"plot_word_histogram(sorted_wf, 1.0, \"Frequencies for entire word list\")\n","cell_type":"code","prompt_number":10},{"code":"plot_graph(wgraph)","cell_type":"code","prompt_number":11},{"code":"%notebook save text_analysis.ipynb","cell_type":"code","prompt_number":10}]} No newline at end of file
@@ -0,0 +1,373 b''
1 #!/usr/bin/env python
2 """Simple text analysis: word frequencies and co-occurrence graph.
3
4 Usage:
5
6 text_analysis.py [text_file]
7
8 This script will analyze a plain text file, treating it as a list of
9 newline-separated sentences (e.g. a list of paper titles).
10
11 It computes word frequencies (after doing some naive normalization by
12 lowercasing and throwing away a few overly common words). It also computes,
13 from the most common words, a weighted graph of word co-occurrences and
14 displays it, as well as summarizing the graph structure by ranking its nodes in
15 descending order of eigenvector centrality.
16
17 This is meant as an illustration of text processing in Python, using matplotlib
18 for visualization and NetworkX for graph-theoretical manipulation. It should
19 not be considered production-strength code for serious text analysis.
20
21 Author: Fernando Perez <fernando.perez@berkeley.edu>
22 """
23
24 #-----------------------------------------------------------------------------
25 # Imports
26 #-----------------------------------------------------------------------------
27
28 # From the standard library
29 import os
30 import re
31 import sys
32 import urllib2
33
34 # Third-party libraries
35 import networkx as nx
36 import numpy as np
37
38 from matplotlib import pyplot as plt
39
40 #-----------------------------------------------------------------------------
41 # Function definitions
42 #-----------------------------------------------------------------------------
43
44 def rescale_arr(arr,amin,amax):
45 """Rescale an array to a new range.
46
47 Return a new array whose range of values is (amin,amax).
48
49 Parameters
50 ----------
51 arr : array-like
52
53 amin : float
54 new minimum value
55
56 amax : float
57 new maximum value
58
59 Examples
60 --------
61 >>> a = np.arange(5)
62
63 >>> rescale_arr(a,3,6)
64 array([ 3. , 3.75, 4.5 , 5.25, 6. ])
65 """
66
67 # old bounds
68 m = arr.min()
69 M = arr.max()
70 # scale/offset
71 s = float(amax-amin)/(M-m)
72 d = amin - s*m
73
74 # Apply clip before returning to cut off possible overflows outside the
75 # intended range due to roundoff error, so that we can absolutely guarantee
76 # that on output, there are no values > amax or < amin.
77 return np.clip(s*arr+d,amin,amax)
78
79
80 def all_pairs(items):
81 """Make all unique pairs (order doesn't matter)"""
82 pairs = []
83 nitems = len(items)
84 for i, wi in enumerate(items):
85 for j in range(i+1, nitems):
86 pairs.append((wi, items[j]))
87 return pairs
88
89
90 def text_cleanup(text, min_length=3,
91 remove = set(['for', 'the', 'and', 'with'])):
92 """Clean up a list of lowercase strings of text for simple analysis.
93
94 Splits on whitespace, removes all 'words' less than `min_length` characters
95 long, and those in the `remove` set.
96
97 Returns a list of strings.
98 """
99 return [w for w in text.lower().split()
100 if len(w)>=min_length and w not in remove]
101
102
103 def print_vk(lst):
104     """Print a list of (word, count) pairs nicely formatted as 'word -> count'."""
105
106     # Find the longest key (the word): the list holds (word, count) pairs,
107     # so the key is element [0], not [1]
108 longest_key = max([len(word) for word, count in lst])
109 # Make a format string out of it
110 fmt = '%'+str(longest_key)+'s -> %s'
111 # Do actual printing
112 for k,v in lst:
113 print fmt % (k,v)
114
115
116 def word_freq(text):
117 """Return a dictionary of word frequencies for the given text.
118
119 Input text should be given as an iterable of strings."""
120
121 freqs = {}
122 for word in text:
123 freqs[word] = freqs.get(word, 0) + 1
124 return freqs
125
126
127 def sort_freqs(freqs):
128 """Sort a word frequency histogram represented as a dictionary.
129
130 Parameters
131 ----------
132 freqs : dict
133 A dict with string keys and integer values.
134
135     Returns
136     -------
137 items : list
138 A list of (count, word) pairs.
139 """
140 items = freqs.items()
141 items.sort(key = lambda wc: wc[1])
142 return items
143 ## words,counts = freqs.keys(),freqs.values()
144 ## # Sort by count
145 ## items = zip(counts,words)
146 ## items.sort()
147 ## return items
148
149
150 def summarize_freq_hist(freqs, n=10):
151 """Print a simple summary of a word frequencies dictionary.
152
153     Parameters
154     ----------
155 freqs : dict or list
156 Word frequencies, represented either as a dict of word->count, or as a
157 list of count->word pairs.
158
159 n : int
160 The number of least/most frequent words to print.
161 """
162
163 items = sort_freqs(freqs) if isinstance(freqs, dict) else freqs
164 print 'Number of unique words:',len(freqs)
165 print
166 print '%d least frequent words:' % n
167 print_vk(items[:n])
168 print
169 print '%d most frequent words:' % n
170 print_vk(items[-n:])
171
172
173 def get_text_from_url(url):
174 """Given a url (local file path or remote url), read its contents.
175
176 If it's a remote URL, it downloads the file and leaves it locally cached
177     for future runs. If a matching local file is found, no download is made.
178
179 Returns
180 -------
181 text : string
182 The contents of the file.
183 """
184 if url.startswith('http'):
185 # remote file, fetch only if needed
186 fname = os.path.split(url)[1]
187 if os.path.isfile(fname):
188 with open(fname, 'r') as f:
189 text = f.read()
190 else:
191 with open(fname, 'w') as f:
192 text = urllib2.urlopen(url).read()
193 f.write(text)
194 else:
195 with open(url, 'r') as f:
196 text = f.read()
197 return text
198
199
200 def co_occurrences(lines, words):
201 """Return histogram of co-occurrences of words in a list of lines.
202
203 Parameters
204 ----------
205 lines : list
206 A list of strings considered as 'sentences' to search for co-occurrences.
207
208 words : list
209 A list of words from which all unordered pairs will be constructed and
210 searched for co-occurrences.
211 """
212 wpairs = all_pairs(words)
213
214 # Now build histogram of co-occurrences
215 co_occur = {}
216 for w1, w2 in wpairs:
217 rx = re.compile('%s .*%s|%s .*%s' % (w1, w2, w2, w1))
218 co_occur[w1, w2] = sum([1 for line in lines if rx.search(line)])
219
220 return co_occur
221
222
223 def co_occurrences_graph(word_hist, co_occur, cutoff=0):
224 """Convert a word histogram with co-occurrences to a weighted graph.
225
226 Edges are only added if the count is above cutoff.
227 """
228 g = nx.Graph()
229 for word, count in word_hist:
230 g.add_node(word, count=count)
231 for (w1, w2), count in co_occur.iteritems():
232 if count<=cutoff:
233 continue
234 g.add_edge(w1, w2, weight=count)
235 return g
236
237
238 def plot_graph(wgraph, pos=None):
239 """Conveniently summarize graph visually"""
240 # Plot nodes with size according to count
241 sizes = []
242 degrees = []
243 for n, d in wgraph.nodes_iter(data=True):
244 sizes.append(d['count'])
245 degrees.append(wgraph.degree(n))
246 sizes = rescale_arr(np.array(sizes, dtype=float), 100, 1000)
247
248 # Compute layout and label edges according to weight
249 pos = nx.spring_layout(wgraph) if pos is None else pos
250 labels = {}
251 width = []
252 for n1, n2, d in wgraph.edges_iter(data=True):
253 w = d['weight']
254 labels[n1, n2] = w
255 width.append(w)
256
257 # remap width to 1-10 range
258 width = rescale_arr(np.array(width, dtype=float), 1, 15)
259
260 # Create figure
261 fig, ax = plt.subplots()
262 fig.subplots_adjust(0,0,1)
263 nx.draw_networkx_nodes(wgraph, pos, node_size=sizes, node_color=degrees,
264 alpha=0.8)
265 nx.draw_networkx_labels(wgraph, pos, font_size=15, font_weight='bold')
266 nx.draw_networkx_edges(wgraph, pos, width=width, edge_color=width,
267 edge_cmap=plt.cm.Blues)
268 nx.draw_networkx_edge_labels(wgraph, pos, edge_labels=labels)
269 ax.set_title('Node color:degree, size:count, edge: co-occurrence count')
270
271
272 def plot_word_histogram(freqs, show=10, title=None):
273 """Plot a histogram of word frequencies, limited to the top `show` ones.
274 """
275 sorted_f = sort_freqs(freqs) if isinstance(freqs, dict) else freqs
276
277 # Don't show the tail
278 if isinstance(show, int):
279 # interpret as number of words to show in histogram
280 show_f = sorted_f[-show:]
281 else:
282 # interpret as a fraction
283 start = -int(round(show*len(freqs)))
284 show_f = sorted_f[start:]
285
286 # Now, extract words and counts, plot
287 n_words = len(show_f)
288 ind = np.arange(n_words)
289 words = [i[0] for i in show_f]
290 counts = [i[1] for i in show_f]
291
292 fig, ax = plt.subplots()
293 if n_words<=20:
294 # Only show bars and x labels for small histograms, they don't make
295 # sense otherwise
296 ax.bar(ind, counts)
297 ax.set_xticks(ind)
298 ax.set_xticklabels(words, rotation=45)
299 fig.subplots_adjust(bottom=0.25)
300 else:
301 # For larger ones, do a step plot
302 ax.step(ind, counts)
303
304 # If it spans more than two decades, use a log scale
305 if float(max(counts))/min(counts) > 100:
306 ax.set_yscale('log')
307
308 if title:
309 ax.set_title(title)
310 return ax
311
312
313 def summarize_centrality(centrality):
314 c = centrality.items()
315 c.sort(key=lambda x:x[1], reverse=True)
316 print '\nGraph centrality'
317 for node, cent in c:
318 print "%15s: %.3g" % (node, cent)
319
320 #-----------------------------------------------------------------------------
321 # Main script
322 #-----------------------------------------------------------------------------
323
324 # if __name__ == '__main__':
325
326 # # Configure user variables here
327 # # Specify the url (can be a local file path) of the text file to analyze.
328 # # If not given, it's read from the command line as the first argument
329 #
330 # # 11226 titles of recent articles in arxiv/math/prob
331 # default_url = "http://bibserver.berkeley.edu/tmp/titles.txt"
332 # # Number of words to display in detailed histogram
333 # n_words = 15
334 # # Number of words to use as nodes for co-occurrence graph.
335 # n_nodes = 15
336 #
337 # # End of user configuration
338 #
339 # # Actual code starts here
340 # try:
341 # url = sys.argv[1]
342 # except IndexError:
343 # url = default_url
344 #
345 # # Fetch text and do basic preprocessing
346 # text = get_text_from_url(url).lower()
347 # lines = text.splitlines()
348 # words = text_cleanup(text)
349 #
350 # # Compute frequency histogram
351 # wf = word_freq(words)
352 # sorted_wf = sort_freqs(wf)
353 #
354 # # Build a graph from the n_nodes most frequent words
355 # popular = sorted_wf[-n_nodes:]
356 # pop_words = [wc[0] for wc in popular]
357 # co_occur = co_occurrences(lines, pop_words)
358 # wgraph = co_occurrences_graph(popular, co_occur, cutoff=1)
359 # centrality = nx.eigenvector_centrality_numpy(wgraph)
360 #
361 # # Print summaries of single-word frequencies and graph structure
362 # summarize_freq_hist(sorted_wf)
363 # summarize_centrality(centrality)
364 #
365 # # Plot histogram and graph
366 # plt.close('all')
367 # plot_word_histogram(sorted_wf, n_words,
368 # "Frequencies for %s most frequent words" % n_words)
369 # plot_word_histogram(sorted_wf, 1.0, "Frequencies for entire word list")
370 # plot_graph(wgraph)
371 #
372 # # Display figures
373 # plt.show()
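As a quick, hypothetical illustration of the co-occurrence machinery defined above (the corpus and the counts in the comments are made up for this note; it assumes the file above is importable as text_analysis and runs under Python 2, as the script is written):

```python
# Tiny example of co_occurrences()/co_occurrences_graph() from text_analysis.py
# on a made-up corpus (data is hypothetical, for illustration only).
from text_analysis import co_occurrences, co_occurrences_graph

lines = ['random walks on random graphs',
         'mixing times of markov chains',
         'random graphs and markov chains']
words = ['random', 'graphs', 'markov', 'chains']

co = co_occurrences(lines, words)
# co[('random', 'graphs')] == 2   (lines 1 and 3 contain 'random ... graphs')
# co[('markov', 'chains')] == 2   (lines 2 and 3)
# co[('random', 'chains')] == 1   (line 3 only)

# Build the weighted graph; edges require a count strictly above the cutoff.
word_hist = [('chains', 2), ('markov', 2), ('graphs', 2), ('random', 3)]
wgraph = co_occurrences_graph(word_hist, co, cutoff=0)
print(wgraph.number_of_edges())
```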
1 NO CONTENT: new file 100644
NO CONTENT: new file 100644