Show More
The requested changes are too big and content was truncated. Show full diff
@@ -0,0 +1,1 b'' | |||||
|
1 | {"cells":[{"cell_type":"text","text":"<h1>Basic Symbolic Quantum Mechanics</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"phi, psi = Ket('phi'), Ket('psi')\nalpha, beta = symbols('alpha beta', complex=True)","cell_type":"code","prompt_number":3},{"code":"state = alpha*psi + beta*phi; state\n","cell_type":"code","prompt_number":4},{"code":"ip = Dagger(state)*state; ip\n","cell_type":"code","prompt_number":5},{"code":"qapply(expand(ip))\n","cell_type":"code","prompt_number":6},{"code":"A = Operator('A')\nB = Operator('B')\nC = Operator('C')","cell_type":"code","prompt_number":7},{"code":"A*B == B*A\n","cell_type":"code","prompt_number":8},{"code":"expand((A+B)**2)","cell_type":"code","prompt_number":9},{"code":"comm = Commutator(A,B); comm\n","cell_type":"code","prompt_number":10},{"code":"comm.doit()","cell_type":"code","prompt_number":11},{"code":"comm = Commutator(A*B,B+C); comm","cell_type":"code","prompt_number":12},{"code":"comm.expand(commutator=True)","cell_type":"code","prompt_number":13},{"code":"_.doit().expand()\n","cell_type":"code","prompt_number":14},{"code":"Dagger(_)","cell_type":"code","prompt_number":15},{"code":"%notebook save basic_quantum.ipynb","cell_type":"code","prompt_number":16}]} No newline at end of file |
@@ -0,0 +1,1 b'' | |||||
|
1 | {"cells":[{"cell_type":"text","text":"<h1>Gate Decomposition</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"CY10 = CGate(1, Y(0)); CY10\n","cell_type":"code","prompt_number":3},{"code":"CY10.decompose()\n","cell_type":"code","prompt_number":4},{"code":"circuit_plot(CY10.decompose(), nqubits=2)","cell_type":"code","prompt_number":5},{"code":"CZ01 = CGate(0, Z(1)); CZ01\n","cell_type":"code","prompt_number":6},{"code":"CZ01.decompose()\n","cell_type":"code","prompt_number":7},{"code":"circuit_plot(CZ01.decompose(), nqubits=2)","cell_type":"code","prompt_number":8},{"code":"SWAP10 = SWAP(1, 0); SWAP10\n","cell_type":"code","prompt_number":9},{"code":"SWAP10.decompose()","cell_type":"code","prompt_number":10},{"code":"circuit_plot(SWAP10.decompose(), nqubits=2)","cell_type":"code","prompt_number":11},{"code":"gates = [CGate(1,Y(0)), CGate(0,Z(1)), SWAP(1, 0)]","cell_type":"code","prompt_number":12},{"code":"for g in gates:\n dg = g.decompose()\n display(Eq(g, dg))\n circuit_plot(g, nqubits=2)\n circuit_plot(dg, nqubits=2) ","cell_type":"code","prompt_number":16},{"code":"%notebook save decomposition.ipynb","cell_type":"code","prompt_number":30}]} No newline at end of file |
@@ -0,0 +1,1 b'' | |||||
|
1 | {"cells":[{"cell_type":"text","text":"<h1>Dense Coding\n</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":2},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":3},{"code":"psi = Qubit('00')/sqrt(2) + Qubit('11')/sqrt(2); psi\n","cell_type":"code","prompt_number":4},{"code":"circuits = [H(1)*CNOT(1,0), H(1)*CNOT(1,0)*X(1), H(1)*CNOT(1,0)*Z(1), H(1)*CNOT(1,0)*Z(1)*X(1)]","cell_type":"code","prompt_number":20},{"code":"for circuit in circuits:\n circuit_plot(circuit, nqubits=2)\n display(Eq(circuit*psi,qapply(circuit*psi)))","cell_type":"code","prompt_number":21},{"code":"%notebook save dense_coding.ipynb","cell_type":"code","prompt_number":28}]} No newline at end of file |
@@ -0,0 +1,1 b'' | |||||
|
1 | {"cells":[{"cell_type":"text","text":"<h1>Grover's Algorithm</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"nqubits = 3\n","cell_type":"code","prompt_number":4},{"code":"def black_box(qubits):\n return True if qubits == IntQubit(1, qubits.nqubits) else False\n","cell_type":"code","prompt_number":3},{"code":"psi = superposition_basis(nqubits); psi\n","cell_type":"code","prompt_number":5},{"code":"v = OracleGate(nqubits, black_box)\n","cell_type":"code","prompt_number":6},{"code":"iter1 = qapply(grover_iteration(psi, v)); iter1\n","cell_type":"code","prompt_number":7},{"code":"iter2 = qapply(grover_iteration(iter1, v)); iter2\n","cell_type":"code","prompt_number":8},{"code":"measure_all_oneshot(iter2)\n","cell_type":"code","prompt_number":12},{"code":"%notebook save grovers.ipynb","cell_type":"code","prompt_number":28}]} No newline at end of file |
@@ -0,0 +1,1 b'' | |||||
|
1 | {"cells":[{"cell_type":"text","text":"<h1>Quantum Error Correction</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"M0 = Z(1)*X(2)*X(3)*Z(4); M0\n","cell_type":"code","prompt_number":3},{"code":"M1 = Z(2)*X(3)*X(4)*Z(0); M1\n","cell_type":"code","prompt_number":4},{"code":"M2 = Z(3)*X(4)*X(0)*Z(1); M2\n","cell_type":"code","prompt_number":5},{"code":"M3 = Z(4)*X(0)*X(1)*Z(2); M3\n","cell_type":"code","prompt_number":6},{"code":"gate_simp(Commutator(M0,M1).doit())\n","cell_type":"code","prompt_number":7},{"code":"for o in [M0,M1,M2,M3]:\n display(gate_simp(o*o))\n","cell_type":"code","prompt_number":8},{"code":"zero = Rational(1,4)*(1+M0)*(1+M1)*(1+M2)*(1+M3)*IntQubit(0, 5); zero\n","cell_type":"code","prompt_number":9},{"code":"qapply(4*zero)\n","cell_type":"code","prompt_number":10},{"code":"one = Rational(1,4)*(1+M0)*(1+M1)*(1+M2)*(1+M3)*IntQubit(2**5-1, 5); one\n","cell_type":"code","prompt_number":11},{"code":"qapply(4*one)\n","cell_type":"code","prompt_number":12},{"code":"encoding_circuit = H(3)*H(4)*CNOT(2,0)*CNOT(3,0)*CNOT(4,0)*H(1)*H(4)*\\\n CNOT(2,1)*CNOT(4,1)*H(2)*CNOT(3,2)*CNOT(4,2)*H(3)*\\\n H(4)*CNOT(4, 3)*Z(4)*H(4)*Z(4)\n","cell_type":"code","prompt_number":13},{"code":"circuit_plot(encoding_circuit, nqubits=5, scale=0.5)","cell_type":"code","prompt_number":14},{"code":"represent(4*encoding_circuit, nqubits=5)","cell_type":"code","prompt_number":16},{"code":"%notebook save qerror.ipynb","cell_type":"code","prompt_number":23},{"code":"","cell_type":"code","prompt_number":23}]} No 
newline at end of file |
@@ -0,0 +1,1 b'' | |||||
|
1 | {"cells":[{"cell_type":"text","text":"<h1>Quantum Fourier Transform</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"fourier = QFT(0,3).decompose(); fourier\n","cell_type":"code","prompt_number":3},{"code":"circuit_plot(fourier, nqubits=3)","cell_type":"code","prompt_number":4},{"code":"m = represent(fourier, nqubits=3)","cell_type":"code","prompt_number":12},{"code":"m","cell_type":"code","prompt_number":13},{"code":"represent(Fourier(0,3), nqubits=3)*4/sqrt(2)\n","cell_type":"code","prompt_number":5},{"code":"state = (Qubit('000') + Qubit('010') + Qubit('100') + Qubit('110'))/sqrt(4); state\n","cell_type":"code","prompt_number":6},{"code":"qapply(fourier*state)\n","cell_type":"code","prompt_number":7},{"code":"%notebook save qft.ipynb","cell_type":"code","prompt_number":23}]} No newline at end of file |
@@ -0,0 +1,1 b'' | |||||
|
1 | {"cells":[{"cell_type":"text","text":"<h1>Symbolic Quantum Computing</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":2},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":3},{"code":"alpha, beta = symbols('alpha beta',real=True)","cell_type":"code","prompt_number":4},{"code":"psi = alpha*Qubit('00') + beta*Qubit('11'); psi\n","cell_type":"code","prompt_number":5},{"code":"Dagger(psi)\n","cell_type":"code","prompt_number":6},{"code":"qapply(Dagger(Qubit('00'))*psi)\n","cell_type":"code","prompt_number":7},{"code":"for state, prob in measure_all(psi):\n display(state)\n display(prob)\n","cell_type":"code","prompt_number":8},{"code":"represent(psi, nqubits=2)\n","cell_type":"code","prompt_number":9},{"code":"g = X(0); g\n","cell_type":"code","prompt_number":10},{"code":"represent(g, nqubits=2)\n","cell_type":"code","prompt_number":11},{"code":"c = H(0)*Qubit('00'); c\n","cell_type":"code","prompt_number":12},{"code":"qapply(c)\n","cell_type":"code","prompt_number":13},{"code":"for g1 in (Y,Z,H):\n for g2 in (Y,Z,H):\n e = Commutator(g1(0),g2(0))\n if g1 != g2:\n display(Eq(e,e.doit()))\n","cell_type":"code","prompt_number":14},{"code":"c = H(0)*X(1)*H(0)**2*CNOT(0,1)*X(1)**3*X(0)*Z(2)**2*S(3)**3; c\n","cell_type":"code","prompt_number":24},{"code":"circuit_plot(c, nqubits=4)","cell_type":"code","prompt_number":25},{"code":"gate_simp(c)\n","cell_type":"code","prompt_number":16},{"code":"circuit_plot(gate_simp(c),nqubits=5)","cell_type":"code","prompt_number":23},{"code":"%notebook save quantum_computing.ipynb","cell_type":"code","prompt_number":35}]} No newline at 
end of file |
@@ -0,0 +1,1 b'' | |||||
|
1 | {"cells":[{"cell_type":"text","text":"<h1>Teleportation</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"a,b = symbols('ab', real=True)\nstate = Qubit('000')*a + Qubit('001')*b; state","cell_type":"code","prompt_number":3},{"code":"entangle1_2 = CNOT(1,2)*HadamardGate(1); entangle1_2\n","cell_type":"code","prompt_number":4},{"code":"state = qapply(entangle1_2*state); state\n","cell_type":"code","prompt_number":5},{"code":"entangle0_1 = HadamardGate(0)*CNOT(0,1); entangle0_1\n","cell_type":"code","prompt_number":6},{"code":"circuit_plot(entangle0_1*entangle1_2, nqubits=3)\n","cell_type":"code","prompt_number":7},{"code":"state = qapply(entangle0_1*state); state\n","cell_type":"code","prompt_number":8},{"code":"result = measure_partial(state, (0,1))\n","cell_type":"code","prompt_number":10},{"code":"state = (result[2][0]*2).expand(); state","cell_type":"code","prompt_number":11},{"code":"state = qapply(XGate(2)*state); state\n","cell_type":"code","prompt_number":12},{"code":"%notebook save teleportation.ipynb","cell_type":"code","prompt_number":13},{"code":"","cell_type":"code","prompt_number":18}]} No newline at end of file |
@@ -0,0 +1,1 b'' | |||||
|
1 | {"cells":[{"cell_type":"text","text":"<h1>Text Analysis Using NetworkX</h1>"},{"cell_type":"text","text":"<p>This notebook will analyze a plain text file treating it as a list of\nnewline-separated sentences (e.g. a list of paper titles).</p>\n<br>\n<p>It computes word frequencies (after doing some naive normalization by\nlowercasing and throwing away a few overly common words). It also computes,\nfrom the most common words, a weighted graph of word co-occurrences and\ndisplays it, as well as summarizing the graph structure by ranking its nodes in\ndescending order of eigenvector centrality.</p>\n<br>\n<p>This is meant as an illustration of text processing in Python, using matplotlib\nfor visualization and NetworkX for graph-theoretical manipulation. It should\nnot be considered production-strength code for serious text analysis.</p>\n<br>\n<p>Author: Fernando Perez</p>"},{"code":"%run text_analysis.py","cell_type":"code","prompt_number":3},{"code":"default_url = \"http://bibserver.berkeley.edu/tmp/titles.txt\"\nn_words = 15\nn_nodes = 15\nurl = default_url\n ","cell_type":"code","prompt_number":4},{"cell_type":"text","text":"Fetch text and do basic preprocessing."},{"code":"text = get_text_from_url(url).lower()\nlines = text.splitlines()\nwords = text_cleanup(text)","cell_type":"code","prompt_number":5},{"cell_type":"text","text":"Compute frequency histogram."},{"code":"wf = word_freq(words)\nsorted_wf = sort_freqs(wf)","cell_type":"code","prompt_number":6},{"cell_type":"text","text":"Build a graph from the n_nodes most frequent words."},{"code":"popular = sorted_wf[-n_nodes:]\npop_words = [wc[0] for wc in popular]\nco_occur = co_occurrences(lines, pop_words)\nwgraph = co_occurrences_graph(popular, co_occur, cutoff=1)\ncentrality = nx.eigenvector_centrality_numpy(wgraph)\n","cell_type":"code","prompt_number":7},{"cell_type":"text","text":"Print summaries of single-word frequencies and graph 
structure."},{"code":"summarize_freq_hist(sorted_wf)\nsummarize_centrality(centrality)","cell_type":"code","prompt_number":8},{"cell_type":"text","text":"Plot histogram and graph."},{"code":"plot_word_histogram(sorted_wf, n_words,\"Frequencies for %s most frequent words\" % n_words)","cell_type":"code","prompt_number":9},{"code":"plot_word_histogram(sorted_wf, 1.0, \"Frequencies for entire word list\")\n","cell_type":"code","prompt_number":10},{"code":"plot_graph(wgraph)","cell_type":"code","prompt_number":11},{"code":"%notebook save text_analysis.ipynb","cell_type":"code","prompt_number":10}]} No newline at end of file |
@@ -0,0 +1,373 b'' | |||||
|
1 | #!/usr/bin/env python | |||
|
2 | """Simple text analysis: word frequencies and co-occurrence graph. | |||
|
3 | ||||
|
4 | Usage: | |||
|
5 | ||||
|
6 | text_analysis.py [text_file] | |||
|
7 | ||||
|
8 | This script will analyze a plain text file treating it as a list of | |||
|
9 | newline-separated sentences (e.g. a list of paper titles). | |||
|
10 | ||||
|
11 | It computes word frequencies (after doing some naive normalization by | |||
|
12 | lowercasing and throwing away a few overly common words). It also computes, | |||
|
13 | from the most common words, a weighted graph of word co-occurrences and | |||
|
14 | displays it, as well as summarizing the graph structure by ranking its nodes in | |||
|
15 | descending order of eigenvector centrality. | |||
|
16 | ||||
|
17 | This is meant as an illustration of text processing in Python, using matplotlib | |||
|
18 | for visualization and NetworkX for graph-theoretical manipulation. It should | |||
|
19 | not be considered production-strength code for serious text analysis. | |||
|
20 | ||||
|
21 | Author: Fernando Perez <fernando.perez@berkeley.edu> | |||
|
22 | """ | |||
|
23 | ||||
|
24 | #----------------------------------------------------------------------------- | |||
|
25 | # Imports | |||
|
26 | #----------------------------------------------------------------------------- | |||
|
27 | ||||
|
28 | # From the standard library | |||
|
29 | import os | |||
|
30 | import re | |||
|
31 | import sys | |||
|
32 | import urllib2 | |||
|
33 | ||||
|
34 | # Third-party libraries | |||
|
35 | import networkx as nx | |||
|
36 | import numpy as np | |||
|
37 | ||||
|
38 | from matplotlib import pyplot as plt | |||
|
39 | ||||
|
40 | #----------------------------------------------------------------------------- | |||
|
41 | # Function definitions | |||
|
42 | #----------------------------------------------------------------------------- | |||
|
43 | ||||
|
def rescale_arr(arr, amin, amax):
    """Rescale an array to a new range.

    Return a new array whose range of values is (amin, amax).

    Parameters
    ----------
    arr : array-like
        Input values; converted to a float ndarray.

    amin : float
        new minimum value

    amax : float
        new maximum value

    Returns
    -------
    ndarray
        Affinely rescaled copy of `arr`, clipped to [amin, amax].

    Examples
    --------
    >>> a = np.arange(5)

    >>> rescale_arr(a,3,6)
    array([ 3.  ,  3.75,  4.5 ,  5.25,  6.  ])
    """
    # Accept any array-like, as the docstring promises (the original
    # required a real ndarray because it called .min()/.max() directly).
    arr = np.asarray(arr, dtype=float)

    # old bounds
    m = arr.min()
    M = arr.max()
    if M == m:
        # Degenerate (constant) input: the affine map below would divide by
        # zero.  Pin every value to the lower bound of the target range.
        return np.full_like(arr, amin)

    # scale/offset
    s = float(amax - amin) / (M - m)
    d = amin - s * m

    # Apply clip before returning to cut off possible overflows outside the
    # intended range due to roundoff error, so that we can absolutely guarantee
    # that on output, there are no values > amax or < amin.
    return np.clip(s * arr + d, amin, amax)
|
78 | ||||
|
79 | ||||
|
def all_pairs(items):
    """Return every unordered pair of distinct elements of `items`.

    Each pair (a, b) preserves the original ordering: `a` appears before
    `b` in `items`.  Order within the returned list follows the first
    element's position.
    """
    n = len(items)
    return [(items[i], items[j]) for i in range(n) for j in range(i + 1, n)]
|
88 | ||||
|
89 | ||||
|
def text_cleanup(text, min_length=3,
                 remove = set(['for', 'the', 'and', 'with'])):
    """Clean up a list of lowercase strings of text for simple analysis.

    Splits on whitespace, removes all 'words' less than `min_length` characters
    long, and those in the `remove` set.

    Returns a list of strings.
    """
    kept = []
    for token in text.lower().split():
        # Keep only tokens that are long enough and not in the stop set.
        if len(token) >= min_length and token not in remove:
            kept.append(token)
    return kept
|
101 | ||||
|
102 | ||||
|
def print_vk(lst):
    """Print a list of (word, count) pairs, one per line, aligned on the word.

    Parameters
    ----------
    lst : list
        A list of (word, count) tuples, as produced by `sort_freqs`.
        (The original docstring claimed value/key order, but the code
        treats element [0] as the word and element [1] as the count.)
    """
    # Find the longest word so the '->' arrows line up in one column.
    longest_key = max([len(word) for word, count in lst])
    # Make a format string out of it: right-aligned fixed-width word field.
    fmt = '%' + str(longest_key) + 's -> %s'
    # Do actual printing.  print() as a single-argument function call is
    # valid on both Python 2 and 3, unlike the original print statement.
    for k, v in lst:
        print(fmt % (k, v))
|
114 | ||||
|
115 | ||||
|
def word_freq(text):
    """Return a dictionary of word frequencies for the given text.

    Parameters
    ----------
    text : iterable of str
        The words to count.

    Returns
    -------
    dict
        Mapping of word -> number of occurrences.
    """
    counts = {}
    for word in text:
        if word in counts:
            counts[word] += 1
        else:
            counts[word] = 1
    return counts
|
125 | ||||
|
126 | ||||
|
def sort_freqs(freqs):
    """Sort a word frequency histogram represented as a dictionary.

    Parameters
    ----------
    freqs : dict
        A dict with string keys and integer values.

    Return
    ------
    items : list
        A list of (word, count) pairs, sorted by ascending count.
        (The original docstring said (count, word), which matched an older,
        since-removed implementation, not the actual return value.)
    """
    # sorted() accepts the items view directly, which keeps this working on
    # Python 3 too — dict.items() there returns a view with no .sort().
    # Ties preserve insertion order (sorted() is stable), same as list.sort.
    return sorted(freqs.items(), key=lambda wc: wc[1])
|
148 | ||||
|
149 | ||||
|
def summarize_freq_hist(freqs, n=10):
    """Print a simple summary of a word frequencies dictionary.

    Parameters
    ----------
    freqs : dict or list
        Word frequencies, represented either as a dict of word->count, or as a
        list of (word, count) pairs sorted by ascending count.

    n : int
        The number of least/most frequent words to print.
    """
    # Accept either representation; sort on demand for dicts.
    items = sort_freqs(freqs) if isinstance(freqs, dict) else freqs
    # print() calls below are valid on both Python 2 and 3 (the original
    # used Python-2-only print statements).
    print('Number of unique words: %s' % len(freqs))

    print('%d least frequent words:' % n)
    print_vk(items[:n])

    print('%d most frequent words:' % n)
    print_vk(items[-n:])
|
171 | ||||
|
172 | ||||
|
def get_text_from_url(url):
    """Given a url (local file path or remote url), read its contents.

    If it's a remote URL, it downloads the file and leaves it locally cached
    for future runs. If the local matching file is found, no download is made.

    Returns
    -------
    text : string
        The contents of the file.
    """
    if url.startswith('http'):
        # remote file, fetch only if needed
        fname = os.path.split(url)[1]
        if os.path.isfile(fname):
            with open(fname, 'r') as f:
                text = f.read()
        else:
            # Fetch *before* opening the cache file.  The original opened
            # the file for writing first, so a failed download left behind
            # an empty cache file that poisoned every subsequent run.
            text = urllib2.urlopen(url).read()
            with open(fname, 'w') as f:
                f.write(text)
    else:
        # Local path: just read it, no caching involved.
        with open(url, 'r') as f:
            text = f.read()
    return text
|
198 | ||||
|
199 | ||||
|
def co_occurrences(lines, words):
    """Return histogram of co-occurrences of words in a list of lines.

    Parameters
    ----------
    lines : list
        A list of strings considered as 'sentences' to search for co-occurrences.

    words : list
        A list of words from which all unordered pairs will be constructed and
        searched for co-occurrences.

    Returns
    -------
    dict
        Mapping of (word1, word2) -> number of lines where both appear
        (separated by at least one space), in either order.
    """
    wpairs = all_pairs(words)

    # Now build histogram of co-occurrences.  re.escape() protects against
    # words containing regex metacharacters (e.g. 'c++', 'u.s.'), which the
    # original raw interpolation would compile incorrectly or crash on.
    co_occur = {}
    for w1, w2 in wpairs:
        e1, e2 = re.escape(w1), re.escape(w2)
        rx = re.compile('%s .*%s|%s .*%s' % (e1, e2, e2, e1))
        co_occur[w1, w2] = sum([1 for line in lines if rx.search(line)])

    return co_occur
|
221 | ||||
|
222 | ||||
|
def co_occurrences_graph(word_hist, co_occur, cutoff=0):
    """Convert a word histogram with co-occurrences to a weighted graph.

    Edges are only added if the count is strictly above cutoff.

    Parameters
    ----------
    word_hist : list
        (word, count) pairs; each word becomes a node carrying a 'count'
        attribute.
    co_occur : dict
        Mapping of (word1, word2) -> co-occurrence count.
    cutoff : int
        Minimum (exclusive) count for an edge to be included.

    Returns
    -------
    networkx.Graph
    """
    g = nx.Graph()
    for word, count in word_hist:
        g.add_node(word, count=count)
    # .items() instead of the Python-2-only .iteritems() keeps this working
    # on both Python 2 and 3; behavior is identical for iteration.
    for (w1, w2), count in co_occur.items():
        if count <= cutoff:
            continue
        g.add_edge(w1, w2, weight=count)
    return g
|
236 | ||||
|
237 | ||||
|
def plot_graph(wgraph, pos=None):
    """Conveniently summarize graph visually.

    Node size tracks the word count, node color the degree; edge width and
    color track the co-occurrence weight.

    Parameters
    ----------
    wgraph : networkx.Graph
        Graph with 'count' node attributes and 'weight' edge attributes,
        as built by `co_occurrences_graph`.
    pos : dict, optional
        Precomputed node layout; a spring layout is computed when omitted.
    """
    # Plot nodes with size according to count.
    sizes = []
    degrees = []
    # nodes()/edges() with data=True work on both networkx 1.x (lists) and
    # 2.x (views); the *_iter variants were removed in networkx 2.0.
    for n, d in wgraph.nodes(data=True):
        sizes.append(d['count'])
        degrees.append(wgraph.degree(n))
    sizes = rescale_arr(np.array(sizes, dtype=float), 100, 1000)

    # Compute layout and label edges according to weight.
    pos = nx.spring_layout(wgraph) if pos is None else pos
    labels = {}
    width = []
    for n1, n2, d in wgraph.edges(data=True):
        w = d['weight']
        labels[n1, n2] = w
        width.append(w)

    # Remap width to the 1-15 range so thin edges stay visible.
    width = rescale_arr(np.array(width, dtype=float), 1, 15)

    # Create figure.  Keyword args make the (left, bottom, right) margins
    # explicit; values are unchanged from the original positional call.
    fig, ax = plt.subplots()
    fig.subplots_adjust(left=0, bottom=0, right=1)
    nx.draw_networkx_nodes(wgraph, pos, node_size=sizes, node_color=degrees,
                           alpha=0.8)
    nx.draw_networkx_labels(wgraph, pos, font_size=15, font_weight='bold')
    nx.draw_networkx_edges(wgraph, pos, width=width, edge_color=width,
                           edge_cmap=plt.cm.Blues)
    nx.draw_networkx_edge_labels(wgraph, pos, edge_labels=labels)
    ax.set_title('Node color:degree, size:count, edge: co-occurrence count')
|
270 | ||||
|
271 | ||||
|
def plot_word_histogram(freqs, show=10, title=None):
    """Plot a histogram of word frequencies, limited to the top `show` ones.

    Parameters
    ----------
    freqs : dict or list
        Word frequencies as a dict of word->count, or as a list of
        (word, count) pairs sorted by ascending count.
    show : int or float
        If an int, the number of most frequent words to show; otherwise
        interpreted as the fraction of the word list to show.
    title : str, optional
        Title for the axes.

    Returns
    -------
    The matplotlib axes instance holding the plot.
    """
    sorted_f = sort_freqs(freqs) if isinstance(freqs, dict) else freqs

    # Don't show the tail
    if isinstance(show, int):
        # interpret as number of words to show in histogram
        show_f = sorted_f[-show:]
    else:
        # interpret as a fraction
        start = -int(round(show * len(freqs)))
        show_f = sorted_f[start:]

    # Now, extract words and counts, plot
    n_words = len(show_f)
    ind = np.arange(n_words)
    words = [i[0] for i in show_f]
    counts = [i[1] for i in show_f]

    fig, ax = plt.subplots()
    if n_words <= 20:
        # Only show bars and x labels for small histograms, they don't make
        # sense otherwise
        ax.bar(ind, counts)
        ax.set_xticks(ind)
        ax.set_xticklabels(words, rotation=45)
        fig.subplots_adjust(bottom=0.25)
    else:
        # For larger ones, do a step plot
        ax.step(ind, counts)

    # If it spans more than two decades, use a log scale.  Guard against an
    # empty word list (max()/min() of an empty sequence raises) and zero
    # counts (division by zero), both of which crashed the original code.
    if counts and min(counts) > 0 and float(max(counts)) / min(counts) > 100:
        ax.set_yscale('log')

    if title:
        ax.set_title(title)
    return ax
|
311 | ||||
|
312 | ||||
|
def summarize_centrality(centrality):
    """Print nodes ranked by descending centrality score.

    Parameters
    ----------
    centrality : dict
        Mapping of node -> centrality score, e.g. from
        nx.eigenvector_centrality_numpy.
    """
    # sorted() accepts the items view directly, which keeps this working on
    # Python 3 too (there, dict.items() has no .sort() method); print() as
    # a single-argument function call is valid on both Python 2 and 3.
    ranked = sorted(centrality.items(), key=lambda x: x[1], reverse=True)
    print('\nGraph centrality')
    for node, cent in ranked:
        print("%15s: %.3g" % (node, cent))
|
319 | ||||
|
320 | #----------------------------------------------------------------------------- | |||
|
321 | # Main script | |||
|
322 | #----------------------------------------------------------------------------- | |||
|
323 | ||||
|
324 | # if __name__ == '__main__': | |||
|
325 | ||||
|
326 | # # Configure user variables here | |||
|
327 | # # Specify the url (can be a local file path) of the text file to analyze. | |||
|
328 | # # If not given, it's read from the command line as the first argument | |||
|
329 | # | |||
|
330 | # # 11226 titles of recent articles in arxiv/math/prob | |||
|
331 | # default_url = "http://bibserver.berkeley.edu/tmp/titles.txt" | |||
|
332 | # # Number of words to display in detailed histogram | |||
|
333 | # n_words = 15 | |||
|
334 | # # Number of words to use as nodes for co-occurrence graph. | |||
|
335 | # n_nodes = 15 | |||
|
336 | # | |||
|
337 | # # End of user configuration | |||
|
338 | # | |||
|
339 | # # Actual code starts here | |||
|
340 | # try: | |||
|
341 | # url = sys.argv[1] | |||
|
342 | # except IndexError: | |||
|
343 | # url = default_url | |||
|
344 | # | |||
|
345 | # # Fetch text and do basic preprocessing | |||
|
346 | # text = get_text_from_url(url).lower() | |||
|
347 | # lines = text.splitlines() | |||
|
348 | # words = text_cleanup(text) | |||
|
349 | # | |||
|
350 | # # Compute frequency histogram | |||
|
351 | # wf = word_freq(words) | |||
|
352 | # sorted_wf = sort_freqs(wf) | |||
|
353 | # | |||
|
354 | # # Build a graph from the n_nodes most frequent words | |||
|
355 | # popular = sorted_wf[-n_nodes:] | |||
|
356 | # pop_words = [wc[0] for wc in popular] | |||
|
357 | # co_occur = co_occurrences(lines, pop_words) | |||
|
358 | # wgraph = co_occurrences_graph(popular, co_occur, cutoff=1) | |||
|
359 | # centrality = nx.eigenvector_centrality_numpy(wgraph) | |||
|
360 | # | |||
|
361 | # # Print summaries of single-word frequencies and graph structure | |||
|
362 | # summarize_freq_hist(sorted_wf) | |||
|
363 | # summarize_centrality(centrality) | |||
|
364 | # | |||
|
365 | # # Plot histogram and graph | |||
|
366 | # plt.close('all') | |||
|
367 | # plot_word_histogram(sorted_wf, n_words, | |||
|
368 | # "Frequencies for %s most frequent words" % n_words) | |||
|
369 | # plot_word_histogram(sorted_wf, 1.0, "Frequencies for entire word list") | |||
|
370 | # plot_graph(wgraph) | |||
|
371 | # | |||
|
372 | # # Display figures | |||
|
373 | # plt.show() |
1 | NO CONTENT: new file 100644 |
|
NO CONTENT: new file 100644 | ||
The requested commit or file is too big and content was truncated. Show full diff |
General Comments 0
You need to be logged in to leave comments.
Login now