Gun_Data.orig.tex
505 lines
| 15.2 KiB
| application/x-tex
|
TexLexer
Matthias BUSSONNIER
|
%% This file was auto-generated by IPython.
%% Conversion from the original notebook file:
%% tests/ipynbref/Gun_Data.orig.ipynb
%%
\documentclass[11pt,english]{article}
%% This is the automatic preamble used by IPython.  Note that it does *not*
%% include a documentclass declaration, that is added at runtime to the overall
%% document.

% Math support and graphics inclusion
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{graphicx}
% Unicode input: the utf8x option of inputenc is provided by the ucs package
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
% needed for markdown enumerations to work
\usepackage{enumerate}
% Slightly bigger margins than the latex defaults
\usepackage{geometry}
\geometry{verbose,tmargin=3cm,bmargin=3cm,lmargin=2.5cm,rmargin=2.5cm}
% Define a few colors for use in code, links and cell shading
\usepackage{color}
% Text/link colors (used by the listings and hyperref setup)
\definecolor{orange}{cmyk}{0,0.4,0.8,0.2}
\definecolor{darkorange}{rgb}{.71,0.21,0.01}
\definecolor{darkgreen}{rgb}{.12,.54,.11}
\definecolor{myteal}{rgb}{.26, .44, .56}
% Gray levels
\definecolor{gray}{gray}{0.45}
\definecolor{lightgray}{gray}{.95}
\definecolor{mediumgray}{gray}{.8}
% Background colors of the framed code-cell environments
\definecolor{inputbackground}{rgb}{.95, .95, .85}
\definecolor{outputbackground}{rgb}{.95, .95, .95}
\definecolor{traceback}{rgb}{1, .95, .95}
% Framed environments for code cells (inputs, outputs, errors, ...).  The
% various uses of \unskip (or not) at the end were fine-tuned by hand, so don't
% randomly change them unless you're sure of the effect it will have.
\usepackage{framed}

% remove extraneous vertical space in boxes
\setlength\fboxsep{0pt}
% codecell is the whole input+output set of blocks that a Code cell can
% generate.

% TODO: unfortunately, it seems that using a framed codecell environment breaks
% the ability of the frames inside of it to be broken across pages.  This
% causes at least the problem of having lots of empty space at the bottom of
% pages as new frames are moved to the next page, and if a single frame is too
% long to fit on a page, will completely stop latex from compiling the
% document.  So unless we figure out a solution to this, we'll have to instead
% leave the codecell env. as empty.  I'm keeping the original codecell
% definition here (a thin vertical bar) for reference, in case we find a
% solution to the page break issue.

%% \newenvironment{codecell}{%
%%     \def\FrameCommand{\color{mediumgray} \vrule width 1pt \hspace{5pt}}%
%%    \MakeFramed{\vspace{-0.5em}}}
%%  {\unskip\endMakeFramed}

% For now, make this a no-op...
% \newenvironment takes the name, the begin code AND the end code; both code
% arguments are given (empty) so that the definition does not swallow the
% token that follows it as its end code.
\newenvironment{codecell}{}{}
% codeinput: frame around the input (source) part of a code cell.  The frame
% is a \colorbox in the `inputbackground' color; \hsize is reduced by \width
% before \FrameRestore is applied to the framed content.
\newenvironment{codeinput}{%
  \def\FrameCommand{\colorbox{inputbackground}}%
  \MakeFramed{\advance\hsize-\width \FrameRestore}}
 {\unskip\endMakeFramed}
% codeoutput: frame around the output part of a code cell, drawn as a
% \colorbox in the `outputbackground' color.  The negative \vspace pulls the
% frame up by 1.4em before it starts, and a \medskip is added inside the frame
% at the end.
\newenvironment{codeoutput}{%
  \def\FrameCommand{\colorbox{outputbackground}}%
  \vspace{-1.4em}
  \MakeFramed{\advance\hsize-\width \FrameRestore}}
 {\unskip\medskip\endMakeFramed}
% traceback: frame drawn as a \colorbox in the `traceback' color.  Unlike
% codeinput and codeoutput, no \unskip is issued before the frame is closed.
\newenvironment{traceback}{%
  \def\FrameCommand{\colorbox{traceback}}%
  \MakeFramed{\advance\hsize-\width \FrameRestore}}
 {\endMakeFramed}
% Use and configure listings package for nicely formatted code
\usepackage{listingsutf8}
\lstset{
  language=python,
  inputencoding=utf8x,
  % NOTE(review): the listings manual documents this key as true|false;
  % \true is not a standard macro, so confirm which value actually takes
  % effect here before relying on extended-character handling.
  extendedchars=\true,
  aboveskip=\smallskipamount,
  belowskip=\smallskipamount,
  xleftmargin=2mm,
  breaklines=true,
  basicstyle=\small \ttfamily,
  showstringspaces=false,
  keywordstyle=\color{blue}\bfseries,
  commentstyle=\color{myteal},
  stringstyle=\color{darkgreen},
  identifierstyle=\color{darkorange},
  columns=fullflexible,  % tighter character kerning, like verb
}
% The hyperref package gives us a pdf with properly built
% internal navigation ('pdf bookmarks' for the table of contents,
% internal cross-reference links, web links for URLs, etc.)
\usepackage{hyperref}
% Links are shown as colored text, using the colors defined earlier in this
% preamble for internal links and citations.
\hypersetup{
  breaklinks=true,  % so long urls are correctly broken across lines
  colorlinks=true,
  urlcolor=blue,
  linkcolor=darkorange,
  citecolor=darkgreen,
}
% hardcode size of all verbatim environments to be a bit smaller
% NOTE(review): \g@addto@macro takes two arguments, so without braces only
% \small is appended to \@verbatim; the \topsep and \partopsep assignments
% that follow are executed once, right here in the preamble.  Confirm whether
% they were meant to be part of the appended code.
\makeatletter
\g@addto@macro\@verbatim\small\topsep=0.5em\partopsep=0pt
\makeatother

% Prevent overflowing lines due to urls and other hard-to-break entities.
\sloppy
\begin{document} | ||||
\section{Some gun violence analysis with Wikipedia data} | ||||
As
\href{https://twitter.com/jonst0kes/status/282330530412888064}{requested
by John Stokes}, here are per-capita numbers for gun-related homicides,
relating to GDP and total homicides, so the situation in the United
States can be put in context relative to other nations.

Main data source is UNODC (via Wikipedia
\href{http://en.wikipedia.org/wiki/List\_of\_countries\_by\_intentional\_homicide\_rate}{here}
and
\href{http://en.wikipedia.org/wiki/List\_of\_countries\_by\_firearm-related\_death\_rate}{here}).
GDP data from World Bank, again
\href{http://en.wikipedia.org/wiki/List\_of\_countries\_by\_GDP\_(PPP)\_per\_capita}{via
Wikipedia}.

If the numbers on Wikipedia are inaccurate, or their relationship is not
sound (e.g.~numbers taken from different years, during which significant
change occurred), then obviously none of this analysis is valid.

To summarize the data, every possible way you look at it the US is lousy
at preventing gun violence. Even when compared to significantly more
violent places, gun violence in the US is a serious problem, and when
compared to similarly wealthy places, the US is an outstanding disaster.

\textbf{UPDATE:} the relationship of the gun data and totals does not
seem to be valid.
\href{http://www2.fbi.gov/ucr/cius2009/offenses/violent\_crime/index.html}{FBI
data} suggests that the relative contribution of guns to homicides in
the US is 47\%, but relating these two data sources gives 80\%. Internal
comparisons should still be fine, but `fraction' analysis has been
stricken.

\begin{codecell} | ||||
\begin{codeinput}
% Notebook input: the two IPython magic lines of this cell (shown verbatim).
\begin{lstlisting}
%load_ext retina
%pylab inline
\end{lstlisting}
\end{codeinput}
\begin{codeoutput} | ||||
\begin{verbatim} | ||||
Welcome to pylab, a matplotlib-based Python environment [backend: module://IPython.zmq.pylab.backend_inline]. | ||||
For more information, type 'help(pylab)'. | ||||
\end{verbatim} | ||||
\end{codeoutput} | ||||
\end{codecell} | ||||
\begin{codecell}
\begin{codeinput}
% Notebook input: imports and pandas display options.
\begin{lstlisting}
from IPython.display import display
import pandas
pandas.set_option('display.notebook_repr_html', True)
pandas.set_option('display.precision', 2)
\end{lstlisting}
\end{codeinput}
\end{codecell}
Some utility functions for display:
\begin{codecell}
\begin{codeinput}
% Notebook input: plot_percent helper (function body indentation restored).
\begin{lstlisting}
def plot_percent(df, limit=10):
    df['Gun Percent'][:limit].plot()
    plt.ylim(0,100)
    plt.title("% Gun Homicide")
    plt.show()
\end{lstlisting}
\end{codeinput}
\end{codecell}
\begin{codecell}
\begin{codeinput}
% Notebook input: plot_percapita helper (function body indentation restored).
\begin{lstlisting}
def plot_percapita(df, limit=10):
    df = df.ix[:,['Homicides', 'Gun Homicides']][:limit]
    df['Total Homicides'] = df['Homicides'] - df['Gun Homicides']
    del df['Homicides']
    df.plot(kind='bar', stacked=True, sort_columns=True)
    plt.ylabel("per 100k")
    plt.show()
\end{lstlisting}
\end{codeinput}
\end{codecell}
\begin{codecell}
\begin{codeinput}
% Notebook input: display_relevant helper (function body indentation restored).
\begin{lstlisting}
def display_relevant(df, limit=10):
    display(df.ix[:,['Homicides', 'Gun Homicides', 'Gun Data Source']][:limit])
\end{lstlisting}
\end{codeinput}
\end{codecell}
Load the data:
\begin{codecell}
\begin{codeinput}
% Notebook input: read the three CSV files, join them, and derive the
% 'Gun Percent' column.
\begin{lstlisting}
totals = pandas.read_csv('totals.csv', '\t', index_col=0)
guns = pandas.read_csv('guns.csv', '\t', index_col=0)
gdp = pandas.read_csv('gdp.csv', '\t', index_col=1)
data = totals.join(guns).join(gdp)
data['Gun Percent'] = 100 * data['Gun Homicides'] / data['Homicides']
del data['Unintentional'],data['Undetermined'],data['Gun Suicides']
data = data.dropna()
\end{lstlisting}
\end{codeinput}
\end{codecell}
Of all sampled countries (found data for 68 countries), the US is in the
top 15 in Gun Homicides per capita.

Numbers are per 100k.
\begin{codecell} | ||||
\begin{codeinput}
% Notebook input: sort by 'Gun Homicides' (descending) and show 15 rows.
\begin{lstlisting}
data = data.sort("Gun Homicides", ascending=False)
display_relevant(data, 15)
\end{lstlisting}
\end{codeinput}
\begin{codeoutput} | ||||
\begin{verbatim} | ||||
Homicides Gun Homicides Gun Data Source | ||||
Country | ||||
El Salvador 69.2 50.4 OAS 2011[1] | ||||
Jamaica 52.2 47.4 OAS 2011[1] | ||||
Honduras 91.6 46.7 OAS 2011[1] | ||||
Guatemala 38.5 38.5 OAS 2011[1] | ||||
Colombia 33.4 27.1 UNODC 2011 [2] | ||||
Brazil 21.0 18.1 UNODC 2011[3] | ||||
Panama 21.6 12.9 OAS 2011[1] | ||||
Mexico 16.9 10.0 UNODC 2011[4] | ||||
Paraguay 11.5 7.3 UNODC 2000[11] | ||||
Nicaragua 13.6 7.1 OAS 2011[1] | ||||
United States 4.2 3.7 OAS 2012[5][6] | ||||
Costa Rica 10.0 3.3 UNODC 2002[7] | ||||
Uruguay 5.9 3.2 UNODC 2002[7] | ||||
Argentina 3.4 3.0 UNODC 2011[12] | ||||
Barbados 11.3 3.0 UNODC 2000[11] | ||||
\end{verbatim} | ||||
\end{codeoutput} | ||||
\end{codecell} | ||||
Take the top 30 countries by GDP
\begin{codecell}
\begin{codeinput}
% Notebook input: keep the last 30 rows after sorting by 'GDP'.
\begin{lstlisting}
top = data.sort('GDP')[-30:]
\end{lstlisting}
\end{codeinput}
\end{codecell}
and rank them by Gun Homicides per capita:
\begin{codecell} | ||||
\begin{codeinput}
% Notebook input: rank the GDP-selected countries by 'Gun Homicides',
% display 5 rows and plot 10.
\begin{lstlisting}
top_by_guns = top.sort("Gun Homicides", ascending=False)
display_relevant(top_by_guns, 5)
plot_percapita(top_by_guns, 10)
\end{lstlisting}
\end{codeinput}
\begin{codeoutput} | ||||
\begin{verbatim} | ||||
Homicides Gun Homicides Gun Data Source | ||||
Country | ||||
United States 4.2 3.7 OAS 2012[5][6] | ||||
Israel 2.1 0.9 WHO 2012[10] | ||||
Canada 1.6 0.8 Krug 1998[13] | ||||
Luxembourg 2.5 0.6 WHO 2012[10] | ||||
Greece 1.5 0.6 Krug 1998[13] | ||||
\end{verbatim} | ||||
\begin{center} | ||||
\includegraphics[width=0.7\textwidth]{Gun_Data_orig_files/Gun_Data_orig_fig_00.png} | ||||
\par | ||||
\end{center} | ||||
\end{codeoutput} | ||||
\end{codecell} | ||||
\textbf{NOTE:} these bar graphs should not be interpreted as fractions
of a total, as the two data sources do not appear to be comparable. But
the red and blue bar graphs should still be internally comparable.

The US is easily \#1 of the 30 wealthiest countries in Gun Homicides per
capita, by a factor of 4:1.

Adding USA, Canada, and Mexico to all of Europe, USA is a strong \#2
behind Mexico in total gun homicides per capita.
\begin{codecell} | ||||
\begin{codeinput}
% Notebook input: select Europe plus the United States, Canada and Mexico,
% then plot and display the selection (continuation lines re-aligned).
\begin{lstlisting}
index = (data['Region'] == 'Europe') + \
        (data.index == 'United States') + \
        (data.index == 'Canada') + \
        (data.index == 'Mexico')
selected = data[index]

print "By Total Gun Homicides"
sys.stdout.flush()

by_guns = selected.sort("Gun Homicides", ascending=False)
#by_guns['Gun Homicides'].plot(kind='bar')
plot_percapita(by_guns, limit=25)
display_relevant(selected, limit=None)
\end{lstlisting}
\end{codeinput}
\begin{codeoutput} | ||||
\begin{verbatim} | ||||
By Total Gun Homicides | ||||
\end{verbatim} | ||||
\begin{center} | ||||
\includegraphics[width=0.7\textwidth]{Gun_Data_orig_files/Gun_Data_orig_fig_01.png} | ||||
\par | ||||
\end{center} | ||||
\begin{verbatim} | ||||
Homicides Gun Homicides Gun Data Source | ||||
Country | ||||
Mexico 16.9 10.0 UNODC 2011[4] | ||||
United States 4.2 3.7 OAS 2012[5][6] | ||||
Montenegro 3.5 2.1 WHO 2012[10] | ||||
Moldova 7.5 1.0 WHO 2012[10] | ||||
Canada 1.6 0.8 Krug 1998[13] | ||||
Serbia 1.2 0.6 WHO 2012[10] | ||||
Luxembourg 2.5 0.6 WHO 2012[10] | ||||
Greece 1.5 0.6 Krug 1998[13] | ||||
Croatia 1.4 0.6 WHO 2012[10] | ||||
Switzerland 0.7 0.5 OAS 2011[1] | ||||
Malta 1.0 0.5 WHO 2012[10] | ||||
Portugal 1.2 0.5 WHO 2012[10] | ||||
Belarus 4.9 0.4 UNODC 2002[7] | ||||
Ireland 1.2 0.4 WHO 2012[10] | ||||
Italy 0.9 0.4 WHO 2012[10] | ||||
Ukraine 5.2 0.3 UNODC 2000[11] | ||||
Estonia 5.2 0.3 WHO 2012[10] | ||||
Belgium 1.7 0.3 WHO 2012[10] | ||||
Finland 2.2 0.3 WHO 2012[10] | ||||
Lithuania 6.6 0.2 WHO 2012[10] | ||||
Bulgaria 2.0 0.2 WHO 2012[10] | ||||
Georgia 4.3 0.2 WHO 2012[10] | ||||
Denmark 0.9 0.2 WHO 2012[10] | ||||
France 1.1 0.2 WHO 2012[10] | ||||
Netherlands 1.1 0.2 WHO 2012[10] | ||||
Sweden 1.0 0.2 WHO 2012[10] | ||||
Slovakia 1.5 0.2 WHO 2012[10] | ||||
Austria 0.6 0.2 WHO 2012[10] | ||||
Latvia 3.1 0.2 WHO 2012[10] | ||||
Spain 0.8 0.1 WHO 2012[10] | ||||
Hungary 1.3 0.1 WHO 2012[10] | ||||
Czech Republic 1.7 0.1 WHO 2012[10] | ||||
Germany 0.8 0.1 WHO 2012[10] | ||||
Slovenia 0.7 0.1 WHO 2012[10] | ||||
Romania 2.0 0.0 WHO 2012[10] | ||||
United Kingdom 1.2 0.0 WHO2012 [10] | ||||
Norway 0.6 0.0 WHO 2012[10] | ||||
Poland 1.1 0.0 WHO 2012[10] | ||||
\end{verbatim} | ||||
\end{codeoutput} | ||||
\end{codecell} | ||||
Let's just compare the US, Canada, and the UK:
\begin{codecell} | ||||
\begin{codeinput}
% Notebook input: pick three countries by index label and plot them.
\begin{lstlisting}
select = data.ix[['United States', 'Canada', 'United Kingdom']]
plot_percapita(select)
\end{lstlisting}
\end{codeinput}
\begin{codeoutput} | ||||
\begin{center} | ||||
\includegraphics[width=0.7\textwidth]{Gun_Data_orig_files/Gun_Data_orig_fig_02.png} | ||||
\par | ||||
\end{center} | ||||
\end{codeoutput} | ||||
\end{codecell} | ||||
Normalize to the US numbers (inverse: the US value divided by each
country's value):
\begin{codecell} | ||||
\begin{codeinput}
% Notebook input: replace both columns by the US value divided by each
% country's value, then display the result.
\begin{lstlisting}
select['Homicides'] = select['Homicides']['United States'] / select['Homicides']
select['Gun Homicides'] = select['Gun Homicides']['United States'] / select['Gun Homicides']
display_relevant(select)
\end{lstlisting}
\end{codeinput}
\begin{codeoutput} | ||||
\begin{verbatim} | ||||
Homicides Gun Homicides Gun Data Source | ||||
United States 1.0 1.0 OAS 2012[5][6] | ||||
Canada 2.6 4.9 Krug 1998[13] | ||||
United Kingdom 3.5 92.5 WHO2012 [10] | ||||
\end{verbatim} | ||||
\end{codeoutput} | ||||
\end{codecell} | ||||
So, you are 2.6 times more likely to be killed in the US than in Canada,
and 3.5 times more likely than in the UK. That's bad, but not extreme.

However, you are 4.9 times more likely to be killed \emph{with a gun} in
the US than in Canada, and almost 100 times more likely than in the UK.
That is pretty extreme.

Countries represented:
\begin{codecell} | ||||
\begin{codeinput}
% Notebook input: print every index label of `data' (loop body indentation
% restored).
\begin{lstlisting}
for country in data.index:
    print country
\end{lstlisting}
\end{codeinput}
\begin{codeoutput} | ||||
\begin{verbatim} | ||||
El Salvador | ||||
Jamaica | ||||
Honduras | ||||
Guatemala | ||||
Colombia | ||||
Brazil | ||||
Panama | ||||
Mexico | ||||
Paraguay | ||||
Nicaragua | ||||
United States | ||||
Costa Rica | ||||
Uruguay | ||||
Argentina | ||||
Barbados | ||||
Montenegro | ||||
Peru | ||||
Moldova | ||||
Israel | ||||
India | ||||
Canada | ||||
Serbia | ||||
Luxembourg | ||||
Greece | ||||
Uzbekistan | ||||
Croatia | ||||
Kyrgyzstan | ||||
Switzerland | ||||
Malta | ||||
Portugal | ||||
Belarus | ||||
Ireland | ||||
Italy | ||||
Kuwait | ||||
Ukraine | ||||
Estonia | ||||
Belgium | ||||
Finland | ||||
Lithuania | ||||
Cyprus | ||||
Bulgaria | ||||
Georgia | ||||
Denmark | ||||
France | ||||
Netherlands | ||||
Sweden | ||||
Slovakia | ||||
Qatar | ||||
Austria | ||||
Latvia | ||||
New Zealand | ||||
Spain | ||||
Hungary | ||||
Czech Republic | ||||
Hong Kong | ||||
Australia | ||||
Singapore | ||||
Chile | ||||
Germany | ||||
Slovenia | ||||
Romania | ||||
Azerbaijan | ||||
South Korea | ||||
United Kingdom | ||||
Norway | ||||
Japan | ||||
Poland | ||||
Mauritius | ||||
\end{verbatim} | ||||
\end{codeoutput} | ||||
\end{codecell} | ||||
\end{document} | ||||