|
|
%% This file was auto-generated by IPython.
|
|
|
%% Conversion from the original notebook file:
|
|
|
%% tests/ipynbref/Gun_Data.orig.ipynb
|
|
|
%%
|
|
|
\documentclass[11pt,english]{article}
|
|
|
|
|
|
%% This is the automatic preamble used by IPython. Note that it does *not*
|
|
|
%% include a documentclass declaration, that is added at runtime to the overall
|
|
|
%% document.
|
|
|
|
|
|
\usepackage{amsmath}
|
|
|
\usepackage{amssymb}
|
|
|
\usepackage{graphicx}
|
|
|
\usepackage{ucs}
|
|
|
\usepackage[utf8x]{inputenc}
|
|
|
|
|
|
% needed for markdown enumerations to work
|
|
|
\usepackage{enumerate}
|
|
|
|
|
|
% Slightly bigger margins than the latex defaults
|
|
|
\usepackage{geometry}
|
|
|
\geometry{verbose,tmargin=3cm,bmargin=3cm,lmargin=2.5cm,rmargin=2.5cm}
|
|
|
|
|
|
% Define a few colors for use in code, links and cell shading
|
|
|
\usepackage{color}
|
|
|
\definecolor{orange}{cmyk}{0,0.4,0.8,0.2}
|
|
|
\definecolor{darkorange}{rgb}{.71,0.21,0.01}
|
|
|
\definecolor{darkgreen}{rgb}{.12,.54,.11}
|
|
|
\definecolor{myteal}{rgb}{.26, .44, .56}
|
|
|
\definecolor{gray}{gray}{0.45}
|
|
|
\definecolor{lightgray}{gray}{.95}
|
|
|
\definecolor{mediumgray}{gray}{.8}
|
|
|
\definecolor{inputbackground}{rgb}{.95, .95, .85}
|
|
|
\definecolor{outputbackground}{rgb}{.95, .95, .95}
|
|
|
\definecolor{traceback}{rgb}{1, .95, .95}
|
|
|
|
|
|
% Framed environments for code cells (inputs, outputs, errors, ...). The
|
|
|
% various uses of \unskip (or not) at the end were fine-tuned by hand, so don't
|
|
|
% randomly change them unless you're sure of the effect it will have.
|
|
|
\usepackage{framed}
|
|
|
|
|
|
% remove extraneous vertical space in boxes
|
|
|
\setlength\fboxsep{0pt}
|
|
|
|
|
|
% codecell is the whole input+output set of blocks that a Code cell can
|
|
|
% generate.
|
|
|
|
|
|
% TODO: unfortunately, it seems that using a framed codecell environment breaks
|
|
|
% the ability of the frames inside of it to be broken across pages. This
|
|
|
% causes at least the problem of having lots of empty space at the bottom of
|
|
|
% pages as new frames are moved to the next page, and if a single frame is too
|
|
|
% long to fit on a page, will completely stop latex from compiling the
|
|
|
% document. So unless we figure out a solution to this, we'll have to instead
|
|
|
% leave the codecell env. as empty. I'm keeping the original codecell
|
|
|
% definition here (a thin vertical bar) for reference, in case we find a
|
|
|
% solution to the page break issue.
|
|
|
|
|
|
%% \newenvironment{codecell}{%
|
|
|
%% \def\FrameCommand{\color{mediumgray} \vrule width 1pt \hspace{5pt}}%
|
|
|
%% \MakeFramed{\vspace{-0.5em}}}
|
|
|
%% {\unskip\endMakeFramed}
|
|
|
|
|
|
% For now, make this a no-op...
|
|
|
% \newenvironment takes a begin definition AND an end definition; both are
% left empty here so that codecell is a pure no-op wrapper.
\newenvironment{codecell}{}{}
|
|
|
|
|
|
% codeinput: framed block used in this document around the lstlisting that
% holds a cell's input. The frame is drawn with \colorbox{inputbackground};
% \hsize is reduced by \width before \FrameRestore (see the framed package
% documentation for the meaning of \width inside \MakeFramed). The end code
% issues \unskip before closing the frame.
\newenvironment{codeinput}{%
|
|
|
\def\FrameCommand{\colorbox{inputbackground}}%
|
|
|
\MakeFramed{\advance\hsize-\width \FrameRestore}}
|
|
|
{\unskip\endMakeFramed}
|
|
|
|
|
|
% codeoutput: framed block used in this document around a cell's outputs
% (verbatim text and/or centered figures). Same frame mechanism as
% codeinput, but coloured with outputbackground. The begin code inserts
% \vspace{-1.4em} before opening the frame, and the end code issues
% \unskip\medskip before closing it.
% NOTE(review): unlike its neighbours, the \vspace line does not end in `%';
% this may be part of the hand-tuned spacing -- confirm before changing.
\newenvironment{codeoutput}{%
|
|
|
\def\FrameCommand{\colorbox{outputbackground}}%
|
|
|
\vspace{-1.4em}
|
|
|
\MakeFramed{\advance\hsize-\width \FrameRestore}}
|
|
|
{\unskip\medskip\endMakeFramed}
|
|
|
|
|
|
% traceback: framed block coloured with the `traceback' colour. Same frame
% mechanism as codeinput/codeoutput, but the end code closes the frame
% without any \unskip. No use of this environment is visible in this file.
\newenvironment{traceback}{%
|
|
|
\def\FrameCommand{\colorbox{traceback}}%
|
|
|
\MakeFramed{\advance\hsize-\width \FrameRestore}}
|
|
|
{\endMakeFramed}
|
|
|
|
|
|
% Use and configure listings package for nicely formatted code
|
|
|
\usepackage{listingsutf8}
|
|
|
% Global listings defaults for the lstlisting blocks in this document:
% Python highlighting, small typewriter face, wrapping of long lines, and
% token colours taken from the \definecolor declarations in this preamble.
\lstset{
|
|
|
language=python,
|
|
|
inputencoding=utf8x,
|
|
|
extendedchars=true, % boolean key: takes the word true or false, not a control sequence such as \true
|
|
|
aboveskip=\smallskipamount,
|
|
|
belowskip=\smallskipamount,
|
|
|
xleftmargin=2mm,
|
|
|
breaklines=true,
|
|
|
basicstyle=\small \ttfamily,
|
|
|
showstringspaces=false,
|
|
|
keywordstyle=\color{blue}\bfseries,
|
|
|
commentstyle=\color{myteal},
|
|
|
stringstyle=\color{darkgreen},
|
|
|
identifierstyle=\color{darkorange},
|
|
|
columns=fullflexible, % tighter character kerning, like verb
|
|
|
}
|
|
|
|
|
|
% The hyperref package gives us a pdf with properly built
|
|
|
% internal navigation ('pdf bookmarks' for the table of contents,
|
|
|
% internal cross-reference links, web links for URLs, etc.)
|
|
|
\usepackage{hyperref}
|
|
|
% Link appearance: coloured link text (colorlinks) with one colour per link
% type. darkorange and darkgreen are defined with \definecolor earlier in
% this preamble; blue is not defined in this file (presumably the color
% package's predefined blue -- confirm).
\hypersetup{
|
|
|
breaklinks=true, % so long urls are correctly broken across lines
|
|
|
colorlinks=true,
|
|
|
urlcolor=blue,
|
|
|
linkcolor=darkorange,
|
|
|
citecolor=darkgreen,
|
|
|
}
|
|
|
|
|
|
% hardcode size of all verbatim environments to be a bit smaller
|
|
|
% Appends to \@verbatim, the internal macro patched here, so that verbatim
% material is set in \small.
% NOTE(review): the second argument of \g@addto@macro is not braced, so by
% TeX's argument rules only the single token \small is appended to
% \@verbatim. The \topsep and \partopsep assignments that follow therefore
% execute once, here in the preamble, instead of inside each verbatim
% environment. This may be deliberate hand-tuning -- confirm before adding
% braces, since doing so would change the spacing.
\makeatletter
|
|
|
\g@addto@macro\@verbatim\small\topsep=0.5em\partopsep=0pt
|
|
|
\makeatother
|
|
|
|
|
|
% Prevent overflowing lines due to urls and other hard-to-break entities.
|
|
|
\sloppy
|
|
|
|
|
|
\begin{document}
|
|
|
|
|
|
\section{Some gun violence analysis with Wikipedia data}
|
|
|
As
|
|
|
\href{https://twitter.com/jonst0kes/status/282330530412888064}{requested
|
|
|
by John Stokes}, here are per-capita numbers for gun-related homicides,
|
|
|
relating to GDP and total homicides, so the situation in the United
|
|
|
States can be put in context relative to other nations.
|
|
|
|
|
|
Main data source is UNODC (via Wikipedia
|
|
|
\href{http://en.wikipedia.org/wiki/List\_of\_countries\_by\_intentional\_homicide\_rate}{here}
|
|
|
and
|
|
|
\href{http://en.wikipedia.org/wiki/List\_of\_countries\_by\_firearm-related\_death\_rate}{here}).
|
|
|
|
|
|
GDP data from World Bank, again
|
|
|
\href{http://en.wikipedia.org/wiki/List\_of\_countries\_by\_GDP\_(PPP)\_per\_capita}{via
|
|
|
Wikipedia}.
|
|
|
|
|
|
If the numbers on Wikipedia are inaccurate, or their relationship is not
|
|
|
sound (e.g.~numbers taken from different years, during which significant
|
|
|
change occurred) then obviously none of this analysis is valid.
|
|
|
|
|
|
To summarize the data, every possible way you look at it the US is lousy
|
|
|
at preventing gun violence. Even when compared to significantly more
|
|
|
violent places, gun violence in the US is a serious problem, and when
|
|
|
compared to similarly wealthy places, the US is an outstanding disaster.
|
|
|
|
|
|
\textbf{UPDATE:} the relationship of the gun data and totals does not
|
|
|
seem to be valid.
|
|
|
\href{http://www2.fbi.gov/ucr/cius2009/offenses/violent\_crime/index.html}{FBI
|
|
|
data} suggests that the relative contribution of guns to homicides in
|
|
|
the US is 47\%, but relating these two data sources gives 80\%. Internal
|
|
|
comparisons should still be fine, but `fraction' analysis has been
|
|
|
stricken.
|
|
|
|
|
|
\begin{codecell}
|
|
|
\begin{codeinput}
|
|
|
\begin{lstlisting}
|
|
|
%load_ext retina
|
|
|
%pylab inline
|
|
|
\end{lstlisting}
|
|
|
\end{codeinput}
|
|
|
\begin{codeoutput}
|
|
|
\begin{verbatim}
|
|
|
Welcome to pylab, a matplotlib-based Python environment [backend: module://IPython.zmq.pylab.backend_inline].
|
|
|
For more information, type 'help(pylab)'.
|
|
|
\end{verbatim}
|
|
|
\end{codeoutput}
|
|
|
\end{codecell}
|
|
|
\begin{codecell}
|
|
|
\begin{codeinput}
|
|
|
\begin{lstlisting}
|
|
|
from IPython.display import display
|
|
|
import pandas
|
|
|
pandas.set_option('display.notebook_repr_html', True)
|
|
|
pandas.set_option('display.precision', 2)
|
|
|
\end{lstlisting}
|
|
|
\end{codeinput}
|
|
|
\end{codecell}
|
|
|
Some utility functions for display
|
|
|
|
|
|
\begin{codecell}
|
|
|
\begin{codeinput}
|
|
|
\begin{lstlisting}
|
|
|
def plot_percent(df, limit=10):
|
|
|
df['Gun Percent'][:limit].plot()
|
|
|
plt.ylim(0,100)
|
|
|
plt.title("% Gun Homicide")
|
|
|
plt.show()
|
|
|
|
|
|
\end{lstlisting}
|
|
|
\end{codeinput}
|
|
|
\end{codecell}
|
|
|
\begin{codecell}
|
|
|
\begin{codeinput}
|
|
|
\begin{lstlisting}
|
|
|
def plot_percapita(df, limit=10):
|
|
|
df = df.ix[:,['Homicides', 'Gun Homicides']][:limit]
|
|
|
df['Total Homicides'] = df['Homicides'] - df['Gun Homicides']
|
|
|
del df['Homicides']
|
|
|
df.plot(kind='bar', stacked=True, sort_columns=True)
|
|
|
plt.ylabel("per 100k")
|
|
|
plt.show()
|
|
|
|
|
|
\end{lstlisting}
|
|
|
\end{codeinput}
|
|
|
\end{codecell}
|
|
|
\begin{codecell}
|
|
|
\begin{codeinput}
|
|
|
\begin{lstlisting}
|
|
|
def display_relevant(df, limit=10):
|
|
|
display(df.ix[:,['Homicides', 'Gun Homicides', 'Gun Data Source']][:limit])
|
|
|
\end{lstlisting}
|
|
|
\end{codeinput}
|
|
|
\end{codecell}
|
|
|
Load the data
|
|
|
|
|
|
\begin{codecell}
|
|
|
\begin{codeinput}
|
|
|
\begin{lstlisting}
|
|
|
totals = pandas.read_csv('totals.csv', '\t', index_col=0)
|
|
|
guns = pandas.read_csv('guns.csv', '\t', index_col=0)
|
|
|
gdp = pandas.read_csv('gdp.csv', '\t', index_col=1)
|
|
|
data = totals.join(guns).join(gdp)
|
|
|
data['Gun Percent'] = 100 * data['Gun Homicides'] / data['Homicides']
|
|
|
del data['Unintentional'],data['Undetermined'],data['Gun Suicides']
|
|
|
data = data.dropna()
|
|
|
\end{lstlisting}
|
|
|
\end{codeinput}
|
|
|
\end{codecell}
|
|
|
Of all sampled countries (Found data for 68 countries), the US is in the
|
|
|
top 15 in Gun Homicides per capita.
|
|
|
|
|
|
Numbers are per 100k.
|
|
|
|
|
|
\begin{codecell}
|
|
|
\begin{codeinput}
|
|
|
\begin{lstlisting}
|
|
|
data = data.sort("Gun Homicides", ascending=False)
|
|
|
display_relevant(data, 15)
|
|
|
\end{lstlisting}
|
|
|
\end{codeinput}
|
|
|
\begin{codeoutput}
|
|
|
\begin{verbatim}
|
|
|
Homicides Gun Homicides Gun Data Source
|
|
|
Country
|
|
|
El Salvador 69.2 50.4 OAS 2011[1]
|
|
|
Jamaica 52.2 47.4 OAS 2011[1]
|
|
|
Honduras 91.6 46.7 OAS 2011[1]
|
|
|
Guatemala 38.5 38.5 OAS 2011[1]
|
|
|
Colombia 33.4 27.1 UNODC 2011 [2]
|
|
|
Brazil 21.0 18.1 UNODC 2011[3]
|
|
|
Panama 21.6 12.9 OAS 2011[1]
|
|
|
Mexico 16.9 10.0 UNODC 2011[4]
|
|
|
Paraguay 11.5 7.3 UNODC 2000[11]
|
|
|
Nicaragua 13.6 7.1 OAS 2011[1]
|
|
|
United States 4.2 3.7 OAS 2012[5][6]
|
|
|
Costa Rica 10.0 3.3 UNODC 2002[7]
|
|
|
Uruguay 5.9 3.2 UNODC 2002[7]
|
|
|
Argentina 3.4 3.0 UNODC 2011[12]
|
|
|
Barbados 11.3 3.0 UNODC 2000[11]
|
|
|
\end{verbatim}
|
|
|
\end{codeoutput}
|
|
|
\end{codecell}
|
|
|
Take top 30 Countries by GDP
|
|
|
|
|
|
\begin{codecell}
|
|
|
\begin{codeinput}
|
|
|
\begin{lstlisting}
|
|
|
top = data.sort('GDP')[-30:]
|
|
|
\end{lstlisting}
|
|
|
\end{codeinput}
|
|
|
\end{codecell}
|
|
|
and rank them by Gun Homicides per capita:
|
|
|
|
|
|
\begin{codecell}
|
|
|
\begin{codeinput}
|
|
|
\begin{lstlisting}
|
|
|
top_by_guns = top.sort("Gun Homicides", ascending=False)
|
|
|
display_relevant(top_by_guns, 5)
|
|
|
plot_percapita(top_by_guns, 10)
|
|
|
\end{lstlisting}
|
|
|
\end{codeinput}
|
|
|
\begin{codeoutput}
|
|
|
\begin{verbatim}
|
|
|
Homicides Gun Homicides Gun Data Source
|
|
|
Country
|
|
|
United States 4.2 3.7 OAS 2012[5][6]
|
|
|
Israel 2.1 0.9 WHO 2012[10]
|
|
|
Canada 1.6 0.8 Krug 1998[13]
|
|
|
Luxembourg 2.5 0.6 WHO 2012[10]
|
|
|
Greece 1.5 0.6 Krug 1998[13]
|
|
|
\end{verbatim}
|
|
|
\begin{center}
|
|
|
\includegraphics[width=0.7\textwidth]{Gun_Data_orig_files/Gun_Data_orig_fig_00.png}
|
|
|
\par
|
|
|
\end{center}
|
|
|
\end{codeoutput}
|
|
|
\end{codecell}
|
|
|
\textbf{NOTE:} these bar graphs should not be interpreted as fractions
|
|
|
of a total, as the two data sources do not appear to be comparable. But
|
|
|
the red and blue bar graphs should still be internally comparable.
|
|
|
|
|
|
The US is easily \#1 of 30 wealthiest countries in Gun Homicides per
|
|
|
capita, by a factor of 4:1.
|
|
|
|
|
|
Adding USA, Canada, and Mexico to all of Europe, USA is a strong \#2
|
|
|
behind Mexico in total gun homicides per capita.
|
|
|
|
|
|
\begin{codecell}
|
|
|
\begin{codeinput}
|
|
|
\begin{lstlisting}
|
|
|
index = (data['Region'] == 'Europe') + \
|
|
|
(data.index == 'United States') + \
|
|
|
(data.index == 'Canada') + \
|
|
|
(data.index == 'Mexico')
|
|
|
selected = data[index]
|
|
|
|
|
|
print "By Total Gun Homicides"
|
|
|
sys.stdout.flush()
|
|
|
|
|
|
by_guns = selected.sort("Gun Homicides", ascending=False)
|
|
|
#by_guns['Gun Homicides'].plot(kind='bar')
|
|
|
plot_percapita(by_guns, limit=25)
|
|
|
display_relevant(selected, limit=None)
|
|
|
|
|
|
\end{lstlisting}
|
|
|
\end{codeinput}
|
|
|
\begin{codeoutput}
|
|
|
\begin{verbatim}
|
|
|
By Total Gun Homicides
|
|
|
\end{verbatim}
|
|
|
\begin{center}
|
|
|
\includegraphics[width=0.7\textwidth]{Gun_Data_orig_files/Gun_Data_orig_fig_01.png}
|
|
|
\par
|
|
|
\end{center}
|
|
|
\begin{verbatim}
|
|
|
Homicides Gun Homicides Gun Data Source
|
|
|
Country
|
|
|
Mexico 16.9 10.0 UNODC 2011[4]
|
|
|
United States 4.2 3.7 OAS 2012[5][6]
|
|
|
Montenegro 3.5 2.1 WHO 2012[10]
|
|
|
Moldova 7.5 1.0 WHO 2012[10]
|
|
|
Canada 1.6 0.8 Krug 1998[13]
|
|
|
Serbia 1.2 0.6 WHO 2012[10]
|
|
|
Luxembourg 2.5 0.6 WHO 2012[10]
|
|
|
Greece 1.5 0.6 Krug 1998[13]
|
|
|
Croatia 1.4 0.6 WHO 2012[10]
|
|
|
Switzerland 0.7 0.5 OAS 2011[1]
|
|
|
Malta 1.0 0.5 WHO 2012[10]
|
|
|
Portugal 1.2 0.5 WHO 2012[10]
|
|
|
Belarus 4.9 0.4 UNODC 2002[7]
|
|
|
Ireland 1.2 0.4 WHO 2012[10]
|
|
|
Italy 0.9 0.4 WHO 2012[10]
|
|
|
Ukraine 5.2 0.3 UNODC 2000[11]
|
|
|
Estonia 5.2 0.3 WHO 2012[10]
|
|
|
Belgium 1.7 0.3 WHO 2012[10]
|
|
|
Finland 2.2 0.3 WHO 2012[10]
|
|
|
Lithuania 6.6 0.2 WHO 2012[10]
|
|
|
Bulgaria 2.0 0.2 WHO 2012[10]
|
|
|
Georgia 4.3 0.2 WHO 2012[10]
|
|
|
Denmark 0.9 0.2 WHO 2012[10]
|
|
|
France 1.1 0.2 WHO 2012[10]
|
|
|
Netherlands 1.1 0.2 WHO 2012[10]
|
|
|
Sweden 1.0 0.2 WHO 2012[10]
|
|
|
Slovakia 1.5 0.2 WHO 2012[10]
|
|
|
Austria 0.6 0.2 WHO 2012[10]
|
|
|
Latvia 3.1 0.2 WHO 2012[10]
|
|
|
Spain 0.8 0.1 WHO 2012[10]
|
|
|
Hungary 1.3 0.1 WHO 2012[10]
|
|
|
Czech Republic 1.7 0.1 WHO 2012[10]
|
|
|
Germany 0.8 0.1 WHO 2012[10]
|
|
|
Slovenia 0.7 0.1 WHO 2012[10]
|
|
|
Romania 2.0 0.0 WHO 2012[10]
|
|
|
United Kingdom 1.2 0.0 WHO2012 [10]
|
|
|
Norway 0.6 0.0 WHO 2012[10]
|
|
|
Poland 1.1 0.0 WHO 2012[10]
|
|
|
\end{verbatim}
|
|
|
\end{codeoutput}
|
|
|
\end{codecell}
|
|
|
Let's just compare US, Canada, and UK:
|
|
|
|
|
|
\begin{codecell}
|
|
|
\begin{codeinput}
|
|
|
\begin{lstlisting}
|
|
|
select = data.ix[['United States', 'Canada', 'United Kingdom']]
|
|
|
plot_percapita(select)
|
|
|
\end{lstlisting}
|
|
|
\end{codeinput}
|
|
|
\begin{codeoutput}
|
|
|
\begin{center}
|
|
|
\includegraphics[width=0.7\textwidth]{Gun_Data_orig_files/Gun_Data_orig_fig_02.png}
|
|
|
\par
|
|
|
\end{center}
|
|
|
\end{codeoutput}
|
|
|
\end{codecell}
|
|
|
Normalize to the US numbers (inverse)
|
|
|
|
|
|
\begin{codecell}
|
|
|
\begin{codeinput}
|
|
|
\begin{lstlisting}
|
|
|
select['Homicides'] = select['Homicides']['United States'] / select['Homicides']
|
|
|
select['Gun Homicides'] = select['Gun Homicides']['United States'] / select['Gun Homicides']
|
|
|
display_relevant(select)
|
|
|
\end{lstlisting}
|
|
|
\end{codeinput}
|
|
|
\begin{codeoutput}
|
|
|
\begin{verbatim}
|
|
|
Homicides Gun Homicides Gun Data Source
|
|
|
United States 1.0 1.0 OAS 2012[5][6]
|
|
|
Canada 2.6 4.9 Krug 1998[13]
|
|
|
United Kingdom 3.5 92.5 WHO2012 [10]
|
|
|
\end{verbatim}
|
|
|
\end{codeoutput}
|
|
|
\end{codecell}
|
|
|
So, you are 2.6 times more likely to be killed in the US than in Canada,
|
|
|
and 3.5 times more likely than in the UK. That's bad, but not extreme.
|
|
|
|
|
|
However, you are 4.9 times more likely to be killed \emph{with a gun} in
|
|
|
the US than in Canada, and almost 100 times more likely than in the UK.
|
|
|
That is pretty extreme.
|
|
|
|
|
|
Countries represented:
|
|
|
|
|
|
\begin{codecell}
|
|
|
\begin{codeinput}
|
|
|
\begin{lstlisting}
|
|
|
for country in data.index:
|
|
|
print country
|
|
|
\end{lstlisting}
|
|
|
\end{codeinput}
|
|
|
\begin{codeoutput}
|
|
|
\begin{verbatim}
|
|
|
El Salvador
|
|
|
Jamaica
|
|
|
Honduras
|
|
|
Guatemala
|
|
|
Colombia
|
|
|
Brazil
|
|
|
Panama
|
|
|
Mexico
|
|
|
Paraguay
|
|
|
Nicaragua
|
|
|
United States
|
|
|
Costa Rica
|
|
|
Uruguay
|
|
|
Argentina
|
|
|
Barbados
|
|
|
Montenegro
|
|
|
Peru
|
|
|
Moldova
|
|
|
Israel
|
|
|
India
|
|
|
Canada
|
|
|
Serbia
|
|
|
Luxembourg
|
|
|
Greece
|
|
|
Uzbekistan
|
|
|
Croatia
|
|
|
Kyrgyzstan
|
|
|
Switzerland
|
|
|
Malta
|
|
|
Portugal
|
|
|
Belarus
|
|
|
Ireland
|
|
|
Italy
|
|
|
Kuwait
|
|
|
Ukraine
|
|
|
Estonia
|
|
|
Belgium
|
|
|
Finland
|
|
|
Lithuania
|
|
|
Cyprus
|
|
|
Bulgaria
|
|
|
Georgia
|
|
|
Denmark
|
|
|
France
|
|
|
Netherlands
|
|
|
Sweden
|
|
|
Slovakia
|
|
|
Qatar
|
|
|
Austria
|
|
|
Latvia
|
|
|
New Zealand
|
|
|
Spain
|
|
|
Hungary
|
|
|
Czech Republic
|
|
|
Hong Kong
|
|
|
Australia
|
|
|
Singapore
|
|
|
Chile
|
|
|
Germany
|
|
|
Slovenia
|
|
|
Romania
|
|
|
Azerbaijan
|
|
|
South Korea
|
|
|
United Kingdom
|
|
|
Norway
|
|
|
Japan
|
|
|
Poland
|
|
|
Mauritius
|
|
|
\end{verbatim}
|
|
|
\end{codeoutput}
|
|
|
\end{codecell}
|
|
|
\end{document}
|
|
|
|