CMSC320 Data Science Tutorial - Analyzing US Fatal Shootings

Katherine Kim, UID: 115928268, May 17, 2021

Welcome to my CMSC320 Data Science tutorial! In this tutorial, we will use the data science pipeline to analyze a dataset (retrieving data, cleaning data, visual exploration/analysis, and machine learning).

The data we will use covers US police shootings. Given the lack of gun control and the persistence of systemic racism in the United States, incidents of shootings (mass or otherwise) and police brutality have become increasingly visible. I want to analyze the data behind some of these shootings, investigate the various factors that contribute to these deaths, and determine how strong the correlation is between race and fatal shootings. Then, we will attempt to develop a model to predict race from the given data.


Below you will find all the packages this tutorial will use. We will use pandas to create dataframes/tables, matplotlib and folium to visualize the data with numpy to assist, and sklearn/statsmodels for machine learning and significance testing.
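A minimal sketch of the core imports is below; folium, sklearn, and statsmodels are imported later, in the sections where they are actually used.

```python
# Core imports for the tutorial. The Agg backend line is only needed when
# running headless (outside a notebook).
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
```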

Part 1 - Retrieving the Data

The data we will be looking at is from the Washington Post "Fatal Force" github: https://github.com/washingtonpost/data-police-shootings, which can also be found on Kaggle. This database contains records of every fatal shooting in the United States by a police officer in the line of duty since Jan. 1, 2015. As stated in their Github: "The Post is documenting only those shootings in which a police officer, in the line of duty, shoots and kills a civilian — the circumstances that most closely parallel the 2014 killing of Michael Brown in Ferguson, Mo., which began the protest movement culminating in Black Lives Matter and an increased focus on police accountability nationwide. The Post is not tracking deaths of people in police custody, fatal shootings by off-duty officers or non-shooting deaths."

After downloading the CSV from their GitHub, we can retrieve it as follows:
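Loading the file is a one-liner with pandas. In the tutorial this is simply `pd.read_csv("fatal-police-shootings-data.csv")` (the filename is an assumption; use whatever name you saved the download under). To keep this sketch self-contained, a tiny inline sample with a few of the real columns stands in for the file:

```python
import io
import pandas as pd

# Stand-in for: data = pd.read_csv("fatal-police-shootings-data.csv")
# The two rows below are made-up examples, not records from the dataset.
csv_text = (
    "name,date,age,race,state,armed\n"
    "John Doe,2015-01-02,34,W,CA,gun\n"
    "Jane Doe,2015-01-03,25,B,TX,toy weapon\n"
)
data = pd.read_csv(io.StringIO(csv_text))
print(data.head())
```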

Above you can see the raw CSV data from the GitHub. We can see that the columns include the victim's name, date of death, age, race, state, etc. The next thing we will want to do is tidy/clean the data so it is useful for analysis. Pandas has a lot of functionality that allows us to manipulate and edit dataframes.

Part 2 - Tidying Data

The main thing we want to investigate is the relationship between race and fatal shootings. The current race column has letters to symbolize the different groups, and it is noted in the GitHub that missing ('NA') values are for when the person's race was unknown. Let's replace these letters with the actual race labels, and replace all the missing values with the label "unknown."
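The replacement can be done with a small mapping dictionary (the code-to-label mapping follows the README on the Fatal Force GitHub; the sample rows are placeholders):

```python
import pandas as pd

# Map the one-letter race codes to full labels, and fill missing
# values with "Unknown".
race_labels = {
    "W": "White", "B": "Black", "A": "Asian",
    "H": "Hispanic", "N": "Native American", "O": "Other",
}
data = pd.DataFrame({"race": ["W", "B", None, "H"]})  # stand-in for the real table
data["race"] = data["race"].map(race_labels).fillna("Unknown")
print(data["race"].tolist())
```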

Great. Now it is easier to read the races, but we still need to make it easier to analyze each race. Rather than having each row be by name, we can make a new data frame where each row will be a race category. That way we can compare values/characteristics between each race as a group.
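One way to build that per-race frame is with `value_counts` (column names here are assumptions; sample rows stand in for the real table):

```python
import pandas as pd

# Count fatal shootings per race and put the result in a new frame
# with one row per race.
data = pd.DataFrame({"race": ["White", "White", "Black", "Hispanic"]})
by_race = (
    data["race"].value_counts()
    .rename_axis("Race")
    .reset_index(name="Num Killed")
)
print(by_race)
```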

^^ Like this. Now from here, we can add most of the other categories. For example, the raw data's age column can be sorted by race and age group: we can add columns dividing the ages into below 18 (children), 18 to 30 (young adults), 30 to 60 (older adults), and over 60 (seniors). The raw data's threat level column can be converted into a column counting the number of individuals in each race who attacked or did not attack during their encounter. We can also count the number of individuals who showed signs of mental illness, and the number of encounters where police wore a body camera.
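The age bucketing described above is a natural fit for `pd.cut`; a sketch with placeholder rows (bin edges match the groups above):

```python
import pandas as pd

# Bucket ages into the four groups, then count victims per race
# and age group with a crosstab.
data = pd.DataFrame({
    "race": ["White", "Black", "White"],
    "age": [15, 25, 67],
})
data["age_group"] = pd.cut(
    data["age"],
    bins=[0, 18, 30, 60, 120],
    labels=["Child", "Young Adult", "Older Adult", "Senior"],
)
counts = pd.crosstab(data["race"], data["age_group"])
print(counts)
```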

But before we do that, what would be the flaw of just counting and comparing the quantities? Let's quickly graph what we have to see:
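A quick bar chart of the raw counts might look like this (the numbers below are placeholders, not the dataset's actual counts):

```python
import matplotlib
matplotlib.use("Agg")  # headless-safe; unnecessary in a notebook
import matplotlib.pyplot as plt
import pandas as pd

# Bar chart of raw counts per race (placeholder values).
by_race = pd.DataFrame({
    "Race": ["White", "Black", "Hispanic", "Asian", "Native American"],
    "Num Killed": [2500, 1300, 900, 100, 80],
})
ax = by_race.plot.bar(x="Race", y="Num Killed", legend=False)
ax.set_ylabel("Number killed")
plt.tight_layout()  # plt.show() in a notebook
```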

So at first glance, we see that white people have had the most fatal encounters. So does that mean we can conclude that white people suffer the most fatal shootings and the idea of police brutality and systemic racism is a farce? What is misleading about this graph?

The main misleading component is that these quantities are not out of the same total. There are far more white people in this country than any other race, so naturally the quantity will be larger. Therefore, it is misleading to compare the data this way.

Instead, we should look at the number killed as a proportion. Since these quantities do not share the same total, let's divide each value by the appropriate total to get a proportion. What is that total? For that, we can look at demographics, using the 2019 data from the Census: https://www.census.gov/quickfacts/fact/table/US/PST045219 which gives the following:

Percent White: 60.1%
Percent Black: 13.4%
Percent Asian: 5.9%
Percent Hispanic: 18.5%
Percent Native: 1.3%

Using these numbers, we can take each quantity and divide it by the demographic percentage to get a proportional count of fatal shootings for each race. The result answers: "per 1% of the population of a given race, how many people suffer a fatal shooting?" This makes comparisons fairer and more accurate, as it "equalizes" the impact/significance of each value. We should apply this to all the quantity columns so that we can fairly compare the races. So rather than "Number killed" or "Number children," each column will be "Proportion killed/children."
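The proportion computation is just an elementwise division by the Census percentages above (the counts below are placeholders chosen for readability):

```python
import pandas as pd

# 2019 Census demographic percentages (from the quickfacts link above).
demographics = {
    "White": 60.1, "Black": 13.4, "Asian": 5.9,
    "Hispanic": 18.5, "Native American": 1.3,
}
# Placeholder counts; divide each by the race's share of the population
# to get "killed per 1% of that race's population".
by_race = pd.DataFrame({
    "Race": ["White", "Black"],
    "Num Killed": [601, 268],
})
by_race["Proportion Killed"] = by_race.apply(
    lambda r: r["Num Killed"] / demographics[r["Race"]], axis=1
)
print(by_race["Proportion Killed"].tolist())
```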

With this set, let's complete the table.

Looking good so far. We still need to convert the first column we had, and it would also be a good idea to add the demographics to the table in case we need them later. Before we do that, we should drop the "Other" and "Unknown" race categories. We do not have accurate demographic percentages for these, so from here on we will not be analyzing these categories.

Great! Now let's tackle the armed category:

The armed category indicates the 'weapon' the victim was carrying during the fatal encounter. Let's look at the list:

Clearly, not all of these are worthy of being part of the 'armed' category. Some individuals were unarmed or carried harmless objects like a toy or an air conditioner. Later, we will probably want to look at how many of these victims died despite not carrying anything threatening, so we want to make columns counting the number of lethal or nonlethal weapons carried per race.

The code will be similar to what we did to make the other columns, except we need an extra step. We have to divide the list of values in the 'armed' category into Lethal and Nonlethal. This may be fairly subjective, but the lists can be seen below. For this tutorial, weapons were included in the Lethal category if they were anything related to a gun, sharp object, or explosive.
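The classification step could be sketched like this; the short lists here are illustrative only, while the real lists cover every value appearing in the 'armed' column:

```python
import pandas as pd

# A (subjective) split of 'armed' values into lethal vs. nonlethal.
lethal = {"gun", "knife", "machete", "crossbow"}
nonlethal = {"unarmed", "toy weapon", "air conditioner", "flashlight"}

data = pd.DataFrame({"armed": ["gun", "toy weapon", "knife", "unarmed"]})
data["lethality"] = data["armed"].apply(
    lambda a: "Lethal" if a in lethal
    else ("Nonlethal" if a in nonlethal else "Unknown")
)
print(data["lethality"].tolist())
```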

Great. We have tidied most of the data! We can start the visual analysis now, and if we need to manipulate the data any further we can do so as we encounter the need.


Part 3 - Visual Analysis

Let's start with the simplest question - what race has suffered the most fatal shootings?

As we can see above, this graph tells quite a different story than the previous graph with just the quantities. Black people are killed more per 1% of their population than any other race, at more than twice the rate for white people. This shows that even though more white people may suffer fatal shootings in raw numbers, Black people are disproportionately targeted. Hispanic and Native Americans also appear to be more disproportionately targeted than white people.

Let's visualize some of the other columns. We mainly want to investigate "unfair/unjust" shootings, i.e., incidents that may not have necessarily had to end with the victim fatally shot. Obviously, there are details of each event that remain unknown and may tell a different story. But, for example, we can visualize the number of people who carried a nonlethal weapon but still got shot, or the number of victims who were children (< 18) per race. We can then see if there is a disparity across races (e.g., is one race more disproportionately targeted for a category?).

There is a lot of information we can gather from this graph. We see that Hispanic, Black, and Native Americans were disproportionately victims of fatal shootings despite carrying a nonlethal weapon, compared to white individuals. Compared to other races, Black people have a higher proportion of individuals under 18 (children) who were victims of fatal shootings. The proportion of individuals who showed signs of mental illness was highest among white and Black individuals, with Asian individuals having the lowest proportion. We also see that Hispanic, Native, and Black individuals had a higher proportion of individuals who were both shot and tasered (a possible sign of excessive force) compared to white individuals. With the exception of Asian individuals, all races have a high proportion of individuals fatally shot while police were not wearing a body camera (>40 per 1%), with Black people having the highest proportion. Black, Hispanic, and Native individuals also have a higher proportion of individuals who were shot but were not attacking, compared to white individuals. Overall, these attributes are indicative of the prevalence of colorism and systemic racism in policing.

Next, let's add in time as a factor. We can analyze shootings over time for each race. For that, we are going to have to extract data from the raw data table in a different way, taking the year into account.

We will make a dictionary of race: dictionary pairs. The inner dictionary will be date: number killed pairs for each race. We can then take the number killed and divide by the demographics to get the proportion.
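The nested-dictionary construction described above might look like this (sample rows and a trimmed demographics dict stand in for the real data):

```python
import pandas as pd

# Count fatal shootings per race per year, then divide by the race's
# demographic percentage to get a proportion.
demographics = {"White": 60.1, "Black": 13.4}
data = pd.DataFrame({
    "race": ["White", "White", "Black"],
    "date": pd.to_datetime(["2015-03-01", "2016-07-14", "2015-05-20"]),
})

by_year = {}
for race, group in data.groupby("race"):
    counts = group["date"].dt.year.value_counts().to_dict()
    by_year[race] = {yr: n / demographics[race] for yr, n in counts.items()}
print(by_year)
```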

Now that we have this table, we can group by race and make a plot showing shootings over time. We will exclude the year 2021, as we expect those values to show a significant dip since the year has not ended yet (so not all the data has been collected).
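Filtering out 2021 and pivoting to one line per race could be sketched as follows (the proportion values are placeholders):

```python
import matplotlib
matplotlib.use("Agg")  # headless-safe; unnecessary in a notebook
import matplotlib.pyplot as plt
import pandas as pd

# Long-format (Race, Year, Proportion) table with placeholder values.
df = pd.DataFrame({
    "Race": ["White", "White", "Black", "Black", "White", "Black"],
    "Year": [2015, 2016, 2015, 2016, 2021, 2021],
    "Proportion": [8.2, 8.0, 17.5, 17.9, 3.1, 6.0],
})
df = df[df["Year"] < 2021]  # drop the incomplete 2021 data
pivoted = df.pivot(index="Year", columns="Race", values="Proportion")
ax = pivoted.plot()  # one line per race
ax.set_ylabel("Killed per 1% of population")
```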

From this graph, we can see the proportion of individuals killed remained relatively constant for each race except Native Americans, who had a significant peak in 2017. Overall, I personally found the statistics for Native Americans to be higher than I expected. As for explaining the peak, perhaps there was some event, law, etc. that led to this trend.

Finally, let's graph the total number of shootings over time to see overall how that number has changed.

We can see from the graph above that total fatal shootings each year since 2015 have hovered around 1000 per year, with 2016 having the lowest total of about 960. 2020 had the highest total at 1020 shootings in a year - it is surprising to see that this number increased despite Covid-19 and lockdown measures.

For the last part of our analysis, let's look at location and fatal shootings. We are curious to see whether there is a certain area/state where these fatal shootings are concentrated - and later we can take that a step further to see how these locations differ for each race.

To visualize this, we can make a choropleth map. A choropleth map is a type of thematic map in which a set of pre-defined areas is colored or patterned based on a statistical variable representing a characteristic of that area - in this case, the total number of shootings in a state. To make maps, we will be using the Folium library, which makes it fairly simple to visualize the data the way we want. However, before we do that, we need to extract the data in preparation for the map. Earlier, our tidied table's rows were based on race. This time, we need a row for each state, with the column values corresponding to the total number of fatal shootings in that state. We will also make columns for each race so that we can make choropleth maps based on state and race as well.
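The per-state table could be built with a crosstab (column names are assumptions; sample rows stand in for the real data):

```python
import pandas as pd

# One row per state: a count per race plus a Total column.
data = pd.DataFrame({
    "state": ["CA", "CA", "TX"],
    "race": ["Hispanic", "White", "White"],
})
by_state = (
    pd.crosstab(data["state"], data["race"])
    .assign(Total=lambda t: t.sum(axis=1))
    .reset_index()
)
print(by_state)
```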

Now that our table is ready, we can use Folium to make the choropleth map. Let's start by visualizing the total number of fatal shootings in each state.

We can see from this map that overall, more fatal shootings have occurred in California than in any other state, followed by Texas and Florida. Let's see how this map changes as we break it down by race.

Looking at the proportion of Black fatal shootings, the top three states are still California, Texas, and Florida.

The same top three states remain for analyzing the proportion of white fatal shootings.

The distribution looks a little different with Asian shootings. The top three states appear to be California, Texas, and Washington - however it is heavily concentrated in California. This may be due to the large Asian population in the state.

For Hispanic shootings, we see that the top three states are California, Texas, and New Mexico. However, it is also heavily concentrated in California.

The map of Native American fatal shootings looks the most different. The top three states appear to be Oklahoma, Arizona, and Washington. Going back to the observation we made earlier about the trend of Native American shootings over time, events or policies that occurred in these states during 2017 may have contributed to that peak.

For more information about Folium, their documentation is a great place to start! https://python-visualization.github.io/folium/quickstart.html#Choropleth-maps

To sum up our visual analysis, we created multiple bar plots to view different attributes of fatal shootings and compare the proportions across races. We observed a clear bias toward people of color being victims of fatal shootings. We analyzed how trends changed over time and saw that Native Americans had the most varied pattern. Finally, we made choropleth maps to get an idea of where most of these shootings occur in the United States and saw that most seem to occur in California.


Part 4: Hypothesis Testing and Machine Learning

Now that we have done some visual analysis, let's get into some hypothesis testing and machine learning.

Hypothesis testing is a way to "test" our data to see whether we have meaningful, or statistically significant, results. Our main focus is the correlation between race and fatal shootings, and we saw from our visual analysis that the proportion of people killed differs by race, but is that difference significant? To determine this, we have to do a hypothesis test.

However, given that one of our variables, race, is categorical, that limits the type of test we can use. We will be using the ANOVA test, i.e., Analysis of Variance. This compares the means among different groups to determine whether one or more groups differ in a statistically significant way. The null (baseline) hypothesis assumes that all means are equal; if race had no effect on proportion killed, we would expect this hypothesis to hold. However, if race does contribute, then we expect to reject the null hypothesis, meaning our correlation is significant. You can read more about ANOVA testing here: https://www.reneshbedre.com/blog/anova.html

To begin the ANOVA test, we have to prep the data. This is simple, as the data table we used to graph proportions over time has everything we need. We can drop the Num Killed column, as we are looking at proportions.

For the ANOVA test, we are going to take the variances/means of each race, so we want each race to be a column. For that, we can pivot the data table like so:

Now we can use the anova function from statsmodels to calculate the F and p values of the data. If the p value is < .05, that indicates we would not expect this result if the null hypothesis were true, so we can reject the null hypothesis and say the correlation is statistically significant.
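The tutorial uses statsmodels for this step; as a compact, self-contained sketch, scipy's `f_oneway` computes the same one-way F statistic directly from the pivoted per-race columns (the proportion values below are placeholders):

```python
from scipy import stats
import pandas as pd

# One-way ANOVA across per-race proportion columns: each column is one
# race's yearly proportions (placeholder values).
pivoted = pd.DataFrame({
    "White": [8.2, 8.0, 8.1, 8.3],
    "Black": [17.5, 17.9, 18.2, 17.1],
    "Asian": [2.9, 3.1, 3.0, 2.8],
})
f_val, p_val = stats.f_oneway(*[pivoted[c] for c in pivoted.columns])
print(f_val, p_val)
```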

As we can see from above, the p-value is .000002, which is less than .05. This indicates that there is a significant difference among means and that there is an association between Race and proportion killed in a fatal shooting!

Finally, let's use our data to practice some machine learning. Given all the categorical values in our raw data, what if there were a way to build a model that predicts the race of an individual based on these categories? We can attempt this using a decision tree. A decision tree uses an algorithm to take data and split it into nodes; the algorithm chooses a node/direction to go toward based on the model and ultimately ends up with a prediction/classification.

Since these values are categorical, we need to find a way to represent them numerically so that they can be input into the algorithm. We can do this with the sklearn LabelEncoder:
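Encoding each column might look like this (the column names and rows are illustrative stand-ins for the raw table):

```python
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Fit one encoder per categorical column and replace the strings
# with integer codes.
data = pd.DataFrame({
    "race": ["White", "Black", "White"],
    "armed": ["gun", "unarmed", "knife"],
})
encoders = {}
for col in data.columns:
    encoders[col] = LabelEncoder()
    data[col] = encoders[col].fit_transform(data[col])
print(data)
```

Note that `LabelEncoder` is technically intended for target labels; sklearn's `OrdinalEncoder` is the feature-oriented equivalent and would work here too.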

Now we can split the data into training and test data. The training data is used to fit the model, and the test data is used to check that the model we created generalizes accurately.
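sklearn's `train_test_split` handles the split; a sketch with placeholder arrays:

```python
from sklearn.model_selection import train_test_split
import numpy as np

# Placeholder feature matrix X and label vector y.
X = np.arange(20).reshape(10, 2)
y = np.array([0, 1] * 5)
# Hold out 30% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)
print(len(X_train), len(X_test))
```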

Now we can make the model with the training data:
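Fitting and scoring the tree could be sketched as below. The iris dataset stands in for our encoded shootings table so the example runs on its own; with the real data, `X` would be the encoded category columns and `y` the encoded race column.

```python
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

# Stand-in data; replace with the encoded shootings features/labels.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit the decision tree on the training split, then compare accuracy
# on training vs. test data.
clf = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
print(round(clf.score(X_train, y_train), 2), round(clf.score(X_test, y_test), 2))
```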

As we can see, the accuracy of our model with the training data is 90%, but 41% with the test data. In the future, we can hone in and tweak this model to get a higher accuracy on test data.


Part 5: Communication/Wrap Up

This concludes our walkthrough of the data science pipeline! I hope you learned a lot. From this tutorial, we were able to gain insight into various factors surrounding the victims of fatal shootings over the past 5 years. We visualized the many differences in values based on race, and were able to show that there is a statistically significant correlation between race and the proportion of individuals killed in fatal shootings. It is evident that a lot of reform is needed to reshape our policing and justice system and combat systemic racism. For future steps, one could continue to hone the decision tree, or create other machine learning models that could better serve the purpose of investigating this data.

Thank you!