#import the important libraries
import pandas as pd
import numpy as np # linear algebra
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import plotly.express as px
import plotly.graph_objects as go
from collections import Counter
import seaborn as sns
%matplotlib inline
# Load the dataset from the CSV file; the file is decoded as ISO-8859-1.
DATA_PATH = "globalterrorismdb_0718dist.csv"
df = pd.read_csv(DATA_PATH, encoding="ISO-8859-1")
df.head(10)
# Inspect the (rows, columns) dimensions of the loaded frame.
df.shape
Rename the necessary columns
# Map the raw column names to the readable names used in the rest of the analysis.
column_renames = {
    'iyear': 'Year',
    'imonth': 'Month',
    'city': 'City',
    'iday': 'Day',
    'country_txt': 'Country',
    'region_txt': 'Region',
    'attacktype1_txt': 'AttackType',
    'target1': 'Target',
    'nkill': 'Killed',
    'nwound': 'Wounded',
    'summary': 'Summary',
    'gname': 'Group',
    'targtype1_txt': 'Target_type',
    'weaptype1_txt': 'Weapon_type',
    'motive': 'Motive',
}
df.rename(columns=column_renames, inplace=True)

# Casualties per incident = killed + wounded.
df['Casualities'] = df['Killed'] + df['Wounded']

# Keep only the columns the analysis below works with.
selected_columns = [
    'Year', 'Month', 'Day',
    'Country', 'Region', 'City',
    'latitude', 'longitude',
    'AttackType', 'Killed', 'Wounded', 'Casualities',
    'Target', 'Group', 'Target_type', 'Weapon_type',
]
df = df[selected_columns]
df.head(10)
Checking for Missing data:
# Number of missing values in each column.
df.isna().sum()
Removing the Missing data:
# Drop every row that has a missing value in any of the selected columns.
# The result is reassigned rather than dropped in place: `df` was produced by
# a column selection, and mutating such a derived frame in place can trigger
# pandas' SettingWithCopyWarning.
df = df.dropna(axis=0)
# Dimensions after removing the incomplete rows.
df.shape
Re-Checking for Missing Data:
# Print the frame summary (per-column non-null counts and dtypes) to confirm
# that no missing values remain.
df.info()
Yearly count of terrorist attacks:
# Bar chart with one bar per year, showing how many records fall in that year.
year_fig, year_ax = plt.subplots(figsize=(15, 10))
sns.countplot(data=df, x="Year", ax=year_ax)
# Rotate the year labels so they do not overlap.
plt.xticks(rotation=90)
year_ax.set_title('Number Of Terrorist Activities Each Year')
plt.show()
From the graph we can see that 2013–2017 saw the highest number of attacks, with 2014 being the peak year.
# Sum the casualties of all records for each year.
casualties_by_year = df.groupby('Year')['Casualities'].sum()
# Turn the per-year totals into a two-column frame: Year, Casualities.
year_cas = casualties_by_year.reset_index(name='Casualities')
px.bar(
    data_frame=year_cas,
    x='Year',
    y='Casualities',
    color='Casualities',
    template='plotly_dark',
)
It is observed that 2015 recorded the highest number of casualties.
# Count how many records there are for each target type.
target = df['Target_type'].tolist()
target_map = dict(Counter(target))
# One row per target type: its name and its number of records.
target_df = pd.DataFrame(
    list(target_map.items()),
    columns=['Target Type', 'Count'],
)
px.bar(
    data_frame=target_df,
    x='Target Type',
    y='Count',
    color='Target Type',
    template='plotly_dark',
)
Private Citizens and Property has the highest count among all target types.
# Total casualties for each attack type.
AttackType = df.pivot_table(columns='AttackType', values='Casualities', aggfunc='sum')
# Transpose so that every attack type is a row holding its casualty total.
AttackType = AttackType.T
AttackType['Type'] = AttackType.index

# Plot the share of casualties per attack type.
# Labels and values are read from the same table so that each slice is paired
# with its own total. Taking the labels from df['AttackType'].unique() would
# use a different ordering than the aggregated table, and flattening the whole
# table would mix the type names into the numeric values.
labels = AttackType['Type'].tolist()
values = AttackType['Casualities'].tolist()
attack_type = labels  # name kept for any later code that refers to it
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3)])
fig.update_layout(template='plotly_dark')
fig.show()
The Bombing and Explosion method accounts for the largest share among all attack types.
df.shape
from collections import Counter

# Count how many records there are for each attack type.
values = df['AttackType'].tolist()
value_map = dict(Counter(values))
# One row per attack type: its name and its number of records.
value_df = pd.DataFrame(
    list(value_map.items()),
    columns=["AttackType", "Count of Attack Type"],
)
px.bar(
    data_frame=value_df,
    x='AttackType',
    y='Count of Attack Type',
    color='AttackType',
    template="plotly_dark",
)
Again, Bombing and Explosion has the highest count.
import folium
from folium.plugins import MarkerCluster

# Restrict the map to the records of 2014.
year = df[df['Year'] == 2014]
# Label-based column slice from 'City' through 'longitude'; each resulting
# row is used below as [city, latitude, longitude].
mapData = year.loc[:, 'City':'longitude']
mapData = mapData.dropna().values.tolist()

# NOTE: `map` shadows the Python builtin of the same name; the name is kept
# so that any existing reference to this map object keeps working.
map = folium.Map(location=[0, 50], tiles='CartoDB positron', zoom_start=2)
# Group nearby markers into clusters so the world view stays readable.
markerCluster = MarkerCluster().add_to(map)
# One marker per record, with the city name as its popup text.
for city, latitude, longitude in mapData:
    folium.Marker(
        location=[latitude, longitude],
        popup=city,
    ).add_to(markerCluster)
map