Perform ‘Exploratory Data Analysis’ on dataset ‘Indian Premier League’

● As a sports analysts, find out the most successful teams, players and factors contributing win or loss of a team.

● Suggest teams or players a company should endorse for its products.

● You can choose any of the tool of your choice (Python/R/Tableau/PowerBI/Excel/SAP/SAS)

In [79]:
import pandas as pd
import numpy as np
import seaborn as sn
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as py
from plotly.subplots import make_subplots
from plotly import tools
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
deliveries = pd.read_csv('deliveries.csv')
matches = pd.read_csv('matches.csv')
In [3]:
pd.set_option('display.max_columns',None)
In [4]:
deliveries.head(3)
Out[4]:
match_id inning batting_team bowling_team over ball batsman non_striker bowler is_super_over wide_runs bye_runs legbye_runs noball_runs penalty_runs batsman_runs extra_runs total_runs player_dismissed dismissal_kind fielder
0 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 1 DA Warner S Dhawan TS Mills 0 0 0 0 0 0 0 0 0 NaN NaN NaN
1 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 2 DA Warner S Dhawan TS Mills 0 0 0 0 0 0 0 0 0 NaN NaN NaN
2 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 3 DA Warner S Dhawan TS Mills 0 0 0 0 0 0 4 0 4 NaN NaN NaN
In [5]:
deliveries.shape,matches.shape
Out[5]:
((179078, 21), (756, 18))
In [6]:
matches.head(3)
Out[6]:
id season city date team1 team2 toss_winner toss_decision result dl_applied winner win_by_runs win_by_wickets player_of_match venue umpire1 umpire2 umpire3
0 1 2017 Hyderabad 2017-04-05 Sunrisers Hyderabad Royal Challengers Bangalore Royal Challengers Bangalore field normal 0 Sunrisers Hyderabad 35 0 Yuvraj Singh Rajiv Gandhi International Stadium, Uppal AY Dandekar NJ Llong NaN
1 2 2017 Pune 2017-04-06 Mumbai Indians Rising Pune Supergiant Rising Pune Supergiant field normal 0 Rising Pune Supergiant 0 7 SPD Smith Maharashtra Cricket Association Stadium A Nand Kishore S Ravi NaN
2 3 2017 Rajkot 2017-04-07 Gujarat Lions Kolkata Knight Riders Kolkata Knight Riders field normal 0 Kolkata Knight Riders 0 10 CA Lynn Saurashtra Cricket Association Stadium Nitin Menon CK Nandan NaN
In [7]:
matches.columns
Out[7]:
Index(['id', 'season', 'city', 'date', 'team1', 'team2', 'toss_winner',
       'toss_decision', 'result', 'dl_applied', 'winner', 'win_by_runs',
       'win_by_wickets', 'player_of_match', 'venue', 'umpire1', 'umpire2',
       'umpire3'],
      dtype='object')
In [8]:
temp_match = matches[['season','team1', 'team2','toss_winner','toss_decision','winner', 'win_by_runs',
       'win_by_wickets', 'player_of_match', 'venue']]
temp_match.head()
Out[8]:
season team1 team2 toss_winner toss_decision winner win_by_runs win_by_wickets player_of_match venue
0 2017 Sunrisers Hyderabad Royal Challengers Bangalore Royal Challengers Bangalore field Sunrisers Hyderabad 35 0 Yuvraj Singh Rajiv Gandhi International Stadium, Uppal
1 2017 Mumbai Indians Rising Pune Supergiant Rising Pune Supergiant field Rising Pune Supergiant 0 7 SPD Smith Maharashtra Cricket Association Stadium
2 2017 Gujarat Lions Kolkata Knight Riders Kolkata Knight Riders field Kolkata Knight Riders 0 10 CA Lynn Saurashtra Cricket Association Stadium
3 2017 Rising Pune Supergiant Kings XI Punjab Kings XI Punjab field Kings XI Punjab 0 6 GJ Maxwell Holkar Cricket Stadium
4 2017 Royal Challengers Bangalore Delhi Daredevils Royal Challengers Bangalore bat Royal Challengers Bangalore 15 0 KM Jadhav M Chinnaswamy Stadium
In [9]:
temp_match['season'].unique()
Out[9]:
array([2017, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2018,
       2019], dtype=int64)
In [10]:
temp_match = temp_match.sort_values('season',ascending=True).reset_index()
temp_match = temp_match.drop(['index'],axis = 1)
temp_match.head()
Out[10]:
season team1 team2 toss_winner toss_decision winner win_by_runs win_by_wickets player_of_match venue
0 2008 Chennai Super Kings Rajasthan Royals Rajasthan Royals field Rajasthan Royals 0 3 YK Pathan Dr DY Patil Sports Academy
1 2008 Chennai Super Kings Rajasthan Royals Chennai Super Kings bat Rajasthan Royals 0 8 Sohail Tanvir Sawai Mansingh Stadium
2 2008 Mumbai Indians Delhi Daredevils Delhi Daredevils field Mumbai Indians 29 0 SM Pollock Dr DY Patil Sports Academy
3 2008 Kings XI Punjab Kolkata Knight Riders Kings XI Punjab bat Kings XI Punjab 9 0 IK Pathan Punjab Cricket Association Stadium, Mohali
4 2008 Deccan Chargers Royal Challengers Bangalore Deccan Chargers bat Royal Challengers Bangalore 0 5 R Vinay Kumar Rajiv Gandhi International Stadium, Uppal
In [11]:
matches['season'].unique()
Out[11]:
array([2017, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2018,
       2019], dtype=int64)
In [12]:
df_2008 = temp_match[(temp_match['season']==2008)]
df_2009 = temp_match[(temp_match['season']==2009)]
df_2010 = temp_match[(temp_match['season']==2010)]
df_2011 = temp_match[(temp_match['season']==2011)]
df_2012 = temp_match[(temp_match['season']==2012)]
df_2013 = temp_match[(temp_match['season']==2013)]
df_2014 = temp_match[(temp_match['season']==2014)]
df_2015 = temp_match[(temp_match['season']==2015)]
df_2016 = temp_match[(temp_match['season']==2016)]
df_2017 = temp_match[(temp_match['season']==2017)]
df_2018 = temp_match[(temp_match['season']==2018)]
In [13]:
df_list = [df_2008,df_2009,df_2010,df_2011,df_2012,df_2013,df_2014,df_2015,df_2016,df_2017,df_2018]
In [14]:
count  = 2008
for i in range(len(df_list)):
    print(f' Dataframe : df_{str(count)} has a shape of {df_list[i].shape}')
    count+=1
 Dataframe : df_2008 has a shape of (58, 10)
 Dataframe : df_2009 has a shape of (57, 10)
 Dataframe : df_2010 has a shape of (60, 10)
 Dataframe : df_2011 has a shape of (73, 10)
 Dataframe : df_2012 has a shape of (74, 10)
 Dataframe : df_2013 has a shape of (76, 10)
 Dataframe : df_2014 has a shape of (60, 10)
 Dataframe : df_2015 has a shape of (59, 10)
 Dataframe : df_2016 has a shape of (60, 10)
 Dataframe : df_2017 has a shape of (59, 10)
 Dataframe : df_2018 has a shape of (60, 10)
In [15]:
df_2008.columns
Out[15]:
Index(['season', 'team1', 'team2', 'toss_winner', 'toss_decision', 'winner',
       'win_by_runs', 'win_by_wickets', 'player_of_match', 'venue'],
      dtype='object')

Teams which Won the toss and also the Match

In [16]:
def returndf(df):
    
    temp = df[(df['toss_winner'])==(df['winner'])]
    return temp
In [17]:
df_2008_toss = returndf(df_2008)
df_2009_toss = returndf(df_2009)
df_2010_toss = returndf(df_2010)
df_2011_toss = returndf(df_2011)
df_2012_toss = returndf(df_2012)
df_2013_toss = returndf(df_2013)
df_2014_toss = returndf(df_2014)
df_2015_toss = returndf(df_2015)
df_2016_toss = returndf(df_2016)
df_2017_toss = returndf(df_2017)
df_2018_toss = returndf(df_2018)
In [18]:
df_2008_toss.head()
Out[18]:
season team1 team2 toss_winner toss_decision winner win_by_runs win_by_wickets player_of_match venue
0 2008 Chennai Super Kings Rajasthan Royals Rajasthan Royals field Rajasthan Royals 0 3 YK Pathan Dr DY Patil Sports Academy
3 2008 Kings XI Punjab Kolkata Knight Riders Kings XI Punjab bat Kings XI Punjab 9 0 IK Pathan Punjab Cricket Association Stadium, Mohali
6 2008 Rajasthan Royals Kolkata Knight Riders Rajasthan Royals bat Rajasthan Royals 45 0 SA Asnodkar Sawai Mansingh Stadium
7 2008 Deccan Chargers Kings XI Punjab Kings XI Punjab field Kings XI Punjab 0 7 SE Marsh Rajiv Gandhi International Stadium, Uppal
10 2008 Chennai Super Kings Royal Challengers Bangalore Chennai Super Kings bat Chennai Super Kings 13 0 MS Dhoni M Chinnaswamy Stadium
In [19]:
toss_list = [df_2008_toss,df_2009_toss,df_2010_toss,df_2011_toss,df_2012_toss,df_2013_toss,df_2014_toss,df_2015_toss,
            df_2016_toss,df_2017_toss,df_2018_toss]
shape_list = []
count = 2008
for i in range(len(toss_list)):
    print(f' Dataframe df_{str(count)}_toss has a shape of : {toss_list[i].shape}')
    shape_list.append(toss_list[i].shape[0])
    count+=1
    
 Dataframe df_2008_toss has a shape of : (28, 10)
 Dataframe df_2009_toss has a shape of : (33, 10)
 Dataframe df_2010_toss has a shape of : (31, 10)
 Dataframe df_2011_toss has a shape of : (38, 10)
 Dataframe df_2012_toss has a shape of : (33, 10)
 Dataframe df_2013_toss has a shape of : (36, 10)
 Dataframe df_2014_toss has a shape of : (30, 10)
 Dataframe df_2015_toss has a shape of : (28, 10)
 Dataframe df_2016_toss has a shape of : (34, 10)
 Dataframe df_2017_toss has a shape of : (34, 10)
 Dataframe df_2018_toss has a shape of : (32, 10)
In [20]:
shape_list,len(shape_list)
Out[20]:
([28, 33, 31, 38, 33, 36, 30, 28, 34, 34, 32], 11)
In [21]:
temp = pd.DataFrame()
temp['Season'] = [i for i in range(2008,2019)]
temp['Wins'] = np.array(shape_list)
temp.head()
Out[21]:
Season Wins
0 2008 28
1 2009 33
2 2010 31
3 2011 38
4 2012 33
In [22]:
sn.barplot(data=temp,x = 'Season',y = 'Wins',palette='plasma')
plt.title('Teams which won the Toss and Match as well')
plt.xlabel('Season')
plt.ylabel('Wins')
Out[22]:
Text(0, 0.5, 'Wins')
In [23]:
px.bar(data_frame=temp,x = 'Season',y = 'Wins',color = 'Wins',hover_name='Season',title='Toss Won Match Won :)')
In [24]:
df_2008.head()
Out[24]:
season team1 team2 toss_winner toss_decision winner win_by_runs win_by_wickets player_of_match venue
0 2008 Chennai Super Kings Rajasthan Royals Rajasthan Royals field Rajasthan Royals 0 3 YK Pathan Dr DY Patil Sports Academy
1 2008 Chennai Super Kings Rajasthan Royals Chennai Super Kings bat Rajasthan Royals 0 8 Sohail Tanvir Sawai Mansingh Stadium
2 2008 Mumbai Indians Delhi Daredevils Delhi Daredevils field Mumbai Indians 29 0 SM Pollock Dr DY Patil Sports Academy
3 2008 Kings XI Punjab Kolkata Knight Riders Kings XI Punjab bat Kings XI Punjab 9 0 IK Pathan Punjab Cricket Association Stadium, Mohali
4 2008 Deccan Chargers Royal Challengers Bangalore Deccan Chargers bat Royal Challengers Bangalore 0 5 R Vinay Kumar Rajiv Gandhi International Stadium, Uppal
In [25]:
def returndict(df):
    values = df['winner'].values
    values = list(values)
    df_val = dict(zip(values,map(values.count,values)))
    key_list = []
    val_list = []
    for key,val in df_val.items():
        key_list.append(key)
        val_list.append(val)
    df = pd.DataFrame()
    df['Team'] = key_list
    df['Wins'] = val_list
    return df

win_2008 = returndict(df_2008)
win_2009 = returndict(df_2009)
win_2010 = returndict(df_2010)
win_2011 = returndict(df_2011)
win_2012 = returndict(df_2012)
win_2013 = returndict(df_2013)
win_2014 = returndict(df_2014)
win_2015 = returndict(df_2015)
win_2016 = returndict(df_2016)
win_2017 = returndict(df_2017)
win_2018 = returndict(df_2018)
In [26]:
fig = make_subplots(rows=4,cols=3,subplot_titles=("Successful Teams of Season 2008",
                                                 "Successful Teams of Season 2009",
                                                 "Successful Teams of Season 2010",
                                                 "Successful Teams of Season 2011",
                                                 "Successful Teams of Season 2012",
                                                 "Successful Teams of Season 2013",
                                                 "Successful Teams of Season 2014",
                                                 "Successful Teams of Season 2015",
                                                 "Successful Teams of Season 2016",
                                                 "Successful Teams of Season 2017",
                                                 "Successful Teams of Season 2018"),shared_yaxes=True)

fig.add_trace(go.Bar(x = win_2008['Team'],y = win_2008['Wins'],marker = dict(color = win_2008['Wins'],coloraxis = "coloraxis")),row = 1,col = 1)
fig.add_trace(go.Bar(x = win_2009['Team'],y = win_2009['Wins'],marker = dict(color = win_2009['Wins'],coloraxis = "coloraxis")),row = 1,col = 2)
fig.add_trace(go.Bar(x = win_2010['Team'],y = win_2010['Wins'],marker = dict(color = win_2010['Wins'],coloraxis = "coloraxis")),row = 1,col = 3)
fig.add_trace(go.Bar(x = win_2011['Team'],y = win_2011['Wins'],marker = dict(color = win_2011['Wins'],coloraxis = "coloraxis")),row = 2,col = 1)
fig.add_trace(go.Bar(x = win_2012['Team'],y = win_2012['Wins'],marker = dict(color = win_2012['Wins'],coloraxis = "coloraxis")),row = 2,col = 2)
fig.add_trace(go.Bar(x = win_2013['Team'],y = win_2013['Wins'],marker = dict(color = win_2013['Wins'],coloraxis = "coloraxis")),row = 2,col = 3)
fig.add_trace(go.Bar(x = win_2014['Team'],y = win_2014['Wins'],marker = dict(color = win_2014['Wins'],coloraxis = "coloraxis")),row = 3,col = 1)
fig.add_trace(go.Bar(x = win_2015['Team'],y = win_2015['Wins'],marker = dict(color = win_2015['Wins'],coloraxis = "coloraxis")),row = 3,col = 2)
fig.add_trace(go.Bar(x = win_2016['Team'],y = win_2016['Wins'],marker = dict(color = win_2016['Wins'],coloraxis = "coloraxis")),row = 3,col = 3)
fig.add_trace(go.Bar(x = win_2017['Team'],y = win_2017['Wins'],marker = dict(color = win_2017['Wins'],coloraxis = "coloraxis")),row = 4,col = 1)
fig.add_trace(go.Bar(x = win_2018['Team'],y = win_2018['Wins'],marker = dict(color = win_2018['Wins'],coloraxis = "coloraxis")),row = 4,col = 2)
fig.update_layout(height =1500,width = 1400,title_text = "Successful Teams in IPL from season 2008-2018")
fig.show()
In [27]:
win_2008_3 = win_2008.sort_values('Wins',ascending=False).head(3)
win_2009_3 = win_2009.sort_values('Wins',ascending=False).head(3)
win_2010_3 = win_2010.sort_values('Wins',ascending=False).head(3)
win_2011_3 = win_2011.sort_values('Wins',ascending=False).head(3)
win_2012_3 = win_2012.sort_values('Wins',ascending=False).head(3)
win_2013_3 = win_2013.sort_values('Wins',ascending=False).head(3)
win_2014_3 = win_2014.sort_values('Wins',ascending=False).head(3)
win_2015_3 = win_2015.sort_values('Wins',ascending=False).head(3)
win_2016_3 = win_2016.sort_values('Wins',ascending=False).head(3)
win_2017_3 = win_2017.sort_values('Wins',ascending=False).head(3)
win_2018_3 = win_2018.sort_values('Wins',ascending=False).head(3)
In [28]:
fig = make_subplots(rows=6,cols=2,subplot_titles=("Successful Teams of Season 2008",
                                                 "Successful Teams of Season 2009",
                                                 "Successful Teams of Season 2010",
                                                 "Successful Teams of Season 2011",
                                                 "Successful Teams of Season 2012",
                                                 "Successful Teams of Season 2013",
                                                 "Successful Teams of Season 2014",
                                                 "Successful Teams of Season 2015",
                                                 "Successful Teams of Season 2016",
                                                 "Successful Teams of Season 2017",
                                                 "Successful Teams of Season 2018"),shared_yaxes=True)

fig.add_trace(go.Bar(x = win_2008_3['Team'],y = win_2008_3['Wins'],marker = dict(color = win_2008_3['Wins'],coloraxis = "coloraxis")),row = 1,col = 1)
fig.add_trace(go.Bar(x = win_2009_3['Team'],y = win_2009_3['Wins'],marker = dict(color = win_2009_3['Wins'],coloraxis = "coloraxis")),row = 1,col = 2)
fig.add_trace(go.Bar(x = win_2010_3['Team'],y = win_2010_3['Wins'],marker = dict(color = win_2010_3['Wins'],coloraxis = "coloraxis")),row = 2,col = 1)
fig.add_trace(go.Bar(x = win_2011_3['Team'],y = win_2011_3['Wins'],marker = dict(color = win_2011_3['Wins'],coloraxis = "coloraxis")),row = 2,col = 2)
fig.add_trace(go.Bar(x = win_2012_3['Team'],y = win_2012_3['Wins'],marker = dict(color = win_2012_3['Wins'],coloraxis = "coloraxis")),row = 3,col = 1)
fig.add_trace(go.Bar(x = win_2013_3['Team'],y = win_2013_3['Wins'],marker = dict(color = win_2013_3['Wins'],coloraxis = "coloraxis")),row = 3,col = 2)
fig.add_trace(go.Bar(x = win_2014_3['Team'],y = win_2014_3['Wins'],marker = dict(color = win_2014_3['Wins'],coloraxis = "coloraxis")),row = 4,col = 1)
fig.add_trace(go.Bar(x = win_2015_3['Team'],y = win_2015_3['Wins'],marker = dict(color = win_2015_3['Wins'],coloraxis = "coloraxis")),row = 4,col = 2)
fig.add_trace(go.Bar(x = win_2016_3['Team'],y = win_2016_3['Wins'],marker = dict(color = win_2016_3['Wins'],coloraxis = "coloraxis")),row = 5,col = 1)
fig.add_trace(go.Bar(x = win_2017_3['Team'],y = win_2017_3['Wins'],marker = dict(color = win_2017_3['Wins'],coloraxis = "coloraxis")),row = 5,col = 2)
fig.add_trace(go.Bar(x = win_2018_3['Team'],y = win_2018_3['Wins'],marker = dict(color = win_2018_3['Wins'],coloraxis = "coloraxis")),row = 6,col = 1)
fig.update_layout(height =1500,width = 900,title_text = "Successful Teams in IPL from season 2008-2018")
fig.show()

For the season 2008,Rajasthan Royals,Kings XI Punjab,Chennai SuperKings,were the top 3 Performing Teams

For the season 2009,Delhi DareDevils,Deccan Chargers,Royal Challengers Banglore,were the top 3 Performing Teams

For the season 2010,Mumbai Indians,Chennai SuperKings,Royal Challengers Banglore,were the top 3 Performing Teams

For the season 2011,Mumbai Indians,Chennai SuperKings,Royal Challengers Banglore,were the top 3 Performing Teams

For the season 2012,Kolkata Knight Riders,Delhi DareDevils,Mumbai Indians,were the top 3 Performing Teams

For the season 2013,Mumbai Indians,Chennai SuperKings,Rajasthan Royals,were the top 3 Performing Teams

For the season 2014,Kings XI Punjab,Kolkata Knight Riders,Chennai SuperKings,were the top 3 Performing Teams

For the season 2015,Chennai Super Kings,Mumbai Indians,Royal Challengers Banglore,were the top 3 Performing Teams

For the season 2016,Surises Hyderabad,Gujarat Lions,Royal Challengers Banglore,were the top 3 Performing Teams

For the season 2017,Mumbai Indians,Rising Pune Supergaints,Kolkata Knight Riders,were the top 3 Performing Teams

For the season 2018,Chennai SuperKings,Sunrises Hyderabad,Kolkata Knight Riders,were the top 3 Performing Teams

We can observe the trend from the following Graph,we can observe that Mumbai Indians,Chennai Super Kings,Royal Challengers Banglore,Kolkata Knight Riders were the most Performing Teams/Successful Teams out of all teams,as these teams were in the top-3 positions in the Points Table,hence these teams have participated most number of times in the World Champions T20 League from India,So these teams can be a selection choice if a company wants to endorse its products!,as these are the most loved teams in the Indian Premier League almost for all seasons

Top Performing Batsmans

In [29]:
deliveries.head()
Out[29]:
match_id inning batting_team bowling_team over ball batsman non_striker bowler is_super_over wide_runs bye_runs legbye_runs noball_runs penalty_runs batsman_runs extra_runs total_runs player_dismissed dismissal_kind fielder
0 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 1 DA Warner S Dhawan TS Mills 0 0 0 0 0 0 0 0 0 NaN NaN NaN
1 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 2 DA Warner S Dhawan TS Mills 0 0 0 0 0 0 0 0 0 NaN NaN NaN
2 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 3 DA Warner S Dhawan TS Mills 0 0 0 0 0 0 4 0 4 NaN NaN NaN
3 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 4 DA Warner S Dhawan TS Mills 0 0 0 0 0 0 0 0 0 NaN NaN NaN
4 1 1 Sunrisers Hyderabad Royal Challengers Bangalore 1 5 DA Warner S Dhawan TS Mills 0 2 0 0 0 0 0 2 2 NaN NaN NaN
In [30]:
a = list(deliveries['batting_team'].unique())
a
Out[30]:
['Sunrisers Hyderabad',
 'Royal Challengers Bangalore',
 'Mumbai Indians',
 'Rising Pune Supergiant',
 'Gujarat Lions',
 'Kolkata Knight Riders',
 'Kings XI Punjab',
 'Delhi Daredevils',
 'Chennai Super Kings',
 'Rajasthan Royals',
 'Deccan Chargers',
 'Kochi Tuskers Kerala',
 'Pune Warriors',
 'Rising Pune Supergiants',
 'Delhi Capitals']
In [31]:
df_csk = deliveries[(deliveries['batting_team']=='Chennai Super Kings')&(deliveries['total_runs']!=0)&(deliveries['total_runs']!=5)&(deliveries['total_runs']!=7)]
df_mi = deliveries[(deliveries['batting_team']=='Mumbai Indians')&(deliveries['total_runs']!=0)&(deliveries['total_runs']!=5)&(deliveries['total_runs']!=7)]
df_rcb = deliveries[(deliveries['batting_team']=='Royal Challengers Bangalore')&(deliveries['total_runs']!=0)&(deliveries['total_runs']!=5)&(deliveries['total_runs']!=7)]
df_kkr = deliveries[(deliveries['batting_team']=='Kolkata Knight Riders')&(deliveries['total_runs']!=0)&(deliveries['total_runs']!=5)&(deliveries['total_runs']!=7)]
df_rr = deliveries[(deliveries['batting_team']=='Rajasthan Royals')&(deliveries['total_runs']!=0)&(deliveries['total_runs']!=5)&(deliveries['total_runs']!=7)]
df_kxip = deliveries[(deliveries['batting_team']=='Kings XI Punjab')&(deliveries['total_runs']!=0)&(deliveries['total_runs']!=5)&(deliveries['total_runs']!=7)]
df_dd = deliveries[(deliveries['batting_team']=='Delhi Daredevils') | (deliveries['batting_team']=='Delhi Capitals')&(deliveries['total_runs']!=0)&(deliveries['total_runs']!=5)&(deliveries['total_runs']!=7)]
df_srh = deliveries[(deliveries['batting_team']=='Sunrisers Hyderabad')|(deliveries['batting_team']=='Deccan Chargers')&(deliveries['total_runs']!=0)&(deliveries['total_runs']!=5)&(deliveries['total_runs']!=7)]
In [ ]:
 
In [32]:
df_csk.head()
Out[32]:
match_id inning batting_team bowling_team over ball batsman non_striker bowler is_super_over wide_runs bye_runs legbye_runs noball_runs penalty_runs batsman_runs extra_runs total_runs player_dismissed dismissal_kind fielder
14089 61 1 Chennai Super Kings Kings XI Punjab 1 3 PA Patel ML Hayden B Lee 0 0 0 0 0 0 1 0 1 NaN NaN NaN
14091 61 1 Chennai Super Kings Kings XI Punjab 1 5 ML Hayden PA Patel B Lee 0 0 0 0 0 0 4 0 4 NaN NaN NaN
14093 61 1 Chennai Super Kings Kings XI Punjab 2 1 PA Patel ML Hayden S Sreesanth 0 0 0 0 0 0 4 0 4 NaN NaN NaN
14094 61 1 Chennai Super Kings Kings XI Punjab 2 2 PA Patel ML Hayden S Sreesanth 0 0 0 0 0 0 4 0 4 NaN NaN NaN
14095 61 1 Chennai Super Kings Kings XI Punjab 2 3 PA Patel ML Hayden S Sreesanth 0 0 0 0 0 0 4 0 4 NaN NaN NaN
In [33]:
csk_bat = list(df_csk['batsman'].unique())
mi_bat = list(df_mi['batsman'].unique())
rcb_bat = list(df_rcb['batsman'].unique())
kkr_bat = list(df_kkr['batsman'].unique())
rr_bat = list(df_rr['batsman'].unique())
kxip_bat = list(df_kxip['batsman'].unique())
dd_bat = list(df_dd['batsman'].unique())
srh_bat = list(df_srh['batsman'].unique())
In [34]:
def getmap(df,player):
    temp = df.groupby(df['batsman']==player)[['total_runs']].sum()
    runs = temp['total_runs'][1]
    return runs
In [35]:
df_list = [df_csk,df_mi,df_rcb,df_kkr,df_rr,df_kxip,df_dd,df_srh]
bat_list = [csk_bat,mi_bat,rcb_bat,kkr_bat,rr_bat,kxip_bat,dd_bat,srh_bat]
teams = ['csk','mi','rcb','kkr','rr','kxip','dd','srh']
year = [i for i in range(2008,2019)]
count = 2008
main_map = {}
for team,batsman,team_name in zip(df_list,bat_list,teams):
    mapper = {}
    for player in batsman:
        mapper[player] = getmap(team,player)
        
    main_map[team_name] = mapper
    count+=1
    
from pprint import pprint pprint(main_map)
In [36]:
def gettopbatdf(team):
    temp = main_map[team]
    temp = dict(sorted(temp.items(),key=lambda x:x[1],reverse=True))
    temp = dict(list(temp.items())[:10])
    top_bat_df = pd.DataFrame()
    name = []
    runs = []
    for key,val in temp.items():
        name.append(key)
        runs.append(val)
    top_bat_df['Player'] = name
    top_bat_df['Runs'] = runs
    
    return top_bat_df
In [37]:
teams = ['csk','mi','rcb','kkr','rr','kxip','dd','srh']
In [38]:
top_bat_csk = gettopbatdf('csk')
top_bat_mi  = gettopbatdf('mi')
top_bat_rcb = gettopbatdf('rcb')
top_bat_kkr = gettopbatdf('kkr')
top_bat_rr = gettopbatdf('rr')
top_bat_kxip = gettopbatdf('kxip')
top_bat_dd = gettopbatdf('dd')
top_bat_srh = gettopbatdf('srh')
In [39]:
top_bat_csk
Out[39]:
Player Runs
0 SK Raina 4747
1 MS Dhoni 4081
2 MEK Hussey 1838
3 M Vijay 1730
4 F du Plessis 1702
5 S Badrinath 1515
6 ML Hayden 1175
7 DR Smith 1020
8 SR Watson 1013
9 DJ Bravo 990
runs = list(top_bat_csk['Runs'].unique()) players = list(top_bat_csk['Player'].unique()) fig = px.bar_polar(r = runs,theta=players, color=players,template="plotly_dark", color_discrete_sequence=px.colors.sequential.Plasma_r) fig.show()
In [40]:
teams = ['csk','mi','rcb','kkr','rr','kxip','dd','srh']

fig = make_subplots(rows=4,cols=2,subplot_titles=('CSK','MI','RCB','KKR',
                                                 'RR','KXIP','DD','SRH'),start_cell = 'top-left',
                                  specs=[[{'type':'polar'},{'type':'polar'}],
                                         [{'type':'polar'},{'type':'polar'}],
                                         [{'type':'polar'},{'type':'polar'}],
                                         [{'type':'polar'},{'type':'polar'}]],shared_yaxes=True)

# fig = make_subplots(rows=4,cols=2,specs=[[{'type':'polar'},{'type':'polar'}],
#                                          [{'type':'polar'},{'type':'polar'}],
#                                          [{'type':'polar'},{'type':'polar'}],
#                                          [{'type':'polar'},{'type':'polar'}]])

fig.add_trace(go.Barpolar(theta=list(top_bat_csk['Player'].unique()),r = list(top_bat_csk['Runs'].unique())),1,1)

fig.add_trace(go.Barpolar(theta=list(top_bat_mi['Player'].unique()),r = list(top_bat_mi['Runs'].unique())),1,2)

fig.add_trace(go.Barpolar(theta=list(top_bat_rcb['Player'].unique()),r = list(top_bat_rcb['Runs'].unique())),2,1)

fig.add_trace(go.Barpolar(theta=list(top_bat_kkr['Player'].unique()),r = list(top_bat_kkr['Runs'].unique())),2,2)

fig.add_trace(go.Barpolar(theta=list(top_bat_rr['Player'].unique()),r = list(top_bat_rr['Runs'].unique())),3,1)

fig.add_trace(go.Barpolar(theta=list(top_bat_kxip['Player'].unique()),r = list(top_bat_kxip['Runs'].unique())),3,2)

fig.add_trace(go.Barpolar(theta=list(top_bat_dd['Player'].unique()),r = list(top_bat_dd['Runs'].unique())),4,1)

fig.add_trace(go.Barpolar(theta=list(top_bat_srh['Player'].unique()),r = list(top_bat_srh['Runs'].unique())),4,2)


# fig.update_traces(hoverinfo="label+percent+name")

# fig.update_layout(
#     title_text="Global Emissions 1990-2011",
#     # Add annotations in the center of the donut pies.
#     annotations=[dict(text='GHG', x=0.18, y=0.5, font_size=20, showarrow=False),
#                  dict(text='CO2', x=0.82, y=0.5, font_size=20, showarrow=False),
#                 dict(text='CO2', x=0.82, y=0.5, font_size=20, showarrow=False),
#                 dict(text='CO2', x=0.82, y=0.5, font_size=20, showarrow=False),
#                 dict(text='CO2', x=0.82, y=0.5, font_size=20, showarrow=False),
#                 dict(text='CO2', x=0.82, y=0.5, font_size=20, showarrow=False),
#                 dict(text='CO2', x=0.82, y=0.5, font_size=20, showarrow=False),
#                 dict(text='CO2', x=0.82, y=0.5, font_size=20, showarrow=False)])

fig.update_layout(height =1600,width = 1000,title_text = "Top Performing Batsmans From Each team")
# annot =list(fig.layout.annotations)
# fig.layout.annotations = annot
fig.show()

Top 5 Batsmans Stats

In [41]:
def gettopbatdf(team):
    temp = main_map[team]
    temp = dict(sorted(temp.items(),key=lambda x:x[1],reverse=True))
    temp = dict(list(temp.items())[:5])
    keys = []
    for key,val in temp.items():
        keys.append(key)
        
    return keys
In [42]:
bat_csk = gettopbatdf('csk')
bat_mi  = gettopbatdf('mi')
bat_rcb = gettopbatdf('rcb')
bat_kkr = gettopbatdf('kkr')
bat_rr = gettopbatdf('rr')
bat_kxip = gettopbatdf('kxip')
bat_dd = gettopbatdf('dd')
bat_srh = gettopbatdf('srh')
In [43]:
bat_csk
Out[43]:
['SK Raina', 'MS Dhoni', 'MEK Hussey', 'M Vijay', 'F du Plessis']
In [44]:
def getruns(df,player):
    temp = df[(df['batsman']==player)]
    runs = list(temp['total_runs'])
    mapper = dict(zip(runs,map(runs.count,runs)))
    return mapper
In [45]:
df_list = [df_csk,df_mi,df_rcb,df_kkr,df_rr,df_kxip,df_dd,df_srh]
bats = [bat_csk,bat_mi,bat_rcb,bat_kkr,bat_rr,bat_kxip,bat_dd,bat_srh]
teams = ['csk','mi','rcb','kkr','rr','kxip','dd','srh']
final_mapper = {}
for i,j,k in zip(df_list,bats,teams):
    mapper = {}
    for player in j:
        mapper[player] = getruns(i,player)
        final_mapper[k] = mapper
pprint(final_mapper)
In [46]:
def gettop5dataframe(team):
    top_5 = dict(list(final_mapper[team].items()))
    dummy = pd.DataFrame(top_5)
    dummy = dummy.T
    dummy = dummy.fillna(value = 0)
    for i in dummy.columns:
        if i==5 or i==8 or i==10 or i==7:
            dummy = dummy.drop([i],axis = 1)
     
    dummy = dummy.astype('int32')
    dummy['Player'] = dummy.index
    dummy = dummy.reset_index()
    dummy = dummy[['Player',1,2,3,4,6]]
    return dummy
In [47]:
teams = ['csk','mi','rcb','kkr','rr','kxip','dd','srh']
In [48]:
csk_ind = gettop5dataframe('csk')
mi_ind = gettop5dataframe('mi')
rcb_ind = gettop5dataframe('rcb')
kkr_ind = gettop5dataframe('kkr')
rr_ind = gettop5dataframe('rr')
kxip_ind = gettop5dataframe('kxip')
dd_ind = gettop5dataframe('dd')
srh_ind = gettop5dataframe('srh')
csk_ind['Total'] = csk_ind.sum(axis=1) mi_ind['Total'] = mi_ind.sum(axis=1) rcb_ind['Total'] = rcb_ind.sum(axis=1) kkr_ind['Total'] = kkr_ind.sum(axis=1) rr_ind['Total'] = rr_ind.sum(axis = 1) kxip_ind['Total'] = kxip_ind.sum(axis = 1) dd_ind['Total'] = dd_ind.sum(axis = 1) srh_ind['Total'] = srh_ind.sum(axis = 1)
In [49]:
csk_ind
Out[49]:
Player 1 2 3 4 6
0 SK Raina 1497 259 14 412 171
1 MS Dhoni 1254 314 15 269 177
2 MEK Hussey 634 89 8 183 45
3 M Vijay 467 117 11 147 68
4 F du Plessis 590 90 10 146 50
In [50]:
def getnameandruns(df_i):
    final = []
    for i in range(df_i.shape[0]):
        first = df_i.iloc[[i]]
        temp = []
        for i in first.columns:
            a = []
            a.append(list(first[i]))
            a = sum(a,[])
            temp.append(a)

        final.append(temp)


    final = sum(final,[])
    final = sum(final,[])
    name = [i for i in final if type(i)!=int]
    runs = [i for i in final if type(i)==int]
    final_runs = []
    start = 0
    end = 5
    while end<=len(runs):
        temp = runs[start:end]
        final_runs.append(temp)
        start = end
        end+=5
        
    return name,final_runs
In [51]:
teams = ['csk','mi','rcb','kkr','rr','kxip','dd','srh']
In [52]:
csk_name,csk_runs = getnameandruns(csk_ind)
mi_name,mi_runs = getnameandruns(mi_ind)
rcb_name,rcb_runs = getnameandruns(rcb_ind)
kkr_name,kkr_runs = getnameandruns(kkr_ind)
rr_name,rr_runs = getnameandruns(rr_ind)
kxip_name,kxip_runs = getnameandruns(kxip_ind)
dd_name,dd_runs = getnameandruns(dd_ind)
srh_name,srh_runs = getnameandruns(srh_ind)
In [53]:
srh_runs
Out[53]:
[[1186, 173, 17, 404, 73],
 [910, 235, 10, 313, 122],
 [443, 91, 4, 114, 44],
 [277, 34, 2, 135, 64],
 [396, 62, 1, 94, 51]]
In [54]:
list(top_bat_csk['Runs'].unique())[:5]
Out[54]:
[4747, 4081, 1838, 1730, 1702]
In [55]:
names = [csk_name,mi_name,rcb_name,kkr_name,rcb_name,kxip_name,dd_name,srh_name]
final = []
for i in range(len(names)):
    final.append(names[i])
    
final
Out[55]:
[['SK Raina', 'MS Dhoni', 'MEK Hussey', 'M Vijay', 'F du Plessis'],
 ['RG Sharma', 'KA Pollard', 'AT Rayudu', 'SR Tendulkar', 'HH Pandya'],
 ['V Kohli', 'AB de Villiers', 'CH Gayle', 'JH Kallis', 'R Dravid'],
 ['G Gambhir', 'RV Uthappa', 'YK Pathan', 'AD Russell', 'CA Lynn'],
 ['V Kohli', 'AB de Villiers', 'CH Gayle', 'JH Kallis', 'R Dravid'],
 ['SE Marsh', 'DA Miller', 'KL Rahul', 'GJ Maxwell', 'WP Saha'],
 ['V Sehwag', 'RR Pant', 'SS Iyer', 'DA Warner', 'G Gambhir'],
 ['S Dhawan', 'DA Warner', 'KS Williamson', 'AC Gilchrist', 'RG Sharma']]
In [56]:
def drawplots(player_runs,player_names,player_name,top_bat,text):
    runs = [1,2,3,4,6]
    runs = [str(i) for i in runs]
    fig = make_subplots(rows=2,cols = 3,subplot_titles=tuple(player_names),
                       specs=[[{'type':'domain'},{'type':'domain'},{'type':'domain'}],
                              [{'type':'domain'},{'type':'domain'},{'type':'domain'}]],shared_yaxes=True)
    fig.add_trace(go.Pie(labels=runs,values=player_runs[0],name = player_names[0]),1,1)
    fig.add_trace(go.Pie(labels=runs,values=player_runs[1],name = player_names[1]),1,2)
    fig.add_trace(go.Pie(labels=runs,values=player_runs[2],name = player_names[2]),1,3)
    fig.add_trace(go.Pie(labels=runs,values=player_runs[3],name = player_names[3]),2,1)
    fig.add_trace(go.Pie(labels=runs,values=player_runs[4],name = player_names[4]),2,2)
    fig.add_trace(go.Pie(labels = player_name,values=list(top_bat['Runs'].unique())[:5]),2,3)
    fig.update_traces(hole=.4, hoverinfo="label+percent+name")
    fig.update_layout(height = 750,width = 900,title_text = text)

    fig.show()
    
In [57]:
teams = ['csk','mi','rcb','kkr','rr','kxip','dd','srh']
top_bat = [top_bat_csk,top_bat_mi,top_bat_rcb,top_bat_kkr,top_bat_rr,top_bat_kxip,top_bat_dd,top_bat_srh]
player_name = [csk_name,mi_name,rcb_name,kkr_name,rr_name,kxip_name,dd_name,srh_name]
player_runs = [csk_runs,mi_runs,rcb_runs,kkr_runs,rr_runs,kxip_runs,dd_runs,srh_runs]
for i,j,k,l,m in zip(range(len(teams)),range(len(player_name)),range(len(player_runs)),range(len(top_bat)),range(len(final))):
    
    drawplots(player_runs[k],player_name[j],final[m],top_bat[l],text = "Top 5 Batsmans From"+" "+str(teams[i]).upper())
    print("-"*120)
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
In [74]:
def getresults(df,player_list):
    
    final = {}
    for player in player_list:
        temp = df[(df['batsman']==player)]
        bowler = list(temp['bowler'])
        bowler_map = dict(Counter(bowler))
        bowler_map = dict(sorted(bowler_map.items(),key=lambda x:x[1],reverse=True))
        bowler_map = dict(list(bowler_map.items())[:5])
        final[player] = bowler_map

    final_name = []
    final_runs = []
    for key,val in final.items():
        player_runs = []
        player_name = []
        for key1,val1 in val.items():
            player_name.append(key1)
            player_runs.append(val1)

        final_name.append(player_name)
        final_runs.append(player_runs)
        
    
    return final_name,final_runs
In [76]:
teams = ['csk','mi','rcb','kkr','rr','kxip','dd','srh']
In [75]:
csk_player,csk_run = getresults(df_csk,csk_name)
mi_player,mi_run = getresults(df_mi,mi_name)
rcb_player,rcb_run = getresults(df_rcb,rcb_name)
kkr_player,kkr_run = getresults(df_kkr,kkr_name)
rr_player,rr_run = getresults(df_rr,rr_name)
kxip_player,kxip_run = getresults(df_kxip,kxip_name)
dd_player,dd_run = getresults(df_dd,dd_name)
srh_player,srh_run = getresults(df_srh,srh_name)
In [95]:
srh_player[0]
Out[95]:
['Harbhajan Singh', 'DS Kulkarni', 'UT Yadav', 'MM Sharma', 'M Morkel']
In [91]:
srh_run
Out[91]:
[[81, 79, 68, 55, 55],
 [81, 71, 67, 62, 53],
 [51, 39, 36, 35, 32],
 [23, 18, 18, 17, 17],
 [30, 27, 23, 21, 17]]
In [112]:
csk_player
Out[112]:
[['Harbhajan Singh', 'PP Chawla', 'A Mishra', 'SK Trivedi', 'JH Kallis'],
 ['PP Ojha', 'SL Malinga', 'DW Steyn', 'KA Pollard', 'IK Pathan'],
 ['JH Kallis', 'R Bhatia', 'SK Trivedi', 'L Balaji', 'IK Pathan'],
 ['SL Malinga', 'RP Singh', 'UT Yadav', 'RJ Harris', 'YK Pathan'],
 ['Harbhajan Singh', 'A Mishra', 'PP Chawla', 'AR Patel', 'M Ashwin']]
In [113]:
csk_run
Out[113]:
[[75, 63, 54, 50, 44],
 [60, 44, 42, 42, 36],
 [31, 31, 27, 26, 25],
 [23, 22, 22, 20, 20],
 [33, 24, 23, 21, 19]]
In [138]:
def drawplots(player_runs,player_names,batsman_name):
    
    
    fig = make_subplots(rows=2,cols = 3,subplot_titles=tuple(batsman_name),
                       specs=[[{'type':'domain'},{'type':'domain'},{'type':'domain'}],
                              [{'type':'domain'},{'type':'domain'},{'type':'domain'}]],shared_yaxes=True)
    fig.add_trace(go.Pie(labels=player_names[0],values=player_runs[0],name = batsman_name[0]),1,1)
    fig.add_trace(go.Pie(labels=player_names[1],values=player_runs[1],name = batsman_name[1]),1,2)
    fig.add_trace(go.Pie(labels=player_names[2],values=player_runs[2],name = batsman_name[2]),1,3)
    fig.add_trace(go.Pie(labels=player_names[3],values=player_runs[3],name = batsman_name[3]),2,1)
    fig.add_trace(go.Pie(labels=player_names[4],values=player_runs[4],name = batsman_name[4]),2,2)
#     fig.add_trace(go.Pie(labels=player_names[4],values=player_runs[4]),2,3)
    fig.update_traces(hole=.4, hoverinfo="label+percent+value")
    fig.update_layout(height = 750,width = 900)

    fig.show()
    
In [139]:
teams = ['csk','mi','rcb','kkr','rr','kxip','dd','srh']
top_bat = [top_bat_csk,top_bat_mi,top_bat_rcb,top_bat_kkr,top_bat_rr,top_bat_kxip,top_bat_dd,top_bat_srh]
player_name = [csk_player,mi_player,rcb_player,kkr_player,rr_player,kxip_player,dd_player,srh_player]
player_runs = [csk_run,mi_run,rcb_run,kkr_run,rr_run,kxip_run,dd_run,srh_run]
batsman_name = [csk_name,mi_name,rcb_name,kkr_name,rr_name,kxip_name,dd_name,srh_name]
for j,k,l in zip(player_name,player_runs,batsman_name):
    drawplots(k,j,l)
    print("-"*120)
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------