3-Point Shooting % as a Function of Time¶
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nba_api.stats.endpoints import shotchartdetail
%matplotlib inline
I installed a package called nba_api (https://github.com/swar/nba_api) in order to get the data from the NBA API.
Define functions:¶
def seasons_string(start_year, end_year):
    '''
    Build the list of NBA season labels for first years start_year up to,
    but not including, end_year.

    Each label joins the first year with the last two digits of the
    following year, e.g. 2013 -> '2013-14'.
    '''
    return [
        '{}-{}'.format(str(first_year), str(first_year + 1)[-2:])
        for first_year in np.arange(start_year, end_year)
    ]
Get Data:¶
I'm running a loop to get shot chart data from multiple seasons
# Fetch the shot chart one season at a time, tag every frame with its season
# label, then stack all seasons into a single DataFrame.
# NOTE(review): team_id='0' / player_id='0' presumably select all teams and
# all players -- confirm against the nba_api documentation.
# NOTE(review): verify that context_measure_simple='FGM' returns missed
# attempts as well as makes; the shooting % computed later depends on it.
frames = []
for season in seasons_string(2013, 2019):
    response = shotchartdetail.ShotChartDetail(
        team_id='0',
        player_id='0',
        season_nullable=season,
        context_measure_simple='FGM',
        timeout=60,
    )
    season_shots = response.get_data_frames()[0]
    season_shots['SEASON'] = season
    frames.append(season_shots)
    print(season)  # progress indicator: one line per downloaded season
data = pd.concat(frames, ignore_index=True)
Data Analysis:¶
def get_time_seconds(g):
    """Return the elapsed game time in seconds from shot chart columns.

    Reads the 'PERIOD', 'MINUTES_REMAINING' and 'SECONDS_REMAINING' entries
    of g. Periods 1-4 are counted as 12 minutes each and every later period
    as 5 minutes; the time remaining in the current period is subtracted
    from the time at the end of that period.
    """
    period = g['PERIOD']
    minutes_left = g['MINUTES_REMAINING']
    seconds_left = g['SECONDS_REMAINING']

    # Elapsed time if the period is one of the first four (12 min each).
    regulation = period * 12 * 60 - minutes_left * 60 - seconds_left
    # Elapsed time otherwise: four 12-min periods plus 5 min per extra period.
    overtime = 4 * 12 * 60 + (period - 4) * 5 * 60 - minutes_left * 60 - seconds_left

    in_first_four = period < 5
    return np.where(in_first_four, regulation, overtime)
I'm going to create time buckets so we can run the analysis per time bucket. First we convert the time from the data to seconds. This is done with the get_time_seconds function I created. Then, I will use pandas.cut to create time bins in 24-second intervals. The intervals are created with numpy.arange.
# Convert the whole frame in one vectorised call; this works much faster than
# a row-wise data.apply(get_time_seconds, axis=1).
data['TIME'] = get_time_seconds(data)

# Create time buckets to use for aggregation: edges every 24 s from 0 to
# 48 min. pd.cut leaves values that fall outside the edges without a bucket.
bucket_edges = np.arange(0, 60 * 48 + 1, 24)
data['TIME_BUCKET'], time_bins = pd.cut(data['TIME'], bins=bucket_edges, retbins=True)
I'm going to group by the time buckets that I created and the shot zone range, which will allow us to separate the shots based on the shooting range. We care about the shooting % and the frequency of the shots, so we can look at the shot made flag and aggregate based on mean and size (to get the average shooting % and the total shots respectively). The unstack is very useful because it converts an index level to columns (it pivots the table). In this case we are pivoting the 2nd index level, which is the shot zone range, and converting it to columns.
### Apply aggregation
# For every (time bucket, shot zone range) pair compute the mean of
# SHOT_MADE_FLAG and the number of rows, then pivot the shot zone range
# into columns so the result has one row per time bucket.
# observed=False is passed explicitly: TIME_BUCKET is categorical, and every
# bucket must keep a row (even an empty one) so the rows stay aligned with
# the bin edges returned by pd.cut, whatever the pandas default is.
time_summary = (
    data.groupby(['TIME_BUCKET', 'SHOT_ZONE_RANGE'], observed=False)['SHOT_MADE_FLAG']
    .agg(['mean', 'size'])
    .unstack(1)
)
# create a new column with the sum of all shots
# The 'size' columns are selected by label instead of by position, so the
# total stays correct no matter how many shot zone ranges are present.
time_summary[('size', 'all')] = time_summary['size'].sum(axis=1)
time_summary.head()
Plotting:¶
Get the data we want to plot:
# Midpoint of every time bucket, converted from seconds to minutes
left_edges, right_edges = time_bins[:-1], time_bins[1:]
x = 0.5 * (right_edges + left_edges) / 60

# NOTE(review): '24+ ft.' is used as the 3-point zone; confirm this range
# captures every 3-point attempt you want to include.
zone = '24+ ft.'
# shooting % in this zone, per time bucket
y1 = 100 * time_summary[('mean', zone)].values
# shooting frequency: this zone's shots as a fraction of all shots in the bucket
zone_shots = time_summary[('size', zone)]
all_shots = time_summary[('size', 'all')]
y2 = (zone_shots / all_shots).values
Create the plot:
### This part takes care of the plot style
plt.style.use('classic')
fig = plt.figure(figsize=(8, 12))
bg_color = (0.98, 0.98, 0.98)
fig.set_facecolor(bg_color)

# create labels and title for figure
# (fig.text positions are in figure coordinates by default, so no explicit
# transform is needed)
fig.text(0.01, 0.01, '@EyalShafran', fontsize=14.0, color='gray',
         horizontalalignment='left', verticalalignment='bottom')
fig.text(0.99, 0.01, 'Source: NBA.COM', fontsize=14.0, color='gray',
         horizontalalignment='right', verticalalignment='bottom')
fig.text(0.01, 0.99, '3-Points Shots vs. Game Time', fontsize=22.0,
         horizontalalignment='left', weight="bold", verticalalignment='top')
fig.text(0.01, 0.96, 'Do players start missing more when they are tired?', fontsize=16.0,
         horizontalalignment='left', verticalalignment='top')
fig.text(0.53, 0.04, 'Game Time (MIN)', fontsize=16.0, color='black',
         horizontalalignment='center', verticalalignment='center')

# Layout of the plotting area, in figure fractions
ax_left = 0.08
ax_bottom = 0.08
ax_width = 0.9
ax_height = 0.83
h_margin = 0.05
fig_width = ax_width                        # width of each panel
fig_height = (ax_height - h_margin) / 2     # height of each of the two panels

# Two stacked panels sharing the same left edge; the first one sits on top.
# The positions are written out explicitly instead of being generated with a
# float-step np.arange, whose number of elements depends on rounding.
row_step = fig_height + h_margin
ws = [ax_left, ax_left]
hs = [ax_bottom + row_step, ax_bottom]

ax = []
for left, bottom in zip(ws, hs):
    panel = fig.add_axes([left, bottom, fig_width, fig_height])
    panel.set_facecolor(bg_color)
    # a real boolean: the 'on'/'off' string form is deprecated in matplotlib
    panel.grid(True, linestyle='--', color='gray')
    panel.spines['top'].set_visible(False)
    panel.spines['right'].set_visible(False)
    panel.tick_params(length=0, labelsize=16)
    ax.append(panel)

### Here we do the actual plotting
colors = ['#008fd5', '#fc4f30', '#e5ae38', '#6d904f', '#8b8b8b', '#810f7c']
# top panel: shooting % per time bucket
ax[0].plot(x, y1, 'o-', color=colors[0], linewidth=4)
ax[0].set_xticks([0, 12, 24, 36, 48])
ax[0].set_ylabel('3-Point Field Goal %', fontsize=16)
# bottom panel: share of all attempts taken from this zone
ax[1].plot(x, y2, 'o-', color=colors[0], linewidth=4)
ax[1].set_xticks([0, 12, 24, 36, 48])
ax[1].set_ylabel('3-Point Attempts / Total FG Attempts', fontsize=16)
fig.savefig('Shooting_PCT_vs_time.png', bbox_inches='tight',
            facecolor=fig.get_facecolor(), edgecolor='none')
Let's plot the same for shots from less than 8 ft.¶
# Midpoint of every time bucket, converted from seconds to minutes
left_edges, right_edges = time_bins[:-1], time_bins[1:]
x = 0.5 * (right_edges + left_edges) / 60

zone = 'Less Than 8 ft.'
# shooting % in this zone, per time bucket
y1 = 100 * time_summary[('mean', zone)].values
# shooting frequency: this zone's shots as a fraction of all shots in the bucket
zone_shots = time_summary[('size', zone)]
all_shots = time_summary[('size', 'all')]
y2 = (zone_shots / all_shots).values
### This part takes care of the plot style
plt.style.use('classic')
fig = plt.figure(figsize=(8, 12))
bg_color = (0.98, 0.98, 0.98)
fig.set_facecolor(bg_color)

# create labels and title for figure
# (fig.text positions are in figure coordinates by default, so no explicit
# transform is needed)
fig.text(0.01, 0.01, '@EyalShafran', fontsize=14.0, color='gray',
         horizontalalignment='left', verticalalignment='bottom')
fig.text(0.99, 0.01, 'Source: NBA.COM', fontsize=14.0, color='gray',
         horizontalalignment='right', verticalalignment='bottom')
fig.text(0.01, 0.99, 'Close Range Shots vs. Game Time', fontsize=22.0,
         horizontalalignment='left', weight="bold", verticalalignment='top')
fig.text(0.01, 0.96, 'Do players start missing more when they are tired?', fontsize=16.0,
         horizontalalignment='left', verticalalignment='top')
fig.text(0.53, 0.04, 'Game Time (MIN)', fontsize=16.0, color='black',
         horizontalalignment='center', verticalalignment='center')

# Layout of the plotting area, in figure fractions
ax_left = 0.08
ax_bottom = 0.08
ax_width = 0.9
ax_height = 0.83
h_margin = 0.05
fig_width = ax_width                        # width of each panel
fig_height = (ax_height - h_margin) / 2     # height of each of the two panels

# Two stacked panels sharing the same left edge; the first one sits on top.
# The positions are written out explicitly instead of being generated with a
# float-step np.arange, whose number of elements depends on rounding.
row_step = fig_height + h_margin
ws = [ax_left, ax_left]
hs = [ax_bottom + row_step, ax_bottom]

ax = []
for left, bottom in zip(ws, hs):
    panel = fig.add_axes([left, bottom, fig_width, fig_height])
    panel.set_facecolor(bg_color)
    # a real boolean: the 'on'/'off' string form is deprecated in matplotlib
    panel.grid(True, linestyle='--', color='gray')
    panel.spines['top'].set_visible(False)
    panel.spines['right'].set_visible(False)
    panel.tick_params(length=0, labelsize=16)
    ax.append(panel)

### Here we do the actual plotting
colors = ['#008fd5', '#fc4f30', '#e5ae38', '#6d904f', '#8b8b8b', '#810f7c']
# top panel: shooting % per time bucket
ax[0].plot(x, y1, 'o-', color=colors[0], linewidth=4)
ax[0].set_xticks([0, 12, 24, 36, 48])
ax[0].set_ylabel('Less Than 8 ft. Field Goal %', fontsize=16)
# bottom panel: share of all attempts taken from this zone
ax[1].plot(x, y2, 'o-', color=colors[0], linewidth=4)
ax[1].set_xticks([0, 12, 24, 36, 48])
# trailing semicolon keeps the notebook from echoing the returned Text object
ax[1].set_ylabel('Less Than 8 ft. Attempts / Total FG Attempts', fontsize=16);
Comments
comments powered by Disqus