If the button below says "Show Widgets," please click to initialize the interactive features on this page. It may take a few minutes to load...
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from datetime import datetime
from datetime import date
import calendar
from sklearn.cluster import KMeans
from matplotlib import pyplot as plt
import seaborn as sns
sns.set_context('notebook')
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
def getTrainedModel():
    """Return a LinearRegression model fitted on the historical rental data.

    The model is fitted on the first call and the fitted instance is then
    reused. Training draws a new random 70/30 split each time, so refitting
    on every call would make repeated predictions for the same inputs
    disagree with each other.

    Returns:
        The fitted ``LinearRegression`` instance.
    """
    cached = getattr(getTrainedModel, "_fitted_model", None)
    if cached is not None:
        return cached

    # Historical bike rental dataset
    DATA = "./hour.csv"
    raw = pd.read_csv(DATA)

    # Clean the data: drop every column the model does not use
    raw = raw.drop(
        ['instant', 'hum', 'windspeed', 'casual', 'registered', 'weathersit',
         'dteday', 'atemp', 'yr'],
        axis=1,
    )

    # The dataset mixes continuous and categorical data, so the categorical
    # columns are replaced by dummy (one-hot) columns before fitting.
    categorical = ['season', 'holiday', 'mnth', 'hr']
    dummies = [
        pd.get_dummies(raw[name], prefix=name, drop_first=False)
        for name in categorical
    ]
    data = pd.concat([raw] + dummies, axis=1).drop(categorical, axis=1)

    # Features are everything except the target column 'cnt'
    x = data.drop('cnt', axis=1)
    y = raw['cnt']

    # Hold out 30% of the dataset; only the training part is used here
    x_train, _x_test, y_train, _y_test = train_test_split(x, y, test_size=0.3)

    # fit() returns the estimator itself, which is cached for later calls
    getTrainedModel._fitted_model = LinearRegression().fit(x_train, y_train)
    return getTrainedModel._fitted_model
# To allow users to request a day for a prediction of future bike availability
# Make dashboard start with tomorrow
import datetime
currentday = datetime.datetime.today()
print("today=", currentday)
# Derive tomorrow from the same clock reading so the two values are exactly
# one day apart (two separate today() calls can differ, e.g. around midnight).
tomorrow = currentday + datetime.timedelta(days=1)
print("tomorrow=", tomorrow)
def get_months(day):
    """Return the month of ``day`` followed by its 12 one-hot month flags.

    Args:
        day: Any object with an integer ``month`` attribute
            (e.g. ``datetime.date``).

    Returns:
        A 13-tuple ``(month, month_1, ..., month_12)`` where ``month_N`` is 1
        when ``month == N`` and 0 otherwise.
    """
    month = day.month
    flags = tuple(int(month == candidate) for candidate in range(1, 13))
    return (month,) + flags
# Month number and one-hot month flags for tomorrow, spread into the
# individual module-level names used when building the feature frames.
mnths = get_months(tomorrow)
(month,
 month_1, month_2, month_3, month_4, month_5, month_6,
 month_7, month_8, month_9, month_10, month_11, month_12) = mnths
today= 2021-09-09 16:57:46.851747 tomorrow= 2021-09-10 16:57:46.856803
Y = 2000  # dummy leap year to allow input X-02-29 (leap day)
# Season code with its inclusive (start, end) date range inside year Y.
# Code '4' appears twice because its range wraps around the new year.
# NOTE(review): confirm these codes match the season encoding in hour.csv.
seasons = [
    (code, (date(Y, start_month, start_day), date(Y, end_month, end_day)))
    for code, (start_month, start_day), (end_month, end_day) in (
        ('4', (1, 1), (3, 20)),
        ('1', (3, 21), (6, 20)),
        ('2', (6, 21), (9, 22)),
        ('3', (9, 23), (12, 20)),
        ('4', (12, 21), (12, 31)),
    )
]
tom = tomorrow.date()


def get_season(day):
    """Return the season code whose date range in ``seasons`` contains ``day``.

    ``day`` is first moved into the dummy year ``Y`` so that only its month
    and day matter. It must be comparable with the ``date`` bounds stored in
    ``seasons``.
    """
    probe = day.replace(year=Y)
    matching = (
        code
        for code, (first, last) in seasons
        if first <= probe <= last
    )
    return next(matching)


seasonout = get_season(tom)
def set_seasons(season):
    """Return the one-hot flags ``(season1, season2, season3, season4)``.

    Args:
        season: Season code as a string, one of ``'1'``, ``'2'``, ``'3'``
            or ``'4'``.

    Returns:
        A 4-tuple of 0/1 ints with a single 1 at the position of ``season``.

    Raises:
        ValueError: If ``season`` is not a known code. Previously this case
            printed a message and returned ``None``, which only surfaced
            later as a confusing subscript error at the call site.
    """
    one_hot = {
        '1': (1, 0, 0, 0),
        '2': (0, 1, 0, 0),
        '3': (0, 0, 1, 0),
        '4': (0, 0, 0, 1),
    }
    if season not in one_hot:
        raise ValueError('Unknown season code: {!r}'.format(season))
    return one_hot[season]
# One-hot season flags for tomorrow, spread into individual names.
seasonsset = set_seasons(seasonout)
print(seasonsset)
season1, season2, season3, season4 = seasonsset
(0, 1, 0, 0)
import holidays
# Holiday calendar used for all holiday lookups below
us_holidays = holidays.UnitedStates()
# Print all the United States holidays in year 2020
for ptr in holidays.UnitedStates(years=2020).items():
    print(ptr)
# Show whether tomorrow is a US holiday
print(tomorrow in us_holidays)
def get_holiday(day):
    """Return 1 if ``day`` is found in ``us_holidays``, otherwise 0."""
    return int(day in us_holidays)


holidayout = get_holiday(tomorrow)
print(holidayout)
def set_holidays(holiday):
    """Return the one-hot flags ``(holiday_0, holiday_1)`` for ``holiday``.

    Args:
        holiday: 1 if the day is a holiday, 0 if it is not.

    Returns:
        ``(0, 1)`` for a holiday and ``(1, 0)`` otherwise.

    Raises:
        ValueError: If ``holiday`` is neither 0 nor 1.
    """
    # Use the argument, not the module-level ``holidayout``: reading the
    # global made every call return the flags for tomorrow regardless of
    # which day was being encoded.
    if holiday == 1:
        return 0, 1
    if holiday == 0:
        return 1, 0
    raise ValueError('Unknown holiday flag: {!r}'.format(holiday))
# One-hot holiday flags for tomorrow, spread into individual names.
holidaysset = set_holidays(holidayout)
holiday_0, holiday_1 = holidaysset
(datetime.date(2020, 1, 1), "New Year's Day") (datetime.date(2020, 1, 20), 'Martin Luther King Jr. Day') (datetime.date(2020, 2, 17), "Washington's Birthday") (datetime.date(2020, 5, 25), 'Memorial Day') (datetime.date(2020, 7, 4), 'Independence Day') (datetime.date(2020, 7, 3), 'Independence Day (Observed)') (datetime.date(2020, 9, 7), 'Labor Day') (datetime.date(2020, 10, 12), 'Columbus Day') (datetime.date(2020, 11, 11), 'Veterans Day') (datetime.date(2020, 11, 26), 'Thanksgiving') (datetime.date(2020, 12, 25), 'Christmas Day') False 0
def get_day_of_week(day):
    """Return ``day.weekday()`` (for ``datetime`` objects: Monday=0 ... Sunday=6)."""
    return day.weekday()
day_of_week = get_day_of_week(tomorrow)


def get_workingday(day):
    """Return 1 when ``day`` is a Saturday, Sunday or holiday, otherwise 0.

    Note that 1 therefore marks a NON-working day; the display code maps
    1 to "no" and 0 to "yes" for this column accordingly.

    NOTE(review): the value is also fed to the model as the 'workingday'
    feature - confirm this polarity matches the 'workingday' column of the
    training data.
    """
    is_weekend = day.weekday() in (5, 6)
    is_holiday = get_holiday(day) == 1
    return 1 if (is_holiday or is_weekend) else 0


workday = get_workingday(tomorrow)
# Pre-supplied average temperature for each hour 0-23
avgtemp = [
    0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
    0.4, 0.4, 0.4, 0.5, 0.5, 0.6,
    0.7, 0.7, 0.7, 0.6, 0.6, 0.6,
    0.5, 0.5, 0.5, 0.4, 0.4, 0.4,
]
cnt = 0
# Human-readable description of tomorrow, one row per hour; the scalar
# values are repeated for every hour by the DataFrame constructor.
userout = {
    'weekday': day_of_week,
    'workingday': workday,
    'temp': avgtemp,
    'season': seasonout,
    'holiday': holidayout,
    'mnth': month,
    'hr': list(range(24)),
}
# Column order follows the insertion order of the dict above
user_out = pd.DataFrame(userout, columns=list(userout))
cnt = 0
# Model-input description of tomorrow: same day, but with season, holiday
# and month already one-hot encoded. Key order defines the column order.
user = {
    'weekday': day_of_week,
    'workingday': workday,
    'temp': avgtemp,
}
user.update(zip(
    ['season_1', 'season_2', 'season_3', 'season_4'],
    [season1, season2, season3, season4],
))
user.update(zip(
    ['holiday_0', 'holiday_1'],
    [holiday_0, holiday_1],
))
user.update(zip(
    ['mnth_1', 'mnth_2', 'mnth_3', 'mnth_4', 'mnth_5', 'mnth_6',
     'mnth_7', 'mnth_8', 'mnth_9', 'mnth_10', 'mnth_11', 'mnth_12'],
    [month_1, month_2, month_3, month_4, month_5, month_6,
     month_7, month_8, month_9, month_10, month_11, month_12],
))
user['hr'] = list(range(24))
user['cnt'] = cnt
user_predictions = pd.DataFrame(user, columns=list(user))
# One-hot encode the hour column of the user dataset
hr = pd.get_dummies(user_predictions['hr'], prefix='hr', drop_first=False)
data1 = pd.concat([user_predictions, hr], axis=1).drop(['hr'], axis=1)
# Predict the number of bikes rented per hour tomorrow:
# the feature matrix is everything except the placeholder target 'cnt'
x2 = data1.drop('cnt', axis=1)
trainedModel = getTrainedModel()
predictions2 = trainedModel.predict(x2)
# Add the predictions to the display frame as a new column at position 7
user_out.insert(loc=7, column="bikes_rented", value=predictions2)
# Predict the number of bikes rented each hour on the day the user chose with pre-supplied average temps
import calendar
def get_day_prediction(day):
    """Predict and plot hourly rentals for ``day`` using the average temps.

    Updates the module-level ``user_out`` (display frame) and
    ``user_predictions`` (model-input frame) in place, prints the hourly
    table and draws a bar chart of predicted rentals per hour.

    Args:
        day: The date to predict for. It is passed to ``get_months``,
            ``get_season``, ``get_holiday``, ``get_day_of_week`` and
            ``get_workingday`` and must support ``strftime``.
    """
    month, *month_flags = get_months(day)
    season = get_season(day)
    season_flags = set_seasons(season)
    holiday = get_holiday(day)
    holiday_flags = set_holidays(holiday)
    day_of_week = get_day_of_week(day)
    workingday = get_workingday(day)

    # Display frame
    user_out['weekday'] = day_of_week
    user_out['workingday'] = workingday
    user_out['season'] = season
    user_out['holiday'] = holiday
    user_out['mnth'] = month
    # Both frames are shared with get_tmp_prediction / get_hr_prediction,
    # which overwrite 'temp'. Reset it so this prediction really uses the
    # pre-supplied hourly averages.
    user_out['temp'] = avgtemp

    # Model-input frame (assigning to existing columns keeps their order)
    user_predictions['weekday'] = day_of_week
    user_predictions['workingday'] = workingday
    user_predictions['temp'] = avgtemp
    for position, flag in enumerate(season_flags, start=1):
        user_predictions['season_{}'.format(position)] = flag
    for position, flag in enumerate(holiday_flags):
        user_predictions['holiday_{}'.format(position)] = flag
    for position, flag in enumerate(month_flags, start=1):
        user_predictions['mnth_{}'.format(position)] = flag

    # One-hot encode the hour and drop the raw 'hr' and placeholder 'cnt'
    hour_dummies = pd.get_dummies(user_predictions['hr'], prefix='hr', drop_first=False)
    features = pd.concat([user_predictions, hour_dummies], axis=1).drop(['hr', 'cnt'], axis=1)
    user_out['bikes_rented'] = getTrainedModel().predict(features)

    # Replace codes with labels. Columns are reassigned rather than using
    # chained ``inplace=True`` replaces, which do not update the frame when
    # pandas Copy-on-Write is active.
    user_out['mnth'] = user_out['mnth'].apply(lambda x: calendar.month_abbr[x])
    user_out['weekday'] = user_out['weekday'].replace(
        {0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"})
    user_out['season'] = user_out['season'].replace(
        {"1": "spring", "2": "summer", "3": "fall", "4": "winter"})
    # get_workingday returns 1 for weekends/holidays, hence 1 -> "no"
    user_out['workingday'] = user_out['workingday'].replace({1: "no", 0: "yes"})
    user_out['holiday'] = user_out['holiday'].replace({1: "yes", 0: "no"})
    print(user_out[['mnth', 'weekday', 'season', 'workingday', 'holiday', 'hr', 'bikes_rented']])

    # Demand per hour
    # Negative numbers can be assumed to = 0 bikes rented for that hour
    dt = day.strftime("%B %d, %Y")
    plt.figure(figsize=(12, 6))
    plt.ylim(0, 700)
    sns.barplot(data=user_out, x='hr', y='bikes_rented', palette='rainbow').set(title=dt)
# Predict the number of bikes rented each hour on the day and for the temp the user chose
def get_tmp_prediction(day, temp):
    """Predict and plot hourly rentals for ``day`` at one fixed temperature.

    Updates the module-level ``user_out`` (display frame) and
    ``user_predictions`` (model-input frame) in place, including their
    'temp' column, prints the hourly table and draws a bar chart of
    predicted rentals per hour.

    Args:
        day: The date to predict for. It is passed to ``get_months``,
            ``get_season``, ``get_holiday``, ``get_day_of_week`` and
            ``get_workingday`` and must support ``strftime``.
        temp: Temperature value applied to every hour of the day.
    """
    month, *month_flags = get_months(day)
    season = get_season(day)
    season_flags = set_seasons(season)
    holiday = get_holiday(day)
    holiday_flags = set_holidays(holiday)
    day_of_week = get_day_of_week(day)
    workingday = get_workingday(day)

    # Display frame
    user_out['weekday'] = day_of_week
    user_out['workingday'] = workingday
    user_out['season'] = season
    user_out['holiday'] = holiday
    user_out['mnth'] = month
    user_out['temp'] = temp

    # Model-input frame (assigning to existing columns keeps their order)
    user_predictions['weekday'] = day_of_week
    user_predictions['workingday'] = workingday
    user_predictions['temp'] = temp
    for position, flag in enumerate(season_flags, start=1):
        user_predictions['season_{}'.format(position)] = flag
    for position, flag in enumerate(holiday_flags):
        user_predictions['holiday_{}'.format(position)] = flag
    for position, flag in enumerate(month_flags, start=1):
        user_predictions['mnth_{}'.format(position)] = flag

    # One-hot encode the hour and drop the raw 'hr' and placeholder 'cnt'
    hour_dummies = pd.get_dummies(user_predictions['hr'], prefix='hr', drop_first=False)
    features = pd.concat([user_predictions, hour_dummies], axis=1).drop(['hr', 'cnt'], axis=1)
    user_out['bikes_rented'] = getTrainedModel().predict(features)

    # Replace codes with labels. Columns are reassigned rather than using
    # chained ``inplace=True`` replaces, which do not update the frame when
    # pandas Copy-on-Write is active.
    user_out['mnth'] = user_out['mnth'].apply(lambda x: calendar.month_abbr[x])
    user_out['weekday'] = user_out['weekday'].replace(
        {0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"})
    user_out['season'] = user_out['season'].replace(
        {"1": "spring", "2": "summer", "3": "fall", "4": "winter"})
    # get_workingday returns 1 for weekends/holidays, hence 1 -> "no"
    user_out['workingday'] = user_out['workingday'].replace({1: "no", 0: "yes"})
    user_out['holiday'] = user_out['holiday'].replace({1: "yes", 0: "no"})
    print(user_out[['mnth', 'weekday', 'temp', 'season', 'workingday', 'holiday', 'hr', 'bikes_rented']])

    # Demand per hour
    # Negative numbers can be assumed to = 0 bikes rented for that hour
    dt = day.strftime("%B %d, %Y")
    plt.figure(figsize=(12, 6))
    plt.ylim(0, 700)
    sns.barplot(data=user_out, x='hr', y='bikes_rented', palette='rainbow').set(title=dt)
# Predict the number of bikes rented during the hour of user's choice for each temperature in range 0.0 - 1.0 C, predicted in 0.2 increments
def get_hr_prediction(dy, hr):
    """Predict and plot rentals at one hour of a day across temperatures.

    Temperatures 0.0, 0.2, ..., 1.0 are tried in turn. For each, the whole
    day is predicted and the row for the requested hour is kept. The
    resulting table is printed and plotted as rentals against temperature.

    Updates the module-level ``user_out`` and ``user_predictions`` frames
    in place; their 'temp' column is left at the last temperature tried.

    Args:
        dy: The date to predict for.
        hr: The hour of the day to report, compared against the 'hr'
            column of ``user_out``.
    """
    hour = hr
    day = dy

    month, *month_flags = get_months(day)
    season = get_season(day)
    season_flags = set_seasons(season)
    holiday = get_holiday(day)
    holiday_flags = set_holidays(holiday)
    day_of_week = get_day_of_week(day)
    workingday = get_workingday(day)

    # Display frame
    user_out['weekday'] = day_of_week
    user_out['workingday'] = workingday
    user_out['season'] = season
    user_out['holiday'] = holiday
    user_out['mnth'] = month

    # Model-input frame (assigning to existing columns keeps their order)
    user_predictions['weekday'] = day_of_week
    user_predictions['workingday'] = workingday
    for position, flag in enumerate(season_flags, start=1):
        user_predictions['season_{}'.format(position)] = flag
    for position, flag in enumerate(holiday_flags):
        user_predictions['holiday_{}'.format(position)] = flag
    for position, flag in enumerate(month_flags, start=1):
        user_predictions['mnth_{}'.format(position)] = flag

    # One-hot encode the hour once; it does not depend on the temperature
    hourd = pd.get_dummies(user_predictions['hr'], prefix='hr', drop_first=False)
    # Fetch the model once so every temperature is scored by the same fit
    model = getTrainedModel()

    rows = []
    for step in range(0, 12, 2):
        # step / 10 gives 0.0, 0.2, ..., 1.0 without the rounding drift of
        # repeatedly adding 0.2
        temp = step / 10
        user_out['temp'] = temp
        user_predictions['temp'] = temp
        features = pd.concat([user_predictions, hourd], axis=1).drop(['hr', 'cnt'], axis=1)
        user_out['bikes_rented'] = model.predict(features)
        # Boolean indexing copies the row, so later iterations cannot alter it
        rows.append(user_out[user_out['hr'] == hour])
    df = pd.concat(rows)

    df['mnth'] = df['mnth'].apply(lambda x: calendar.month_abbr[x])
    df['weekday'] = df['weekday'].replace(
        {0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"})
    print(df[['mnth', 'weekday', 'hr', 'temp', 'bikes_rented']])

    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot()
    ax.plot(df.temp, df.bikes_rented)
    ax.set_xlabel("temp in Celsius")
    ax.set_ylabel("bikes rented")
    ax.set_title("Hour: " + str(hour))
On which future day are you interested in viewing possible bike availability?
# Make the dashboard interactive and allow users to query for wanted information
# Request a date from the user for the day they would like to check bike availability
from ipywidgets import DOMWidget, ValueWidget, register, interact, interactive, fixed, interact_manual, Layout
from ipywidgets.widgets.interaction import show_inline_matplotlib_plots
from IPython.display import display, clear_output
import ipywidgets as widgets
# Date picker pre-set to tomorrow
tom = tomorrow.date()
day_picked = widgets.DatePicker(
    value=tom,
    description='Pick a Date',
    disabled=False,
)


def f(daypicked):
    """Echo the date currently selected in the picker."""
    print('Date Chosen: {}'.format(daypicked))


# Re-run f whenever the picker value changes and show it beside the picker
out1 = widgets.interactive_output(f, {'daypicked': day_picked})
widgets.HBox([widgets.VBox([day_picked]), out1])
Please click "View Predictions" to see an estimate of how many bikes may be rented during each hour on the day chosen above.
The more bikes that are rented, the fewer bikes will be available to rent.
# Demand per hour on a day user has chosen with pre-supplied average temps
# Negative numbers can be assumed to = 0 bikes rented for that hour
from IPython.display import display
daypicked = day_picked.value
button1 = widgets.Button(description="View Predictions")
output1 = widgets.Output()
display(button1, output1)


def on_button_clicked1(b):
    """Show the hourly prediction for the picked date inside ``output1``."""
    with output1:
        clear_output()
        daypicked = day_picked.value
        # The picker value is None when the date field has been cleared
        if daypicked is None:
            print('Please pick a date first.')
            return
        get_day_prediction(daypicked)
        show_inline_matplotlib_plots()


button1.on_click(on_button_clicked1)
Please pick an hour on the day chosen above for which you would like to see possible bike availability, and a possible temperature for that day.
# Allow the user to pick an hour to adjust the temperature settings for
hr_picked = widgets.Dropdown(
    options=[str(hour_of_day) for hour_of_day in range(24)],
    value='12',
    description='Pick an Hour:',
    disabled=False,
)


def h(hrpicked):
    """Echo the hour currently selected in the dropdown."""
    print('Hour Chosen: {}'.format(hrpicked))


out2 = widgets.interactive_output(h, {'hrpicked': hr_picked})
widgets.HBox([widgets.VBox([hr_picked]), out2])
# The dropdown holds strings; convert to int to index the hourly frame
hrpicked = int(hr_picked.value)
print(hrpicked)
# Temperature currently stored for the picked hour
tmpvl = user_out["temp"].iloc[hrpicked]
print(tmpvl)
12 0.7
# Allow the user to adjust the temperature to see how changes in temperature affect bike availability
# The slider starts at the temperature stored for the picked hour
temp_picked = widgets.FloatSlider(
    description='Temp Celsius',
    value=tmpvl,
    min=0.0,
    max=1.0,
    step=0.1,
    orientation='horizontal',
    continuous_update=False,
    disabled=False,
    readout=True,
    readout_format='.1f',
)


def t(tmppicked):
    """Echo the temperature currently selected on the slider."""
    print('Temperature Chosen: {}'.format(tmppicked))


out3 = widgets.interactive_output(t, {'tmppicked': temp_picked})
widgets.HBox([widgets.VBox([temp_picked]), out3])
To see how the temperature chosen above affects bike availability at every hour of the day picked above, please click "View Predictions." This shows an estimate, from a model that is about 60% accurate, of how many bikes will be rented at each hour at the temperature you chose.
# View how a specific temperature affects bike availability for every hour during day
button2 = widgets.Button(description="View Predictions")
output2 = widgets.Output()
display(button2, output2)


def on_button_clicked2(b):
    """Show the hourly prediction for the picked date and temperature."""
    with output2:
        clear_output()
        daypicked = day_picked.value
        temppicked = temp_picked.value
        # The picker value is None when the date field has been cleared
        if daypicked is None:
            print('Please pick a date first.')
            return
        get_tmp_prediction(daypicked, temppicked)
        show_inline_matplotlib_plots()


button2.on_click(on_button_clicked2)
Please click "View Predictions" to see how varying temperatures will affect bike availability on the day and hour chosen above.
# View how the bike availability at a specific hour changes as the temperature changes
button3 = widgets.Button(description="View Predictions")
# NOTE: this rebinds the name out3 to a fresh Output widget
out3 = widgets.Output()
display(button3, out3)


def on_button_clicked3(b):
    """Show the temperature sweep for the picked date and hour in ``out3``."""
    with out3:
        clear_output()
        hrpicked = int(hr_picked.value)
        daypicked = day_picked.value
        # The picker value is None when the date field has been cleared
        if daypicked is None:
            print('Please pick a date first.')
            return
        get_hr_prediction(daypicked, hrpicked)
        show_inline_matplotlib_plots()


button3.on_click(on_button_clicked3)
Are you interested in viewing the data and machine learning model that were used to provide these predictions? Clicking the link below will take you to the Jupyter Notebook that was used to analyze the data and provide this information. Please feel free to interact with the data in the Jupyter Notebook on your own as you please! If you are able to create a model with accuracy greater than 60%, let us know by calling or emailing us and your next adventure will be on us :)
Right-click to open in a new tab..