Code
# 1️⃣ Load Data
import pandas as pd

# Load the raw hotel-bookings dataset (path is relative to the notebook).
df = pd.read_csv('../data/raw/hotel_bookings.csv')

# Filter to completed bookings only (exclude cancellations).
# An explicit copy is taken so that columns added to `df` later are written
# to an independent frame rather than to a slice of the raw data.
df = df[df['is_canceled'] == 0].copy()

# Preview the first rows (rendered as the cell output in a notebook).
df.head()
5 rows × 32 columns
# 2️⃣ Aggregate Price & Demand
# Group by price buckets -> simple fixed-edge binning of the `adr` column.
# pd.cut uses right-closed intervals by default; values outside the outer
# edges (<= 0 or > 1000) receive NaN and are left out of the groupby below.
df['price_bucket'] = pd.cut(df['adr'], bins=[0, 50, 100, 150, 200, 300, 500, 1000])

# Count bookings per price bucket.
# observed=False is passed explicitly: it retains the current behaviour
# (every bucket is listed, even an empty one) and silences the pandas
# FutureWarning about the changing default.
demand_by_price = (
    df.groupby('price_bucket', observed=False)
    .size()
    .reset_index(name='bookings')
)

# Display the aggregated table (rendered as the cell output in a notebook).
demand_by_price
C:\Users\atima\AppData\Local\Temp\ipykernel_17828\1096299440.py:7: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
demand_by_price = df.groupby('price_bucket').size().reset_index(name='bookings')
# 3️⃣ Plot Demand vs Price
import matplotlib.pyplot as plt

# Midpoint of each bucket -> simple point estimate of the bucket's price.
demand_by_price['price_mid'] = demand_by_price['price_bucket'].apply(lambda x: x.mid)

# Line plot of booking counts against the bucket midpoint price.
plt.figure(figsize=(8, 6))
plt.plot(demand_by_price['price_mid'], demand_by_price['bookings'], marker='o')
plt.title('Bookings vs Price (Elasticity Curve)')
plt.xlabel('Price (Midpoint of Bucket)')
plt.ylabel('Number of Bookings')
plt.grid()
plt.show()
# 4️⃣ Log-Log Regression (Elasticity Estimate)
import numpy as np
import statsmodels.api as sm

# Prepare log-log data: regress log(bookings) on log(price midpoint).
demand_by_price['price_mid'] = demand_by_price['price_mid'].astype(float)
X = np.log(demand_by_price['price_mid'])
y = np.log(demand_by_price['bookings'])
# NOTE(review): a bucket with zero bookings would give log(0) = -inf here and
# invalidate the fit -- confirm every bucket is populated before relying on it.

X = sm.add_constant(X)  # add intercept column
model = sm.OLS(y, X).fit()

print(model.summary())

# Elasticity ~ slope coefficient on log(price):
# approximate % change in demand per 1% change in price.
OLS Regression Results
==============================================================================
Dep. Variable: bookings R-squared: 0.598
Model: OLS Adj. R-squared: 0.518
Method: Least Squares F-statistic: 7.441
Date: Sat, 05 Jul 2025 Prob (F-statistic): 0.0414
Time: 20:15:04 Log-Likelihood: -14.821
No. Observations: 7 AIC: 33.64
Df Residuals: 5 BIC: 33.53
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 19.5084 4.513 4.323 0.008 7.909 31.108
price_mid -2.3678 0.868 -2.728 0.041 -4.599 -0.137
==============================================================================
Omnibus: nan Durbin-Watson: 1.033
Prob(Omnibus): nan Jarque-Bera (JB): 1.053
Skew: -0.709 Prob(JB): 0.591
Kurtosis: 1.734 Cond. No. 27.0
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
c:\GitHub\DynamicHotelPricingOptimization\.venv\Lib\site-packages\statsmodels\stats\stattools.py:74: ValueWarning: omni_normtest is not valid with less than 8 observations; 7 samples were given.
warn("omni_normtest is not valid with less than 8 observations; %i "
# 5️⃣ Visualize Fitted Elasticity Curve
# Fitted log(bookings) values from the OLS model, evaluated on the same
# design matrix X that was used for fitting.
y_pred = model.predict(X)

# Observed points and fitted line, both in log-log space.
plt.figure(figsize=(8, 6))
plt.scatter(np.log(demand_by_price['price_mid']), np.log(demand_by_price['bookings']), label='Observed')
plt.plot(np.log(demand_by_price['price_mid']), y_pred, color='red', label='Fitted Line')
plt.title('Log-Log Elasticity Model')
plt.xlabel('log(Price)')
plt.ylabel('log(Bookings)')
plt.legend()
plt.grid()
plt.show()