TidyTuesday Week 26: US Petrol Prices

This week we’re exploring weekly US gas prices! The data comes from the U.S. Energy Information Administration (EIA), which publishes average retail gasoline and diesel prices each Monday.

TidyTuesday
Data Visualization
Python Programming
Machine Learning
2025
Author

Peter Gray

Published

July 1, 2025

1. Python code

Show code
import pandas as pd
import numpy as np
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt

# Load the weekly fuel price table published for TidyTuesday 2025-07-01.
weekly_gas_prices = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-01/weekly_gas_prices.csv')

# Keep only regular-grade gasoline across all formulations. The three
# conditions are combined into a single mask, and the selection is copied so
# the column assignments below write to an independent frame instead of a
# slice of `weekly_gas_prices` (which raises SettingWithCopyWarning).
is_regular_petrol = (
    (weekly_gas_prices["fuel"] == "gasoline")
    & (weekly_gas_prices["grade"] == "regular")
    & (weekly_gas_prices["formulation"] == "all")
)
petrol = weekly_gas_prices[is_regular_petrol].copy()
petrol['date'] = pd.to_datetime(petrol['date'])
petrol = petrol.sort_values(by="date")

# Monthly aggregation: bucket every weekly observation into its calendar
# month, then average the prices within each bucket.
petrol['month_year'] = petrol['date'].dt.to_period('M')

monthly_petrol = petrol.groupby('month_year')['price'].mean().reset_index()
# Convert the monthly periods back to timestamps (start of each month) so the
# column can be used directly as a plotting axis.
monthly_petrol['month_year'] = monthly_petrol['month_year'].dt.to_timestamp()
monthly_petrol = (
    monthly_petrol.rename(columns={'month_year': 'date'})
    .sort_values(by='date')
)

# Double brackets keep the values two-dimensional (one row per month, one
# column), which is the layout MinMaxScaler expects.
prices = monthly_petrol[['price']].values

# NOTE(review): the scaler is fitted on the whole series, including the months
# that are later held out as the test set, so test-period min/max leak into
# the scaling. Confirm whether that is acceptable for this analysis.
scaler = MinMaxScaler()
scaled_prices = scaler.fit_transform(prices)

def create_sequences(data, seq_length):
    """Build sliding-window samples for one-step-ahead prediction.

    Each sample pairs the ``seq_length`` consecutive entries that come
    immediately before position ``i`` with the entry at position ``i``.

    Args:
        data: Indexable, sliceable sequence of observations (for example a
            NumPy array with one row per time step).
        seq_length: Number of consecutive entries in each input window.

    Returns:
        A ``(windows, targets)`` tuple of NumPy arrays. Both are empty when
        ``data`` has no more than ``seq_length`` entries.
    """
    target_positions = range(seq_length, len(data))
    windows = [data[end - seq_length:end] for end in target_positions]
    targets = [data[end] for end in target_positions]
    return np.array(windows), np.array(targets)


# Number of past months fed to the model for each one-step-ahead prediction.
sequence_length = 36
X, y = create_sequences(scaled_prices, sequence_length)

# Chronological 80/20 split: the earliest windows train the model and the most
# recent ones are held out, so no shuffling is applied.
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Single-layer LSTM regressor. The input shape is declared with an explicit
# Input object rather than the `input_shape=` layer argument, which Keras 3
# flags as deprecated for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1], 1)),
    LSTM(50, return_sequences=False),
    Dropout(0.5),
    Dense(1),
])

model.compile(optimizer='adam', loss='mean_squared_error')

# The held-out windows are passed as validation data, so `history` records the
# loss on them after every epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=4,
    validation_data=(X_test, y_test),
    verbose=0,
)

# verbose=0 keeps the Keras progress bar out of the rendered output, matching
# the other fit/predict calls in this script.
predicted = model.predict(X_test, verbose=0)
# Undo the min-max scaling so both series are in original price units.
predicted_prices = scaler.inverse_transform(predicted)
actual_prices = scaler.inverse_transform(y_test)

# plt.figure(figsize=(12, 7))
# test_dates = monthly_petrol['date'].iloc[split + sequence_length:].reset_index(drop=True)
# 
# 
# # plt.plot(test_dates, actual_prices, label='Actual Monthly Price')
# # plt.plot(test_dates, predicted_prices, label='Predicted Monthly Price')
# # plt.title("LSTM Monthly Petrol Price Prediction")
# # plt.xlabel("Date")
# # plt.ylabel("Price")
# # plt.legend()
# # plt.grid(True)
# # plt.show()

# Forecasting: roll the model forward one month at a time, feeding each
# prediction back in as the newest observation.
n_future_months = 24

# Seed with the most recent `sequence_length` scaled observations. X[-1] must
# not be used here: it is the window whose target is the final observed month
# (y[-1]), so it omits the latest data point and the first "future" value would
# be a re-estimate of an already-known month plotted one month too late.
last_sequence = scaled_prices[-sequence_length:]

predicted_sequence = []
current_seq = last_sequence.copy()

for _ in range(n_future_months):
    # Add a leading batch axis: (sequence_length, 1) -> (1, sequence_length, 1).
    pred = model.predict(current_seq[np.newaxis, :, :], verbose=0)[0, 0]
    predicted_sequence.append(pred)

    # Slide the window: drop the oldest month and append the new prediction.
    current_seq = np.append(current_seq[1:], [[pred]], axis=0)

# Convert the scaled forecasts back to original price units.
predicted_future_prices = scaler.inverse_transform(np.array(predicted_sequence).reshape(-1, 1))

last_actual_date = monthly_petrol['date'].max()

# One month-start ('MS') timestamp per forecast step, beginning the month
# after the last observed one.
future_dates = pd.date_range(
    start=last_actual_date + pd.DateOffset(months=1),
    periods=n_future_months,
    freq='MS',
)

plt.figure(figsize=(12, 7))

# Recent history shown for context ahead of the forecast. The legend label is
# derived from the number of rows actually plotted so it cannot drift from the
# window size (it previously said 24 months while 36 were drawn).
actual_recent_months = monthly_petrol.tail(36)
plt.plot(
    actual_recent_months['date'],
    actual_recent_months['price'],
    label=f"Actual Monthly Price (Last {len(actual_recent_months)} Months)",
)

# Forecast drawn as a dashed red line to set it apart from observed prices.
plt.plot(
    future_dates,
    predicted_future_prices,
    label=f"Forecast (Next {n_future_months} Months)",
    color='red',
    linestyle='--',
)

plt.title(f"Monthly Regular Petrol Price Forecast for Next {n_future_months} Months")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.grid(False)
plt.show()
1/3 ━━━━━━━━━━━━━━━━━━━━ 0s 222ms/step
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 105ms/step
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 118ms/step

Back to top