import pandas as pd
import plotly.express as px
# Load the TidyTuesday (2025-05-13) Mount Vesuvius dataset.
vesuvius = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-05-13/vesuvius.csv')

# Data cleaning
# Drop the events whose review level is "revised".
vesuvius = vesuvius[vesuvius['review_level'] != "revised"]
# Drop rows with a missing latitude, longitude or magnitude: all three are
# needed to place, colour and size a marker on the map.
vesuvius = vesuvius.dropna(subset=['latitude', 'longitude', 'duration_magnitude_md'])
# Keep only magnitudes above 0.3. This removes the negative values (they were
# causing problems later on) and thins out the smallest events, which are
# within the margin of error.
vesuvius = vesuvius[vesuvius['duration_magnitude_md'] > 0.3]
# ~6000 earthquakes is a lot - keep only those from 2020 onwards.
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the original (avoids pandas' SettingWithCopyWarning).
vesuvius = vesuvius[vesuvius['year'] >= 2020].copy()

# Fix the dates
# Parse the timestamp column, then order the events chronologically: the
# 'date' column is used as the animation frame further down, and Plotly
# Express creates frames in the order the values first appear in the data.
vesuvius['time'] = pd.to_datetime(vesuvius['time'])
vesuvius = vesuvius.sort_values('time', kind='stable')
# Split off the calendar date so events can be grouped per day.
vesuvius['date'] = vesuvius['time'].dt.date
# Magnitude extremes over the whole filtered dataset; used below to pin the
# colour scale so that it stays the same in every animation frame.
min_mag, max_mag = vesuvius['duration_magnitude_md'].agg(['min', 'max'])

# Create graph: one animation frame per date, with markers coloured and sized
# by magnitude.
fig = px.scatter_mapbox(
    vesuvius,
    # Marker position
    lat="latitude",
    lon="longitude",
    # Marker appearance
    size="duration_magnitude_md",
    size_max=15,
    color="duration_magnitude_md",
    color_continuous_scale="Viridis",
    range_color=[min_mag, max_mag],  # fixed colour range across frames
    # Animation
    animation_frame="date",
    # Map view
    mapbox_style="carto-positron",
    center={"lat": 40.821, "lon": 14.426},
    zoom=10,
    # Text
    title="Earthquake Occurrences in the Vicinity of Mount Vesuvius Over Time <br> (2020–2024)",
    labels={"duration_magnitude_md": "Magnitude (Md)"},
)
fig.show()