TidyTuesday Week 33: Billboard Hot 100 Number Ones
This week we are exploring the Billboard Hot 100 Number Ones Database. This workbook contains substantial data about every song to ever top the Billboard Hot 100 between August 4, 1958 and January 11, 2025.
TidyTuesday
Data Visualization
Python Programming
2025
Author
Peter Gray
Published
August 19, 2025
Graphs of the Billboard Hot 100
1. Python code
Show code
# Build a 2x2 panel of charts describing Rock songs that reached number one
# on the Billboard Hot 100 (TidyTuesday 2025-08-26 data set).
import pandas as pd
import numpy as np
from plotnine import *  # wildcard kept from the original; other code may rely on it
import patchworklib as pl

billboard = pd.read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-08-26/billboard.csv"
)
topics = pd.read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-08-26/topics.csv"
)

# Work on an explicit copy so the column assignments below do not touch
# (or warn about) the full `billboard` frame.
rock = billboard[billboard["cdr_genre"] == "Rock"].copy()
rock["rounded_rating"] = np.rint(rock["overall_rating"])
rock["date"] = pd.to_datetime(rock["date"])
# Floor the year to its decade (e.g. 1987 -> 1980) and store it as a
# categorical so it is treated as a discrete axis/fill variable.
rock["decade"] = ((rock["date"].dt.year // 10) * 10).astype("category")

# Shared axis label for the two rating plots. The "\n" is a real newline so
# the label wraps onto two lines (the histogram previously used "\\n", which
# rendered a literal backslash-n on the axis).
rating_axis_label = "Rating \n (rounded to nearest integer)"

# Histogram of ratings
hist = pl.load_ggplot(
    ggplot(data=rock)
    + geom_histogram(
        aes(x="rounded_rating"),
        color="darkblue",
        fill="lightblue",
        bins=13,
    )
    + theme_bw()
    + theme(panel_grid=element_blank(), plot_title=element_text(margin={"b": 5}))
    + scale_x_continuous(breaks=range(0, 11))
    + labs(
        title="Distribution of Ratings for Rock Songs that Reached Number One",
        x=rating_axis_label,
        y="Count",
    )
)

# Box plot of ratings per decade
box = pl.load_ggplot(
    ggplot(rock)
    + geom_boxplot(aes(x="decade", y="rounded_rating", fill="decade"))
    + theme_bw()
    + theme(
        panel_grid=element_blank(),
        legend_position="bottom",
        legend_box_margin=0,
        plot_title=element_text(margin={"b": 5}),
    )
    + labs(
        title="Box plot of Rating of Rock Songs that Reached Number One",
        x="Decade",
        y=rating_axis_label,
        fill="Decade",
    )
)

# Top 10 rock artists by number of number-one songs
artist_counts = rock["artist"].value_counts()
top10 = artist_counts.head(10)
top10_df = top10.reset_index()
top10_df.columns = ["artist", "count"]
top10_df = top10_df.sort_values(by="count", ascending=True)
# Freeze the sorted order as an ordered categorical; otherwise the bars would
# be drawn in alphabetical order instead of by count.
top10_df["artist"] = pd.Categorical(
    top10_df["artist"], categories=top10_df["artist"], ordered=True
)

top10_chart = pl.load_ggplot(
    ggplot(top10_df)
    + geom_bar(aes(x="artist", y="count"), stat="identity", fill="steelblue")
    + geom_text(
        # Offset the label by 0.5 so it sits just past the end of each bar.
        aes(x="artist", y="count + 0.5", label="count"),
        va="center",
        format_string="{:.0f}",
        position=position_dodge(width=0.9),
    )
    + coord_flip()
    + theme_bw()
    + theme(panel_grid=element_blank())
    + labs(
        title="Top 10 Rock Artists by Number of Number 1 Songs",
        x="Artist",
        y="Number of Number 1 Hits",
    )
)

# Relative share of four instruments (cowbell, accordion, banjo, clarinet)
# among the rock songs in which each one is flagged as present.
instruments = rock[["cdr_genre", "cowbell", "accordion", "banjo", "clarinet"]].copy()
instruments_long = instruments.melt(
    id_vars="cdr_genre", var_name="instrument", value_name="present"
)

cowbell_plot = pl.load_ggplot(
    ggplot(
        instruments_long.query("present == 1"),
        aes(x="cdr_genre", fill="instrument"),
    )
    + geom_bar(position="fill", width=0.5)
    # position="fill" yields fractions in [0, 1]; show them as percentages.
    + scale_y_continuous(
        labels=lambda breaks: ["{:.0f}%".format(v * 100) for v in breaks]
    )
    + labs(
        title="Proportion of Instruments Used in Songs by Genre",
        x="Genre",
        y="Proportion of Songs",
        fill="Instrument",
    )
    + theme_bw()
    + theme(panel_grid=element_blank(), axis_text_x=element_text(rotation=45, hjust=1))
)

# Compose the 2x2 layout: `|` places bricks side by side, `/` stacks rows.
combined = (hist | box) / (cowbell_plot | top10_chart)