TidyTuesday Week 27: The xkcd Color Survey

In 2010, the xkcd Color Survey asked hundreds of thousands of people to name colors they saw, revealing the different ways in which people perceive and label colors.

TidyTuesday
Data Visualization
Python Programming
2025
Author

Peter Gray

Published

July 8, 2025

1. Python code

Show code
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the three CSV tables published for the 2025-07-08 TidyTuesday dataset.
answers = pd.read_csv(
    'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/answers.csv'
)
color_ranks = pd.read_csv(
    'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/color_ranks.csv'
)
users = pd.read_csv(
    'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/users.csv'
)

# Attach the user table's columns to every answer row. The left join keeps
# answers whose user_id has no match in `users` (their user columns are NaN).
master = answers.merge(users, on="user_id", how="left")

# Keep only rows with spam_prob of at most 0.5. Rows where spam_prob is
# missing compare as False and are therefore dropped as well.
master = master.loc[master["spam_prob"].le(0.5)]


# Mean of `rank` for every (monitor, hex) pair, flattened back to columns.
mean_rank_by_pair = master.groupby(["monitor", "hex"])["rank"].mean().reset_index()

# Ascending sort puts each monitor's smallest mean ranks first, so taking the
# first 20 rows of every monitor group yields its 20 lowest-mean-rank hexes.
ordered_pairs = mean_rank_by_pair.sort_values(["monitor", "rank"])
top_20_hex_per_monitor = ordered_pairs.groupby("monitor").head(20)

# Inner join back onto the answer rows: only rows whose (monitor, hex) pair
# made the top-20 selection survive.
selected_pairs = top_20_hex_per_monitor[["monitor", "hex"]]
filtered_top = pd.merge(master, selected_pairs, on=["monitor", "hex"])


# Number of answer rows per (monitor, hex) pair, pivoted so that monitors are
# rows and hex codes are columns; pairs that never occur become 0.
pair_sizes = filtered_top.groupby(["monitor", "hex"]).size()
counts = pair_sizes.unstack(fill_value=0)

# Convert each monitor's row to percentages of that row's total.
row_totals = counts.sum(axis=1)
percentages = counts.div(row_totals, axis=0) * 100

# One stacked bar per monitor; each segment is one hex column's share.
ax = percentages.plot.bar(
    stacked=True,
    figsize=(12, 6),
    colormap="tab20",
    width=0.8,
)

# Write the percentage inside every segment that is at least 5% tall.
# Bars sit at integer x positions in row order, and segments stack in column
# order, so a running sum over the row gives each segment's bottom edge.
for bar_pos, (_, shares) in enumerate(percentages.iterrows()):
    segment_bottom = 0
    for share in shares:
        if share >= 5:
            segment_middle = segment_bottom + share / 2
            ax.text(
                bar_pos,
                segment_middle,
                f"{share:.0f}%",
                ha="center",
                va="center",
                fontsize=8,
                color="black",
                rotation=90,
            )
        segment_bottom += share

# Axis label, title, and a legend anchored just outside the right edge of
# the axes so it does not cover the bars.
ax.set_title("Top 20 Colours per Monitor (by Frequency)")
ax.set_ylabel("Percentage of Top Colours")
ax.legend(title="Hex Colour", bbox_to_anchor=(1.05, 1), loc="upper left")

plt.show()

Back to top