In 2010, the xkcd Color Survey asked hundreds of thousands of people to name colors they saw, revealing the different ways in which people perceive and label colors.
TidyTuesday
Data Visualization
Python Programming
2025
Author
Peter Gray
Published
July 8, 2025
1. Python code
Show code
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the data (TidyTuesday 2025-07-08 CSV files, fetched over the network).
answers = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/answers.csv')
color_ranks = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/color_ranks.csv')
users = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/users.csv')

# Attach each answer's user attributes; a left join keeps every answer row.
master = pd.merge(answers, users, on="user_id", how="left")

# Keep only rows with spam_prob <= 0.5. Rows whose spam_prob is missing
# (e.g. no matching user) compare as False and are dropped as well.
master = master[master["spam_prob"] <= 0.5]

# For every monitor, average `rank` per hex value, sort ascending, and keep
# the first 20 hex values of each monitor.
# NOTE(review): the selection is by lowest mean `rank`, while the plot title
# below says "by Frequency" -- confirm which one is intended.
top_20_hex_per_monitor = (
    master.groupby(["monitor", "hex"])["rank"]
    .mean()
    .reset_index()
    .sort_values(["monitor", "rank"])
    .groupby("monitor")
    .head(20)
)

# Inner join back onto the answers so only the selected
# (monitor, hex) pairs remain.
filtered_top = master.merge(
    top_20_hex_per_monitor[["monitor", "hex"]],
    on=["monitor", "hex"],
)

# Rows: monitor, columns: hex, values: number of answers (0 where absent).
counts = (
    filtered_top.groupby(["monitor", "hex"])
    .size()
    .unstack(fill_value=0)
)

# Convert each monitor's row to percentages of that row's total.
percentages = counts.div(counts.sum(axis=1), axis=0) * 100

ax = percentages.plot(
    kind="bar",
    stacked=True,
    figsize=(12, 6),
    colormap="tab20",
    width=0.8,
)

# Add percentage labels
for i, monitor in enumerate(percentages.index):
    # Running height of the stack for this bar; segments are stacked in
    # column order, so walking the columns reproduces each segment's base.
    cumulative = 0
    for hex_code in percentages.columns:
        value = percentages.loc[monitor, hex_code]
        # Only label segments of at least 5%, centred vertically in the segment.
        if value >= 5:
            ax.text(
                i,
                cumulative + value / 2,
                f"{value:.0f}%",
                ha="center",
                va="center",
                fontsize=8,
                color="black",
                rotation=90,
            )
        # Advance the base for every segment, labelled or not.
        cumulative += value

# Formatting
ax.set_ylabel("Percentage of Top Colours")
ax.set_title("Top 20 Colours per Monitor (by Frequency)")
# Place the legend outside the axes, to the right of the plot.
ax.legend(title="Hex Colour", bbox_to_anchor=(1.05, 1), loc="upper left")
plt.show()