xkcd Color Survey

Author

Jo Hardin

Published

July 8, 2025

library(tidyverse) # ggplot, lubridate, dplyr, stringr, readr...
library(praise)

library(reticulate)
use_python("/Users/jsh04747/miniforge3/bin/python3", required = TRUE)
# Read the three TidyTuesday (2025-07-08) xkcd color survey tables straight
# from the rfordatascience/tidytuesday GitHub repository.
answers <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/answers.csv"
)
color_ranks <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/color_ranks.csv"
)
users <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/users.csv"
)
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
# Load the same three TidyTuesday (2025-07-08) tables into pandas DataFrames.
answers = pd.read_csv(
    'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/answers.csv'
)
color_ranks = pd.read_csv(
    'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/color_ranks.csv'
)
users = pd.read_csv(
    'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-08/users.csv'
)

# Display the answers table (columns: user_id, hex, rank).
answers
         user_id      hex  rank
0              1  #8240EA     1
1              2  #4B31EA     3
2              2  #584601     5
3              2  #DA239C     4
4              2  #B343E5     1
...          ...      ...   ...
1058206   152397  #7238F0     1
1058207   152398  #8E14CD     1
1058208   152398  #0A49E7     3
1058209   152400  #38A30E     2
1058210   152401  #4D004B     1

[1058211 rows x 3 columns]
# Bar chart of the number of answers per color rank, bars ordered from the
# most to the least frequent rank.
fig, ax = plt.subplots(figsize=(10, 6))

# value_counts() sorts by descending frequency, so `orders` is the bar order.
orders = answers['rank'].value_counts().index
# Bar colors, matched positionally to `orders`.
colors = ['#15b01a', '#0343df', '#7e1e9c', '#ff81c0', '#653700']

# seaborn >= 0.13 deprecates `palette` without `hue`; assigning the x variable
# to `hue` (with the same ordering) keeps the identical bar -> color mapping.
# dodge=False keeps one full-width bar per rank on older seaborn versions.
sns.countplot(
    data=answers,
    x="rank",
    hue="rank",
    order=orders,
    hue_order=orders,
    palette=colors,
    dodge=False,
    ax=ax,
)
ax.set_xlabel('rank of color')
ax.set_ylabel('')
ax.set_title('Highest ranked colors')

# Build the legend entries explicitly: calling legend() with no labelled
# artists yields an empty legend and a matplotlib warning.
handles = [
    plt.Rectangle((0, 0), 1, 1, color=color, label=str(rank))
    for rank, color in zip(orders, colors)
]
ax.legend(handles=handles, title='xkcd color survey', loc='upper center',
          bbox_to_anchor=(0.5, -0.15), ncol=2, frameon=False)
fig.tight_layout()

plt.show()

# Scatter a random sample of 1000 answers: rank on the x-axis, user_id on the
# y-axis, each point drawn in the hex color that was shown to the user.
# slice_sample() replaces the superseded sample_n().
answers |>
  slice_sample(n = 1000) |>
  ggplot(aes(x = rank, y = user_id, color = hex)) +
  geom_point() +
  # `hex` already holds literal color codes, so use the values as-is.
  scale_color_identity()

# Build one row per distinct (hex, rank) pair given by low-spam users, with:
#   dec   - the hex code converted to a decimal number
#   hues  - the HSV hue of the color
#   idx   - position of the color within its rank when ordered by hue
#   rank2 - the color name that corresponds to the rank
color_data <- answers |>
  # Keep only answers from users with spam_prob < 0.05. A filtering join is
  # enough here because no column from `users` is used afterwards.
  semi_join(filter(users, spam_prob < 0.05), by = "user_id") |>
  select(hex, rank) |>
  distinct() |>
  mutate(
    # NOTE(review): `dec` is not used by the plot below; kept so the columns
    # of color_data stay unchanged.
    dec = broman::hex2dec(stringr::str_remove(hex, "#")),
    # rgb2hsv() returns a matrix with rows h, s, v; keep the hue row.
    hues = grDevices::rgb2hsv(grDevices::col2rgb(hex))["h", ]
  ) |>
  arrange(hues) |>
  # Number the colors within each rank in hue order, then drop the grouping
  # so later verbs do not silently operate per rank.
  group_by(rank) |>
  mutate(idx = row_number()) |>
  ungroup() |>
  mutate(rank2 = case_when(
    rank == 1 ~ "purple",
    rank == 2 ~ "green",
    rank == 3 ~ "blue",
    rank == 4 ~ "pink",
    rank == 5 ~ "brown"
  ))
# One axis break per rank: passing the full rank / rank2 columns would hand
# ggplot one (duplicated) break and label for every row of color_data.
rank_labels <- color_data |>
  ungroup() |>
  distinct(rank, rank2) |>
  arrange(rank)

# Draw every distinct hex color as a thin vertical segment in its own color,
# one horizontal band per color name, ordered by hue within the band.
color_data |>
  ggplot(aes(x = idx, xend = idx, y = rank - 0.45, yend = rank + 0.45,
             color = hex)) +
  geom_segment() +
  # Reversed axis puts rank 1 at the top; scale_y_reverse() avoids the
  # `trans` argument, which newer ggplot2 versions deprecate.
  scale_y_reverse(breaks = rank_labels$rank, labels = rank_labels$rank2) +
  scale_color_identity() +
  labs(x = "", y = "")

Five standard colors — purple, green, blue, pink, and brown — were given as labels to many thousands of different hues. Each of the standard labels covers a wide range of colors; for example, the purple label was given to almost 90,000 different hex colors.

Each vertical line is a color that was shown to a user who gave it one of the five labels, for example, ‘purple’. The graph shows the range of colors that users labeled as ‘purple’, ‘green’, ‘blue’, ‘pink’, and ‘brown’. Within each label the colors are ordered by hue along the x-axis, so the width of each band is the number of unique hex colors that received that label.
# Sign off with a compliment generated by the praise package.
praise()
[1] "You are finest!"