Code
library(tidyverse) # ggplot, lubridate, dplyr, stringr, readr...
library(praise)
library(ggsankey)
library(patchwork)
library(tidyverse) # ggplot, lubridate, dplyr, stringr, readr...
library(praise)
library(ggsankey)
library(patchwork)

This week we are exploring the Selected British Literary Prizes (1990-2022) dataset, which comes from the Post45 Data Collective.
“This dataset contains primary categories of information on individual authors comprising gender, sexuality, UK residency, ethnicity, geography and details of educational background, including institutions where the authors acquired their degrees and their fields of study. Along with other similar projects, we aim to provide information to assess the cultural, social and political factors determining literary prestige. Our goal is to contribute to greater transparency in discussions around diversity and equity in literary prize cultures.”
Additional metadata discussion relating to the ethnicity, gender and sexuality, and educational classification variables is available on the Post45 site. Follow them on BlueSky at @post45data.bsky.social, and here on GitHub at @Post45-Data-Collective.
Thank you to Georgios Karamanis for the dataset suggestion!
In relation to ethical considerations, the authors note that…
“All of the information in this dataset is publicly available. Information about a writer’s location, gender identity, race, ethnicity, or education from scholarly and public sources can be sensitive. The data provided here enables the study of broad patterns and is not intended as definitive.”
Thank you to Jen Richmond for curating this week’s dataset.
# Read the TidyTuesday 2025-10-28 prizes data and prepare the columns used by
# the Sankey diagrams below.
prizes <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-10-28/prizes.csv"
) |>
  # Give missing degrees an explicit "unknown" value so they are recoded
  # together with the other levels in the next step.
  mutate(highest_degree = tidyr::replace_na(highest_degree, "unknown")) |>
  # Prefix each degree with a letter. Presumably this is so the nodes sort in
  # this order once `factor(node)` orders its levels alphabetically in the
  # plots.
  mutate(
    highest_degree = forcats::fct_recode(
      highest_degree,
      "a none" = "none",
      "b unknown" = "unknown",
      "c Diploma" = "Diploma",
      "d Certificate of Education" = "Certificate of Education",
      "e Bachelors" = "Bachelors",
      "f Masters" = "Masters",
      "g Juris Doctor" = "Juris Doctor",
      "h MD" = "MD",
      "i Doctorate" = "Doctorate",
      "j Postgraduate" = "Postgraduate"
    )
  ) |>
  # Use the macro ethnicity grouping as `ethnicity`; the original column is
  # kept under the name `ethnicity_full`.
  rename(
    ethnicity_full = ethnicity,
    ethnicity = ethnicity_macro,
    degree = highest_degree
  )

# Keep the rows of `data` whose `prize_name` equals `prize` and reshape them
# into ggsankey's long format, with stages gender -> ethnicity -> degree.
#
# data:  a data frame with columns prize_name, gender, ethnicity and degree.
# prize: a single string, matched exactly against `prize_name`.
# Returns the data frame produced by ggsankey::make_long().
prize_flows <- function(data, prize) {
  data |>
    # `.env$prize` forces the function argument to be used even if `data`
    # happens to contain a column called `prize`.
    filter(.data$prize_name == .env$prize) |>
    ggsankey::make_long(gender, ethnicity, degree)
}

# Draw one Sankey diagram from the output of prize_flows().
#
# flows: long-format data with columns x, next_x, node and next_node.
# title: plot title; may contain "\n" to wrap a long prize name.
# Returns a ggplot object.
plot_prize_sankey <- function(flows, title) {
  ggplot(
    flows,
    aes(
      x = x, next_x = next_x,
      node = node, next_node = next_node,
      fill = factor(node), label = node
    )
  ) +
    geom_sankey(flow.alpha = 0.6, node.color = "gray30") +
    geom_sankey_label(size = 3, color = "white", fill = "gray40") +
    scale_fill_viridis_d() +
    theme_sankey(base_size = 18) +
    labs(x = NULL, title = title) +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 14)
    )
}

booker <- prize_flows(prizes, "Booker Prize")
p1 <- plot_prize_sankey(booker, "Booker Prize")

women_fiction <- prize_flows(prizes, "Women's Prize for Fiction")
p2 <- plot_prize_sankey(women_fiction, "Women's Prize for Fiction")

baillie <- prize_flows(prizes, "Baillie Gifford Prize for Non-Fiction")
p3 <- plot_prize_sankey(baillie, "Baillie Gifford Prize \nfor Non-Fiction")

man_booker <- prize_flows(prizes, "Man Booker Prize")
p4 <- plot_prize_sankey(man_booker, "Man Booker Prize")

gold_dagger <- prize_flows(prizes, "Gold Dagger")
p5 <- plot_prize_sankey(gold_dagger, "Gold Dagger")

hughes <- prize_flows(prizes, "Ted Hughes Award for New Work in Poetry")
p6 <- plot_prize_sankey(hughes, "Ted Hughes Award \nfor New Work in Poetry")

# Arrange the six plots with patchwork: three rows of two.
(p1 + p2) / (p3 + p4) / (p5 + p6)

praise()
# Console output captured from the original run:
#> [1] "You are sensational!"