The data on Women’s Rugby comes from ScrumQueens by way of Jacquie Tran.
# Match-level results for each code, read from CSV files given by relative
# path. Later steps use the `date`, `winner`, and `loser` columns of both.
fifteens <- read_csv(file = "fifteens.csv")
sevens <- read_csv(file = "sevens.csv")
Are some countries better at sevens than fifteens? Does it change over time?
# Per-decade sevens record, one row per (decade, country): number of wins,
# number of losses, and prop_win_7 = wins / (wins + losses).
# NOTE(review): how drawn matches are encoded in `winner`/`loser` is not
# visible here -- confirm they are not being counted as a win or a loss.

# Wins per country per decade. `.groups = "drop_last"` spells out dplyr's
# default (the result stays grouped by decade) and silences its message.
sevens_win <- sevens %>%
  mutate(decade = floor(year(date) / 10) * 10) %>%
  group_by(decade, winner) %>%
  summarize(no_wins_7 = n(), .groups = "drop_last") %>%
  rename(country = winner)

# Losses per country per decade, built the same way from the `loser` column.
sevens_loss <- sevens %>%
  mutate(decade = floor(year(date) / 10) * 10) %>%
  group_by(decade, loser) %>%
  summarize(no_loss_7 = n(), .groups = "drop_last") %>%
  rename(country = loser)

sevens_win_loss <- sevens_win %>%
  full_join(sevens_loss, by = c("decade", "country")) %>%
  mutate(
    # A country missing from one side of the full join had none of that
    # outcome in the decade. Count it as 0 so an unbeaten or winless decade
    # yields a proportion of 1 or 0 instead of NA.
    no_wins_7 = coalesce(no_wins_7, 0L),
    no_loss_7 = coalesce(no_loss_7, 0L),
    # Every joined row has at least one win or loss, so the denominator
    # is never zero.
    prop_win_7 = no_wins_7 / (no_wins_7 + no_loss_7)
  )
# Per-decade fifteens record, one row per (decade, country): number of wins,
# number of losses, and prop_win_15 = wins / (wins + losses).
# NOTE(review): how drawn matches are encoded in `winner`/`loser` is not
# visible here -- confirm they are not being counted as a win or a loss.

# Wins per country per decade. `.groups = "drop_last"` spells out dplyr's
# default (the result stays grouped by decade) and silences its message.
fifteens_win <- fifteens %>%
  mutate(decade = floor(year(date) / 10) * 10) %>%
  group_by(decade, winner) %>%
  summarize(no_wins_15 = n(), .groups = "drop_last") %>%
  rename(country = winner)

# Losses per country per decade, built the same way from the `loser` column.
fifteens_loss <- fifteens %>%
  mutate(decade = floor(year(date) / 10) * 10) %>%
  group_by(decade, loser) %>%
  summarize(no_loss_15 = n(), .groups = "drop_last") %>%
  rename(country = loser)

fifteens_win_loss <- fifteens_win %>%
  full_join(fifteens_loss, by = c("decade", "country")) %>%
  mutate(
    # A country missing from one side of the full join had none of that
    # outcome in the decade. Count it as 0 so an unbeaten or winless decade
    # yields a proportion of 1 or 0 instead of NA.
    no_wins_15 = coalesce(no_wins_15, 0L),
    no_loss_15 = coalesce(no_loss_15, 0L),
    # Every joined row has at least one win or loss, so the denominator
    # is never zero.
    prop_win_15 = no_wins_15 / (no_wins_15 + no_loss_15)
  )
# Side-by-side fifteens and sevens records for the same country and decade.
# inner_join() keeps only (decade, country) pairs present in both tables;
# drop_na() then removes any remaining pair whose proportion is NA.
win_loss <- inner_join(
  fifteens_win_loss,
  sevens_win_loss,
  by = c("decade", "country")
) %>%
  drop_na(prop_win_15, prop_win_7)
# Static scatter of fifteens vs sevens win proportion, one labelled point per
# (country, decade). Only countries with at least three rows in `win_loss`
# are shown. Points above the y = x reference line have a higher win
# proportion at fifteens than at sevens for that decade.
win_loss %>%
  group_by(country) %>%
  mutate(count = n()) %>%
  ungroup() %>%
  filter(count >= 3) %>%
  ggplot(aes(x = prop_win_7, y = prop_win_15)) +
  geom_point(aes(color = country)) +
  geom_abline(intercept = 0, slope = 1) +
  scale_color_viridis_d() +
  # `parse = TRUE` used to sit inside aes(), where ggplot2 treats it as an
  # unknown aesthetic: it warns and ignores it. It is dropped here, which
  # leaves the labels as plain "country, decade" text exactly as before.
  ggrepel::geom_label_repel(
    aes(label = paste0(country, ", ", decade))
  ) +
  labs(
    title = "Proportion of Wins for Fifteens vs Sevens",
    x = "sevens",
    y = "fifteens"
  )
# Interactive version of the fifteens vs sevens scatter. Only countries with
# at least three rows in `win_loss` are shown; hovering a point shows its
# country and decade.
win_loss %>%
  group_by(country) %>%
  mutate(count = n()) %>%
  ungroup() %>%
  filter(count >= 3) %>%
  plotly::plot_ly(
    x = ~prop_win_7,
    y = ~prop_win_15,
    color = ~country,
    # State the trace type and mode explicitly instead of letting plotly
    # guess them and print "No trace type specified" / "No scatter mode
    # specified" messages.
    type = "scatter",
    mode = "markers",
    hoverinfo = "text",
    text = ~paste(country, "</br><br>", decade)
  ) %>%
  plotly::layout(
    title = "Proportion of Wins for Fifteens vs Sevens",
    xaxis = list(title = "sevens"),
    yaxis = list(title = "fifteens")
  )
# Animated version of the fifteens vs sevens scatter: one animation state per
# decade, with the current decade shown in the subtitle. Only countries with
# at least three rows in `win_loss` are shown.
win_loss %>%
  group_by(country) %>%
  mutate(count = n()) %>%
  ungroup() %>%
  filter(count >= 3) %>%
  ggplot(aes(x = prop_win_7, y = prop_win_15)) +
  geom_point(aes(color = country)) +
  geom_abline(intercept = 0, slope = 1) +
  scale_color_viridis_d() +
  ggrepel::geom_label_repel(aes(label = country)) +
  labs(
    title = "Proportion of Wins for Fifteens vs Sevens",
    # {closest_state} is filled in by gganimate with the decade being shown.
    subtitle = "for the decade {closest_state}",
    x = "sevens",
    y = "fifteens"
  ) +
  gganimate::transition_states(decade)