The Data

The data on Women’s Rugby comes from ScrumQueens by way of Jacquie Tran.

# Read the fifteens and sevens match tables from CSV files in the working
# directory.
fifteens <- readr::read_csv(file = "fifteens.csv")
sevens <- readr::read_csv(file = "sevens.csv")

Over time

Are some countries better at sevens than at fifteens? Does that difference change over time?

# Number of sevens wins per country and decade.
# `decade` is the match year rounded down to a multiple of 10.
# `count()` tallies rows per (decade, winner) and returns an ungrouped tibble,
# so no leftover `decade` grouping (or summarise() grouping message) leaks
# into later steps.
sevens_win <- sevens %>%
  mutate(decade = floor(year(date) / 10) * 10) %>%
  count(decade, country = winner, name = "no_wins_7")

# Number of sevens losses per country and decade.
# `decade` is the match year rounded down to a multiple of 10.
# `count()` tallies rows per (decade, loser) and returns an ungrouped tibble,
# so no leftover `decade` grouping (or summarise() grouping message) leaks
# into later steps.
sevens_loss <- sevens %>%
  mutate(decade = floor(year(date) / 10) * 10) %>%
  count(decade, country = loser, name = "no_loss_7")

# Sevens win proportion per country and decade.
# The full join keeps countries that only won or only lost in a decade; their
# missing count is filled with 0 so the proportion is 1 or 0 rather than NA
# (an NA proportion would silently remove them from later comparisons).
sevens_win_loss <- sevens_win %>%
  full_join(sevens_loss, by = c("decade", "country")) %>%
  mutate(
    no_wins_7 = coalesce(no_wins_7, 0L),
    no_loss_7 = coalesce(no_loss_7, 0L),
    prop_win_7 = no_wins_7 / (no_wins_7 + no_loss_7)
  )
# Number of fifteens wins per country and decade.
# `decade` is the match year rounded down to a multiple of 10.
# `count()` tallies rows per (decade, winner) and returns an ungrouped tibble,
# so no leftover `decade` grouping (or summarise() grouping message) leaks
# into later steps.
fifteens_win <- fifteens %>%
  mutate(decade = floor(year(date) / 10) * 10) %>%
  count(decade, country = winner, name = "no_wins_15")

# Number of fifteens losses per country and decade.
# `decade` is the match year rounded down to a multiple of 10.
# `count()` tallies rows per (decade, loser) and returns an ungrouped tibble,
# so no leftover `decade` grouping (or summarise() grouping message) leaks
# into later steps.
fifteens_loss <- fifteens %>%
  mutate(decade = floor(year(date) / 10) * 10) %>%
  count(decade, country = loser, name = "no_loss_15")

# Fifteens win proportion per country and decade.
# The full join keeps countries that only won or only lost in a decade; their
# missing count is filled with 0 so the proportion is 1 or 0 rather than NA
# (an NA proportion would silently remove them from later comparisons).
fifteens_win_loss <- fifteens_win %>%
  full_join(fifteens_loss, by = c("decade", "country")) %>%
  mutate(
    no_wins_15 = coalesce(no_wins_15, 0L),
    no_loss_15 = coalesce(no_loss_15, 0L),
    prop_win_15 = no_wins_15 / (no_wins_15 + no_loss_15)
  )
# Put the fifteens and sevens proportions side by side, one row per
# (decade, country), and keep only rows where both proportions are available.
win_loss <- full_join(
  fifteens_win_loss,
  sevens_win_loss,
  by = c("decade", "country")
) %>%
  filter(!is.na(prop_win_15), !is.na(prop_win_7))
# Static scatterplot: one labelled point per country-decade, with the sevens
# win proportion on x and the fifteens win proportion on y. Only countries
# with at least three rows (decades) in `win_loss` are shown.
win_loss %>%
  group_by(country) %>%
  mutate(count = n()) %>%
  ungroup() %>%
  filter(count >= 3) %>%
  ggplot(aes(x = prop_win_7, y = prop_win_15)) +
  geom_point(aes(color = country)) +
  # Reference line y = x: points above it have a higher win proportion in
  # fifteens than in sevens.
  geom_abline(intercept = 0, slope = 1) +
  scale_color_viridis_d() +
  # Labels are plain "country, decade" text. `parse` is not an aesthetic, so
  # it is no longer passed inside aes(), where it only triggered an
  # unknown-aesthetic warning.
  ggrepel::geom_label_repel(
    aes(label = paste0(country, ", ", decade))
  ) +
  ggtitle("Proportion of Wins for Fifteens vs Sevens") +
  xlab("sevens") +
  ylab("fifteens")

# Interactive version of the scatterplot: hovering over a point shows the
# country and decade. Only countries with at least three rows (decades) in
# `win_loss` are shown.
win_loss %>%
  group_by(country) %>%
  mutate(count = n()) %>%
  ungroup() %>%
  filter(count >= 3) %>%
  plotly::plot_ly(
    x = ~prop_win_7,
    y = ~prop_win_15,
    color = ~country,
    # State the trace type and mode explicitly instead of letting plotly
    # guess them (which prints "No trace type specified" messages).
    type = "scatter",
    mode = "markers",
    hoverinfo = 'text',
    # NOTE(review): '</br>' is not a standard HTML tag; the string is left
    # unchanged here -- confirm the hover text renders as intended.
    text = ~paste(country, '</br><br>', decade)
  ) %>%
  plotly::layout(
    title = "Proportion of Wins for Fifteens vs Sevens",
    xaxis = list(title = "sevens"),
    yaxis = list(title = "fifteens")
  )
# Animated scatterplot: the same sevens-vs-fifteens comparison, shown one
# decade at a time, for countries with at least three rows in `win_loss`.
win_loss %>%
  ungroup() %>%
  add_count(country, name = "count") %>%
  filter(count >= 3) %>%
  ggplot(mapping = aes(x = prop_win_7, y = prop_win_15)) +
  geom_point(mapping = aes(color = country)) +
  # y = x reference line separating "better at fifteens" from "better at sevens"
  geom_abline(slope = 1, intercept = 0) +
  ggrepel::geom_label_repel(mapping = aes(label = country)) +
  scale_color_viridis_d() +
  labs(
    title = "Proportion of Wins for Fifteens vs Sevens",
    subtitle = 'for the decade {closest_state}',
    x = "sevens",
    y = "fifteens"
  ) +
  # One animation state per decade; {closest_state} in the subtitle is filled
  # in by gganimate.
  gganimate::transition_states(decade)
Animated scatterplot of countries, stepping through the decades, with the proportion of wins in sevens on the x-axis and the proportion of wins in fifteens on the y-axis.

Over time, are some countries more successful at sevens than at fifteens?