This week's data comes from the babynames R package by Hadley Wickham.
Note that other baby-name datasets exist, such as the nzbabynames
package by Emily Kothe.
# Read the babynames CSV and standardise its column names.
babynames <- janitor::clean_names(read_csv("babynames.csv"))
# Read the NZ names CSV, standardise its column names, and shorten the sex
# labels to single letters ("Female" -> "F", "Male" -> "M"). Any other value
# in `sex` is left untouched.
nz_baby <- read_csv("nz_names.csv") %>%
  janitor::clean_names() %>%
  mutate(
    sex = if_else(
      sex == "Female",
      "F",
      if_else(sex == "Male", "M", sex)
    )
  )
# Read maorinames.csv, standardise its column names, and shorten the sex
# labels to single letters ("Female" -> "F", "Male" -> "M"). Any other value
# in `sex` is left untouched.
maori_baby <- read_csv("maorinames.csv") %>%
  janitor::clean_names() %>%
  mutate(
    sex = if_else(
      sex == "Female",
      "F",
      if_else(sex == "Male", "M", sex)
    )
  )
# Keep only the name/sex/year combinations present in both `babynames` and
# `nz_baby`, then expose the two count columns under country-specific names
# (`count` -> nz_count, `n` -> us_count). The original columns are retained.
all_names <- inner_join(
  babynames,
  nz_baby,
  by = c("name", "sex", "year")
) %>%
  mutate(
    nz_count = count,
    us_count = n
  )
# Character vector of the ten most common female names in `nz_baby`, ranked
# by the total count summed over all years. top_n() keeps ties at the cutoff,
# so more than ten names can be returned.
pop_nz_names_F <- nz_baby %>%
  filter(sex == "F") %>%
  group_by(name) %>%
  summarize(nz_count = sum(count)) %>%
  arrange(nz_count) %>%
  # Rank explicitly by nz_count: a bare top_n(10) silently uses whichever
  # column happens to be last and prints a "Selecting by ..." message.
  top_n(10, nz_count) %>%
  # Pull the column by name rather than relying on it being the last one.
  pull(name)
# Character vector of the ten most common male names in `nz_baby`, ranked
# by the total count summed over all years. top_n() keeps ties at the cutoff,
# so more than ten names can be returned.
pop_nz_names_M <- nz_baby %>%
  filter(sex == "M") %>%
  group_by(name) %>%
  summarize(nz_count = sum(count)) %>%
  arrange(nz_count) %>%
  # Rank explicitly by nz_count: a bare top_n(10) silently uses whichever
  # column happens to be last and prints a "Selecting by ..." message.
  top_n(10, nz_count) %>%
  # Pull the column by name rather than relying on it being the last one.
  pull(name)
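Hilary has been called the most poisoned baby name in history (see the article referenced here). The chart below plots the yearly count of girls named Hilary, with a vertical line marking 1992.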
# Line chart of the yearly count (`n`) of female babies named "Hilary" in
# `babynames`, with a vertical reference line at 1992.
babynames %>%
  filter(name == "Hilary", sex == "F") %>%
  ggplot(aes(x = year, y = n)) +
  geom_line() +
  geom_vline(xintercept = 1992)
# One panel per name in `pop_nz_names_F`, comparing yearly female counts in
# the two countries. NZ counts are multiplied by 100 so both lines share the
# primary (US) axis; the secondary axis divides by 100 to show NZ counts on
# their original scale.
all_names %>%
  filter(name %in% pop_nz_names_F, sex == "F") %>%
  ggplot() +
  geom_line(aes(x = year, y = us_count, color = "US")) +
  geom_line(aes(x = year, y = nz_count * 100, color = "NZ")) +
  facet_wrap(~name) +
  scale_color_manual(values = c(NZ = "#52854C", US = "#CC79A7")) +
  scale_y_continuous(
    sec.axis = sec_axis(~ . / 100, name = "Name Count in NZ")
  ) +
  labs(
    title = "Comparison of female name popularity",
    subtitle = "top 10 most popular NZ female names over all years",
    x = "",
    y = "Name Count in US",
    color = "Country"
  )
# One panel per name in `pop_nz_names_M`, comparing yearly male counts in
# the two countries. NZ counts are multiplied by 100 so both lines share the
# primary (US) axis; the secondary axis divides by 100 to show NZ counts on
# their original scale.
all_names %>%
  filter(name %in% pop_nz_names_M, sex == "M") %>%
  ggplot() +
  geom_line(aes(x = year, y = us_count, color = "US")) +
  geom_line(aes(x = year, y = nz_count * 100, color = "NZ")) +
  facet_wrap(~name) +
  scale_color_manual(values = c(NZ = "#52854C", US = "#CC79A7")) +
  scale_y_continuous(
    sec.axis = sec_axis(~ . / 100, name = "Name Count in NZ")
  ) +
  labs(
    title = "Comparison of male name popularity",
    subtitle = "top 10 most popular NZ male names over all years",
    x = "",
    y = "Name Count in US",
    color = "Country"
  )
# Scatter plot of US count against NZ count for every name/sex pair in the
# 2017 rows of `all_names`, coloured by sex, with both axes drawn on a log10
# scale.
all_names %>%
  filter(year == 2017) %>%
  ggplot(aes(x = us_count, y = nz_count, color = sex)) +
  geom_point() +
  coord_trans(x = "log10", y = "log10")