The Data

This week's data comes from the U.S. Bureau of Labor Statistics (BLS), specifically table cpsaat17, collected across several years (2015–2020).

# Load the two CSV files for this analysis from the working directory.
employed <- read_csv(file = "employed.csv")
earn <- read_csv(file = "earn.csv")

Variables

Before plotting the data, let’s see what variables we are working with.

# Clean up the industry column:
#   * drop rows whose `industry` value is one of the labels listed below
#     (presumably demographic headers and sub-sector rows that would
#     otherwise double count -- TODO confirm against the raw table);
#   * collapse every industry label containing "Mining" into plain "Mining".
employed <- employed %>%
  filter(
    !(industry %in% c(
      "Women", "Men", "Asian", "Black or African American", "White",
      "Other services, except private households",
      "Private households", "Wholesale trade",
      "Retail trade"
    ))
  ) %>%
  mutate(
    industry = case_when(
      str_detect(industry, "Mining") ~ "Mining",
      TRUE ~ industry
    )
  )

# Cross-tabulate row counts by industry and year to check coverage.
table(select(employed, industry, year))
##                                     year
## industry                             2015 2016 2017 2018 2019 2020
##   Agriculture and related              66   66   66   66   66   66
##   Construction                         66   66   66   66   66   66
##   Durable goods                        66   66   66   66   66   66
##   Education and health services        66   66   66   66   66   66
##   Financial activities                 66   66   66   66   66   66
##   Information                          66   66   66   66   66   66
##   Leisure and hospitality              66   66   66   66   66   66
##   Manufacturing                        66   66   66   66   66   66
##   Mining                               66   66   66   66   66   66
##   Nondurable goods                     66   66   66   66   66   66
##   Other services                       66   66   66   66   66   66
##   Professional and business services   66   66   66   66   66   66
##   Public administration                66   66   66   66   66   66
##   Transportation and utilities         66   66   66   66   66   66
##   Wholesale and retail trade           66   66   66   66   66   66
# Count rows per race_gender level to see which demographic groups
# are present in the data.
employed %>%
  select(race_gender) %>%
  table()
## .
##                     Asian Black or African American                       Men 
##                       990                      1056                      1056 
##                     TOTAL                     White                     Women 
##                      1056                      1056                      1056
# Keep only the Men / Women rows and spread them so that each gender's
# employ_n lands in its own column (`Men`, `Women`).
employ_gender <- pivot_wider(
  filter(employed, race_gender %in% c("Men", "Women")),
  names_from = race_gender,
  values_from = employ_n
)

Plotting

For a while, I’ve been considering making a barbell plot, and I think today’s data will work for it!

# Line plot of total employment per industry over time
# (one coloured line per industry, using the "TOTAL" rows only).
employed %>%
  filter(race_gender == "TOTAL", !is.na(industry)) %>%
  group_by(industry, year) %>%
  # na.rm = TRUE (matching the gender totals computed for the barbell plot)
  # so a single missing employ_n does not turn the whole industry-year
  # total into NA and silently remove it from the line.
  summarize(
    employ_tot = sum(employ_n, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot() +
  geom_line(aes(x = year, y = employ_tot, color = industry))

# Barbell plot: for every industry (one facet each) and year, draw a segment
# between the total number of men and women employed, with a coloured point
# at each end.
employ_gender %>%
  filter(!is.na(industry)) %>%
  group_by(industry, year) %>%
  summarize(
    men_tot = sum(Men, na.rm = TRUE),
    women_tot = sum(Women, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot() +
  geom_segment(
    aes(x = year, xend = year, y = men_tot, yend = women_tot),
    color = "black"
  ) +
  # Mapping colour to a constant key ("men" / "women") lets ggplot build the
  # legend directly from the layers that are actually drawn, instead of from
  # an unrelated dummy variable whose points are then overplotted.
  geom_point(aes(x = year, y = men_tot, color = "men")) +
  geom_point(aes(x = year, y = women_tot, color = "women")) +
  coord_flip() +
  ylab("Total Number Employed") +
  # Named values tie each colour to its legend key explicitly rather than
  # relying on the positional order of levels and labels.
  scale_color_manual(
    "gender",
    values = c(men = "Dodgerblue", women = "yellow")
  ) +
  # `scales` spelled out in full (the original relied on partial matching).
  facet_wrap(~industry, ncol = 1, scales = "free_y")

# Finish with an encouraging message printed to the console.
praise()
## [1] "You are remarkable!"