library(tidyverse) # ggplot, lubridate, dplyr, stringr, readr...
library(ggpattern)
library(praise)
# Missing CDC Data
## The Data
This week we’re exploring datasets that the Trump administration has purged.
An effort is underway to back up this publicly funded data before it is lost. This week’s dataset contains metadata about CDC datasets backed up on archive.org.
“The removal of HIV- and LGBTQ-related resources from the websites of the Centers for Disease Control and Prevention and other health agencies is deeply concerning and creates a dangerous gap in scientific information and data to monitor and respond to disease outbreaks,” the Infectious Disease Society of America said in a statement. “Access to this information is crucial for infectious diseases and HIV health care professionals who care for people with HIV and members of the LGBTQ community and is critical to efforts to end the HIV epidemic.”
# Read the three TidyTuesday 2025-02-11 CSV files from GitHub.
# Each tibble is bound to the name that matches the file it is read from.
cdc_datasets <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-02-11/cdc_datasets.csv"
)
fpi_codes <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-02-11/fpi_codes.csv"
)
omb_codes <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-02-11/omb_codes.csv"
)
## Combining data
# Bar chart of the number of datasets per program.
# Only program_code and public_access_level are kept before the join, so
# program_name is supplied by fpi_codes (matched on program_code_pod_format).
cdc_datasets |>
  select(program_code, public_access_level) |>
  left_join(fpi_codes, by = c("program_code" = "program_code_pod_format")) |>
  ggplot(aes(x = program_name, fill = program_name)) +
  geom_bar() +
  # The x-axis labels are blanked; the fill legend identifies each bar instead.
  theme(
    legend.position = "bottom",
    axis.text.x = element_blank()
  ) +
  guides(fill = guide_legend(nrow = 8)) +
  labs(x = "", y = "", fill = "")
# Horizontal bar chart of dataset counts per public_access_level,
# drawn as one panel per bureau_code.
cdc_datasets |>
  ggplot(aes(x = public_access_level)) +
  geom_bar() +
  coord_flip() + # access levels run down the vertical axis
  facet_wrap(~bureau_code) +
  theme_minimal() +
  labs(y = "", x = "")
# Patterned bar chart of dataset counts per program_code.
# Fill colour encodes the program code; the bar pattern encodes
# public_access_level.
cdc_datasets |>
  select(program_code, public_access_level) |>
  ggplot(
    aes(
      x = program_code,
      fill = program_code,
      pattern = public_access_level
    )
  ) +
  # No trailing comma after the last argument: it would pass an empty argument.
  geom_bar_pattern(color = "black", pattern_density = 0.15) +
  # The x-axis labels are blanked; the legends at the bottom carry the labels.
  theme(
    legend.position = "bottom",
    axis.text.x = element_blank()
  ) +
  guides(
    fill = guide_legend(nrow = 11),
    color = guide_legend(nrow = 16),
    pattern_fill = guide_legend(nrow = 10),
    pattern = guide_legend(nrow = 10)
  ) +
  labs(x = "", y = "", fill = "", color = "", pattern_fill = "", pattern = "")
# Patterned bar chart of dataset counts per program name.
# program_name is supplied by the join with fpi_codes (matched on
# program_code_pod_format); the bar pattern encodes public_access_level.
cdc_datasets |>
  select(program_code, public_access_level) |>
  left_join(fpi_codes, by = c("program_code" = "program_code_pod_format")) |>
  ggplot(
    aes(
      x = program_name,
      fill = program_name,
      pattern = public_access_level
    )
  ) +
  # No trailing comma after the last argument: it would pass an empty argument.
  geom_bar_pattern(color = "black", pattern_density = 0.15) +
  # The x-axis labels are blanked; the legends at the bottom carry the labels.
  theme(
    legend.position = "bottom",
    axis.text.x = element_blank()
  ) +
  guides(
    fill = guide_legend(nrow = 12),
    color = guide_legend(nrow = 12),
    pattern_fill = guide_legend(nrow = 12),
    pattern = guide_legend(nrow = 12)
  ) +
  labs(x = "", y = "", fill = "", color = "", pattern_fill = "", pattern = "")
# Patterned bar chart of dataset counts per category, restricted to
# categories that have at least 15 datasets. Fill colour encodes the
# category; the bar pattern encodes public_access_level.
p <- cdc_datasets |>
  group_by(category) |>
  mutate(count = n()) |> # number of rows in each category
  ungroup() |> # grouping is only needed for the per-category count
  filter(count >= 15) |>
  ggplot(
    aes(
      x = category,
      fill = category,
      pattern = public_access_level
    )
  ) +
  geom_bar_pattern(color = "black") +
  # The x-axis labels are blanked; the legends at the bottom carry the labels.
  theme(
    legend.position = "bottom",
    axis.text.x = element_blank()
  ) +
  guides(
    fill = guide_legend(nrow = 8),
    color = guide_legend(nrow = 8),
    pattern_fill = guide_legend(nrow = 8),
    pattern = guide_legend(nrow = 8)
  ) +
  labs(x = "", y = "", fill = "", color = "", pattern_fill = "", pattern = "")

# Render the stored plot.
p

praise()
[1] "You are super-duper!"