library(tidyverse) # ggplot, lubridate, dplyr, stringr, readr...
library(bobsburgersR)
library(praise)
Bob’s Burgers
The Data
This week we’re exploring Bob’s Burgers dialogue! Thank you to Steven Ponce for the data and for a blog post demonstrating how to visualize it!
See the {bobsburgersR} R Package for the original transcript data, as well as additional information about each episode!
# Download the TidyTuesday (2024-11-19) episode metrics CSV into a tibble.
episode_metrics <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-11-19/episode_metrics.csv"
)
Dialogue
How often are particular words used in a particular episode? Can we see any patterns over season and episode?
We start with some gutter humor…
# Words to search for in the dialogue. The parentheses around "doo doo" are
# interpreted as a regex group, so that entry matches the literal text
# "doo doo".
poop_words <- c(
  "fart", "crap", "toilet", "buns", "diarrhea",
  "poop", "(doo doo)"
)

# Collapse the words into one alternation; the leading "(?i)" flag makes the
# match case-insensitive.
poop_pattern <- str_c("(?i)", str_c(poop_words, collapse = "|"))
poop_pattern
#> [1] "(?i)fart|crap|toilet|buns|diarrhea|poop|(doo doo)"

# Keep only the dialogue lines that match the pattern. The second condition
# discards every line containing "farth" (case-insensitive), which would
# otherwise be matched by "fart".
# NOTE(review): this also drops a line that contains "farth" together with
# another listed word -- confirm that is acceptable.
poop_data <- transcript_data |>
  filter(
    str_detect(raw_text, poop_pattern),
    !str_detect(raw_text, "(?i)farth")
  )
# Count the matching dialogue lines per season/episode, join the episode
# metadata, and draw one panel per season with the count on the y axis.
# full_join() keeps episodes that appear only in imdb_wikipedia_data; their
# num_wrds is NA, so no point is drawn for them.
poop_data |>
  count(season, episode, name = "num_wrds") |>
  full_join(imdb_wikipedia_data, by = c("season", "episode")) |>
  ggplot(aes(x = episode, y = num_wrds, color = as.factor(season))) +
  geom_point(size = 3, show.legend = FALSE, alpha = 0.8) +
  facet_wrap(~season) +
  labs(
    y = "number of poop words",
    title = "Dialogue from Bob's Burgers"
  )
# Word(s) to search for in the dialogue.
burger_words <- c("burger")

# Collapse the words into one alternation; the leading "(?i)" flag makes the
# match case-insensitive.
burger_pattern <- str_c("(?i)", str_c(burger_words, collapse = "|"))
burger_pattern
#> [1] "(?i)burger"

# Drop rows with no dialogue text, then flag each remaining line with whether
# it matches the pattern (word_in is TRUE/FALSE per line).
other_data <- transcript_data |>
  drop_na(raw_text) |>
  mutate(word_in = str_detect(raw_text, burger_pattern))
# Sum the flagged dialogue lines per season/episode, join the episode
# metadata, and draw one panel per season with the count on the y axis.
# Each point is drawn as the text "hamburger" in the Font Awesome family.
# NOTE(review): presumably the font renders that text as a burger icon via a
# ligature; this needs the font to be installed and registered -- confirm.
other_data |>
  group_by(season, episode) |>
  summarize(num_wrds = sum(word_in), .groups = "drop") |>
  full_join(imdb_wikipedia_data, by = c("season", "episode")) |>
  ggplot(aes(x = episode, y = num_wrds)) +
  geom_text(
    label = "hamburger",
    size = 3,
    color = "darkorange",
    family = "Font Awesome 5 Free Solid"
  ) +
  facet_wrap(~season) +
  labs(
    subtitle = "number of times burger is said",
    title = "Dialogue from Bob's Burgers"
  )
# Print a compliment from the {praise} package; the commented line below is
# one example of its output.
praise()
#> [1] "You are best!"