library(tidyverse)
library(tidytext)
library(praise)
Bob Ross Paintings
The Data
This week's data comes from Jared Wilber’s dataset on Bob Ross paintings, via @frankiethull’s Bob Ross Colors data package.
This is data from the paintings of Bob Ross featured in the TV Show ‘The Joy of Painting’.
# Load the Bob Ross paintings data from a CSV file in the working directory.
bob_ross <- readr::read_csv("bob_ross.csv")

# Fill palette for the paint colours: 18 hex codes. The vector is unnamed, so
# scale_fill_manual() assigns the values to the fill levels in order.
# NOTE(review): the order looks like the paint names sorted alphabetically
# (Alizarin Crimson ... Yellow Ochre) -- confirm against the data columns.
hex_color <- c(
  "#4E1500", "#000000", "#DB0000", "#8A3324", "#FFEC00", "#5F2E1F",
  "#CD5C5C", "#FFB800", "#000000", "#FFFFFF", "#000000", "#0C0040",
  "#102E3C", "#021E44", "#0A3410", "#FFFFFF", "#221B15", "#C79B00"
)
Grouping the paintings
I’d like to group the paintings by their titles. That is, what are the most frequent words used to describe the paintings and can we categorize them into a few groups based on those words?
# The 20 most frequent non-stop-words appearing in the painting titles.
#
# The titles are pulled out as a plain character vector before splitting, so
# str_split() receives strings rather than a data frame and no
# machine-generated column name has to be renamed afterwards.
top_words <- bob_ross %>%
  pull(painting_title) %>%
  stringr::str_split(boundary("word")) %>%
  unlist() %>%
  tolower() %>%
  tibble(word = .) %>%
  # Drop common English stop words (tidytext's stop_words table).
  anti_join(stop_words, by = "word") %>%
  pull(word) %>%
  # Frequency table, most frequent first; keep the names of the top 20.
  table() %>%
  sort(decreasing = TRUE) %>%
  head(20) %>%
  names()

# The same words joined with "|", i.e. a single regex alternation pattern.
top_words_or <- paste(top_words, collapse = "|")
# Flag which paintings have `word` in their title.
#
# word: a single string, passed to str_detect() as the pattern (so it is
#   interpreted as a regular expression and matches anywhere in the title).
#   Titles are lower-cased before matching, so `word` should be lower case.
#
# Returns a one-column data frame, `word_group`, with one logical value per
# row of `bob_ross`.
#
# Reads `bob_ross` from the enclosing (global) environment.
inword <- function(word) {
  bob_ross %>%
    # .env$word pins the pattern to the function argument even if the data
    # ever gains a column that is also called `word`.
    mutate(
      word_group = str_detect(tolower(painting_title), .env$word)
    ) %>%
    select(word_group)
}
# Paint usage per season, faceted by the top title words.
#
# Steps: build one logical column per top word (does the title contain it?),
# attach those columns to the painting data, keep only the painting/word pairs
# that match, total each paint column per season and word, then plot the
# totals as stacked bars with one facet per word.
top_words %>%
  # Name each word by itself so the resulting columns carry the word as
  # their name directly, with no positional renaming step.
  purrr::set_names() %>%
  map(function(w) inword(w)[["word_group"]]) %>%
  as_tibble() %>%
  bind_cols(bob_ross) %>%
  # Select the word columns by name rather than by a hard-coded first:last
  # range, so the step keeps working when the set of top words changes.
  pivot_longer(
    cols = all_of(top_words),
    names_to = "word",
    values_to = "exist"
  ) %>%
  filter(exist) %>% # keep only the TRUE for each word
  group_by(season, word) %>%
  summarize(
    across(Black_Gesso:Alizarin_Crimson, sum),
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = Black_Gesso:Alizarin_Crimson,
    names_to = "paint_col",
    values_to = "count"
  ) %>%
  ggplot(aes(x = season, y = count, fill = paint_col)) +
  geom_col() +
  scale_fill_manual(values = hex_color) +
  facet_wrap(~word)
[‘Alizarin Crimson’, ‘Black Gesso’, ‘Bright Red’, ‘Burnt Umber’, ‘Cadmium Yellow’, ‘Dark Sienna’, ‘Indian Red’, ‘Indian Yellow’, ‘Liquid Black’, ‘Liquid Clear’, ‘Midnight Black’, ‘Phthalo Blue’, ‘Phthalo Green’, ‘Prussian Blue’, ‘Sap Green’, ‘Titanium White’, ‘Van Dyke Brown’, ‘Yellow Ochre’]
# Paint usage per season across all paintings.
#
# Totals each paint column within a season, reshapes the totals to one row
# per season and paint, and draws them as stacked bars filled with the
# hex_color palette.
bob_ross %>%
  group_by(season) %>%
  summarize(across(Black_Gesso:Alizarin_Crimson, sum)) %>%
  pivot_longer(
    cols = Black_Gesso:Alizarin_Crimson,
    names_to = "paint_col",
    values_to = "count"
  ) %>%
  ggplot(aes(x = season, y = count, fill = paint_col)) +
  # geom_col() draws bar heights straight from `count`.
  geom_col() +
  scale_fill_manual(values = hex_color)