library(tidyverse) # ggplot, lubridate, dplyr, stringr, readr...
library(praise)
library(scales)
library(tidytext)
library(devtools)
library(ggwordcloud)
library(png)
library(svglite)
Shakespeare Dialogue
Data
This week we’re exploring dialogue in Shakespeare plays. The dataset this week comes from shakespeare.mit.edu (via github.com/nrennie/shakespeare) which is the Web’s first edition of the Complete Works of William Shakespeare. The site has offered Shakespeare’s plays and poetry to the internet community since 1993.
# Load the dialogue of each play from the TidyTuesday 2024-09-17 release.
# All three files live under the same directory, so the shared prefix is
# factored out to keep each call short and the file names easy to compare.
tt_base_url <- paste0(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/",
  "master/data/2024/2024-09-17/"
)
hamlet <- readr::read_csv(paste0(tt_base_url, "hamlet.csv"))
macbeth <- readr::read_csv(paste0(tt_base_url, "macbeth.csv"))
romeo_juliet <- readr::read_csv(paste0(tt_base_url, "romeo_juliet.csv"))
Romeo & Juliet
After removing stop words, we create word clouds showing the most common words spoken by Romeo and by Juliet in Romeo & Juliet. The analysis is adapted from @deepdk.
# Keep only the lines spoken by the two title characters; the filtered data
# replaces the full play in `romeo_juliet`.
romeo_juliet <- romeo_juliet |>
  filter(character %in% c("Romeo", "Juliet"))
# Create a custom list of words to exclude.
# The words must be wrapped in c() so they form ONE `word` column with one
# row per word. Passing them as separate arguments to data.frame() builds a
# single-row frame in which only "thou" is in `word`, so an anti-join on
# `word` would leave "thy", "thee", etc. in the counts.
custom_stop_words <- data.frame(
  word = c(
    "thou", "thy", "thee", "thine", "art",
    "hast", "dost", "ere", "o", "hath"
  )
)
# Split the dialogue into one word per row, drop stop words and bare
# numbers, strip a possessive "'s", and count each word per character.
# Both anti-joins name their key explicitly so they match on `word` only,
# regardless of any other columns the two tables might share.
word_counts <- romeo_juliet |>
  unnest_tokens(word, dialogue) |>
  anti_join(stop_words, by = "word") |> # Remove common stop words
  filter(!str_detect(word, "^[0-9]+$")) |> # Remove numbers
  anti_join(custom_stop_words, by = "word") |> # Remove custom words
  mutate(word = stringr::str_replace(word, "'s", "")) |>
  count(character, word, sort = TRUE)
word_counts
# A tibble: 1,957 × 3
character word n
<chr> <chr> <int>
1 Romeo love 52
2 Juliet romeo 41
3 Romeo thy 41
4 Romeo thee 38
5 Juliet love 35
6 Juliet thee 33
7 Juliet thy 32
8 Juliet night 30
9 Romeo death 22
10 Juliet nurse 20
# ℹ 1,947 more rows
# Word counts for Juliet's lines only.
juliet <- word_counts |>
  filter(character == "Juliet")
# Word counts for Romeo's lines only.
romeo <- word_counts |>
  filter(character == "Romeo")
We wanted to use a different font, so we load the MedievalSharp
font from Google Fonts.
# Register the MedievalSharp Google font under the family name stored in
# `my_font`, then turn on showtext rendering so graphics devices can use it.
my_font <- "MedievalSharp"
sysfonts::font_add_google("MedievalSharp", my_font)
showtext::showtext_auto()
# Word cloud of Juliet's words: text size and colour both scale with the
# word count, and the words are laid out using a PNG image as a mask.
juliet |>
  # filter(n > 1) |>
  ggplot(aes(label = word, size = n, color = n)) +
  # Alternative layout: ggwordcloud::geom_text_wordcloud(shape = "cardioid")
  ggwordcloud::geom_text_wordcloud_area(
    mask = png::readPNG("FlipAlphaShakespeare.png"),
    rm_outside = TRUE,
    family = my_font
  ) +
  scale_size_area(max_size = 20) +
  theme_minimal() +
  scale_color_gradient(low = "#03c6fc", high = "#5203fc") +
  labs(title = "Shakespeare in Words") +
  theme(
    plot.title = ggtext::element_textbox_simple(family = my_font),
    # No caption is set in labs(); this only styles one if added later.
    plot.caption = ggtext::element_textbox_simple(family = my_font)
  )
# Word cloud of Romeo's words: text size and colour both scale with the
# word count, and the words are laid out using a PNG image as a mask.
romeo |>
  # filter(n > 1) |>
  ggplot(aes(label = word, size = n, color = n)) +
  # Alternative layout: ggwordcloud::geom_text_wordcloud(shape = "cardioid")
  ggwordcloud::geom_text_wordcloud_area(
    mask = png::readPNG("AlphaShakespeare.png"),
    rm_outside = TRUE,
    family = my_font
  ) +
  scale_size_area(max_size = 20) +
  theme_minimal() +
  labs(title = "Shakespeare in Words") +
  theme(
    plot.title = ggtext::element_textbox_simple(family = my_font),
    # No caption is set in labs(); this only styles one if added later.
    plot.caption = ggtext::element_textbox_simple(family = my_font)
  ) +
  scale_color_gradient(low = "#a5fc03", high = "#034efc")
# Finish with an encouraging message from the praise package.
praise()
[1] "You are swell!"