library(tidyverse)
library(tidytext)
library(praise)
library(scales)

Premier League Soccer
The Data
The data this week comes from the Premier League Match Data 2021-2022 via Evan Gower on Kaggle.
soccer <- read_csv("soccer21-22.csv")

Half time vs Full time
# Cross-tabulate the HTR and FTR columns of `soccer`.
# NOTE(review): HTR/FTR are presumably the half-time and full-time results
# coded H/D/A -- confirm against the dataset's documentation.
soccer |>
  select(HTR, FTR) |>
  table()
# Printed result from the original run (rows = HTR, columns = FTR):
#>    FTR
#> HTR  A  D  H
#>   A 77 14 10
#>   D 44 51 56
#>   H  8 23 97
# Bar chart of match counts per FTR value, with each bar's fill split by HTR.
ggplot(data = soccer, mapping = aes(x = FTR, fill = HTR)) +
  geom_bar()
# install.packages("remotes")
# remotes::install_github("davidsjoberg/ggsankey")
library(ggsankey)
# Reshape the HTR and FTR columns into the long format that the ggsankey
# geoms consume (x / node / next_x / next_node, as used in the plot below).
soccer_sankey <- make_long(soccer, HTR, FTR)
# Sankey diagram of the flow between HTR and FTR values.
# Nodes are filled and labelled by their value; the legend is dropped
# because the labels already identify each node.
ggplot(
  soccer_sankey,
  aes(
    x = x, next_x = next_x,
    node = node, next_node = next_node,
    fill = node, label = node
  )
) +
  geom_sankey(flow.alpha = 0.5, node.color = "gray30") +
  geom_sankey_label(size = 2, color = "white", fill = "gray40") +
  theme_void() +
  theme(legend.position = "none")
PCA
library(ggfortify)
# Principal component analysis on twelve per-match columns of `soccer`.
# NOTE(review): the H*/A* columns are presumably home/away shots, shots on
# target, fouls, corners, yellow cards and red cards -- confirm against the
# dataset's documentation.
# `scale. = TRUE` standardises each column to unit variance before the
# decomposition, so columns with larger raw ranges do not dominate.
soccer_pca <- soccer |>
  dplyr::select(HS, AS, HST, AST, HF, AF, HC, AC, HY, AY, HR, AR) |>
  prcomp(scale. = TRUE)

# Biplot of the PCA result: the original data frame is supplied so the
# points can be coloured by its FTR column, and the variable loadings are
# drawn and labelled.
soccer_pca |>
  autoplot(
    data = soccer, loadings = TRUE, loadings.label = TRUE,
    color = "FTR"
  )