library(tidyverse)
library(tidytext)
library(praise)
library(scales)
# Premier League Soccer ----

## The Data ----
# The data this week comes from the Premier League Match Data 2021-2022,
# via Evan Gower on Kaggle.
# Load the match data; the CSV is expected in the working directory.
soccer <- read_csv("soccer21-22.csv")
## Half time vs Full time ----
# Cross-tabulate the half-time result (HTR, rows) against the full-time
# result (FTR, columns).
# NOTE(review): the levels A / D / H presumably mean away win / draw /
# home win -- confirm against the dataset documentation.
soccer |>
  select(HTR, FTR) |>
  table()
#>    FTR
#> HTR  A  D  H
#>   A 77 14 10
#>   D 44 51 56
#>   H  8 23 97
# Stacked bar chart: one bar per full-time result, with each bar split
# (via fill) by the half-time result of the same matches.
soccer |>
  ggplot(aes(x = FTR, fill = HTR)) +
  geom_bar()
# install.packages("remotes")
# remotes::install_github("davidsjoberg/ggsankey")
library(ggsankey)
# Reshape HTR and FTR into the long x / next_x / node / next_node layout
# that the ggsankey geoms consume.
soccer_sankey <- soccer |>
  make_long(HTR, FTR)
# Sankey diagram of how half-time results flow into full-time results.
# Nodes are filled and labelled by result; the legend is dropped because
# the node labels already identify each result.
soccer_sankey |>
  ggplot(aes(
    x = x, next_x = next_x,
    node = node, next_node = next_node,
    fill = node, label = node
  )) +
  geom_sankey(flow.alpha = 0.5, node.color = "gray30") +
  geom_sankey_label(size = 2, color = "white", fill = "gray40") +
  theme_void() +
  theme(legend.position = "none")
## PCA ----
library(ggfortify)
# Principal component analysis on the per-match count columns.
# NOTE(review): the H* / A* pairs presumably hold home / away shots,
# shots on target, fouls, corners, yellow cards and red cards -- confirm
# against the dataset documentation.
# `scale. = TRUE` standardises each column to unit variance before the
# decomposition, so columns with larger raw counts do not dominate.
soccer_pca <- soccer |>
  dplyr::select(HS, AS, HST, AST, HF, AF, HC, AC, HY, AY, HR, AR) |>
  prcomp(scale. = TRUE)
# Biplot of the first two principal components. The original data frame
# is passed as `data` so points can be coloured by the full-time result
# (FTR); loading vectors are drawn and labelled with their column names.
soccer_pca |>
  autoplot(
    data = soccer,
    loadings = TRUE,
    loadings.label = TRUE,
    color = "FTR"
  )