# Read one CSV per reporting period and tag every row with the first year of
# that period (the "2017 to 2018" file gets year = 2017, and so on).
paygap17 <- read_csv("UK Gender Pay Gap Data - 2017 to 2018.csv") |>
  mutate(year = 2017)
paygap18 <- read_csv("UK Gender Pay Gap Data - 2018 to 2019.csv") |>
  mutate(year = 2018)
paygap19 <- read_csv("UK Gender Pay Gap Data - 2019 to 2020.csv") |>
  mutate(year = 2019)
paygap20 <- read_csv("UK Gender Pay Gap Data - 2020 to 2021.csv") |>
  mutate(year = 2020)
paygap21 <- read_csv("UK Gender Pay Gap Data - 2021 to 2022.csv") |>
  mutate(year = 2021)
paygap22 <- read_csv("UK Gender Pay Gap Data - 2022 to 2023.csv") |>
  mutate(year = 2022)
# Stack the six yearly tables into one data frame and add `coSize`, a numeric
# stand-in for the EmployerSize band. Each band maps to its lower bound,
# except "Less than 250", which is coded as 100. "Not Provided" and any
# unrecognised band become NA.
paygap <- rbind(paygap17, paygap18, paygap19, paygap20, paygap21, paygap22) |>
  data.frame() |>
  mutate(coSize = case_when(
    EmployerSize == "1000 to 4999" ~ 1000,
    EmployerSize == "20,000 or more" ~ 20000,
    EmployerSize == "250 to 499" ~ 250,
    EmployerSize == "500 to 999" ~ 500,
    EmployerSize == "5000 to 19,999" ~ 5000,
    EmployerSize == "Less than 250" ~ 100,
    EmployerSize == "Not Provided" ~ NA_real_,
    TRUE ~ NA_real_
  ))
Gender Pay Gap in the UK
Data
The data this week comes from gender-pay-gap.service.gov.uk. The online tool reports by gender and occupation.
3D plots
For my research project, I’m practicing plotting data (as scatterplots) in 3D. Maybe even 4D with color, and 5D with color and faceting!
Admittedly (and terribly), I’m less concerned with the content of today’s data and more focused on the plotting. Apologies for not being more careful about repeated measures, correlated variables, and units. The topic is important and deserves careful consideration!
After a huge amount of time trying to use `subplot()` on `scatter3d`, I was finally able to show the relationship across years. It isn’t perfect, but it seems like it could be useful for trying to understand relationships between (in this case) 5 different quantitative variables.
# Holds one plotly figure per year; filled in by index.
plots <- list()

# custom grid style
# White grid and zero lines on a light grey background. Currently only
# referenced from the commented-out xaxis/yaxis/zaxis settings of the scenes.
axx <- list(
  gridcolor = "rgb(255, 255, 255)",
  zerolinecolor = "rgb(255, 255, 255)",
  showbackground = TRUE,
  backgroundcolor = "rgb(230, 230,230)"
)
# Build one 3D scatterplot per year, coloured by FemaleLowerQuartile. Each
# figure is bound to its own scene id ("scene1", "scene2", ...) so that the
# figures can be laid out side by side afterwards.
plot_years <- unique(paygap$year) # hoisted: invariant across iterations
for (i in seq_along(plot_years)) {
  plots[[i]] <- paygap |>
    filter(year == plot_years[i]) |>
    plot_ly(
      x = ~DiffMeanHourlyPercent,
      y = ~coSize,
      z = ~FemaleBonusPercent,
      mode = "markers",
      type = "scatter3d",
      color = ~FemaleLowerQuartile,
      # color = ~as.factor(year),
      # colors = c("red", "green", "blue", "black")[i],
      scene = paste0("scene", i)
    ) %>%
    add_annotations(
      # Year label for the panel. The x/y positions are hard coded to work
      # for 4 years; for i > 4 the lookups below yield NA.
      text = plot_years[i],
      x = c(0.5, 1, 0, 1)[i],
      y = c(1, 1, 0.5, 0.5)[i],
      yref = "paper",
      xref = "paper",
      # xanchor = "middle",
      # yanchor = "top",
      showarrow = FALSE,
      font = list(size = 15)
    )
}
# Combine the first four per-year figures into one widget and give each 3D
# scene one quadrant of the canvas through its `domain`. Only plots 1-4 are
# placed; any figures beyond the fourth are not shown.
fig <- subplot(
  plots[[1]],
  plots[[2]],
  plots[[3]],
  plots[[4]]
) %>%
  plotly::layout(
    title = "3D Scatterplots",
    scene = list( # needs to be scene and not scene1
      domain = list(x = c(0, 0.5), y = c(0.5, 1)),
      # xaxis=axx, yaxis=axx, zaxis=axx,
      aspectmode = "cube"
    ),
    scene2 = list(
      domain = list(x = c(0.5, 1), y = c(0.5, 1)),
      # xaxis=axx, yaxis=axx, zaxis=axx,
      aspectmode = "cube"
    ),
    scene3 = list(
      domain = list(x = c(0, 0.5), y = c(0, 0.5)),
      # xaxis=axx, yaxis=axx, zaxis=axx,
      aspectmode = "cube"
    ),
    scene4 = list(
      domain = list(x = c(0.5, 1), y = c(0, 0.5)),
      # xaxis=axx, yaxis=axx, zaxis=axx,
      aspectmode = "cube"
    )
  )
fig
# Reset the figure container before building the second set of plots.
plots <- list()

# custom grid style
# White grid and zero lines on a light grey background. Currently only
# referenced from the commented-out xaxis/yaxis/zaxis settings of the scenes.
axx <- list(
  gridcolor = "rgb(255, 255, 255)",
  zerolinecolor = "rgb(255, 255, 255)",
  showbackground = TRUE,
  backgroundcolor = "rgb(230, 230,230)"
)
# Build one 3D scatterplot per year, each drawn in a single colour picked by
# position from a four-colour palette. Each figure is bound to its own scene
# id ("scene1", "scene2", ...) so that the figures can be laid out side by
# side afterwards.
plot_years <- unique(paygap$year) # hoisted: invariant across iterations
for (i in seq_along(plot_years)) {
  plots[[i]] <- paygap |>
    filter(year == plot_years[i]) |>
    plot_ly(
      x = ~DiffMeanHourlyPercent,
      y = ~coSize,
      z = ~FemaleBonusPercent,
      mode = "markers",
      type = "scatter3d",
      color = ~as.factor(year),
      # Palette has four entries, so for i > 4 this lookup yields NA.
      colors = c("red", "green", "blue", "black")[i],
      scene = paste0("scene", i)
    ) %>%
    add_annotations(
      # Year label for the panel. The x/y positions are hard coded to work
      # for 4 years; for i > 4 the lookups below yield NA.
      text = plot_years[i],
      x = c(0.5, 1, 0, 1)[i],
      y = c(1, 1, 0.5, 0.5)[i],
      yref = "paper",
      xref = "paper",
      # xanchor = "middle",
      # yanchor = "top",
      showarrow = FALSE,
      font = list(size = 15)
    )
}
# Combine the first four per-year figures into one widget and give each 3D
# scene one quadrant of the canvas through its `domain`. Only plots 1-4 are
# placed; any figures beyond the fourth are not shown.
fig <- subplot(
  plots[[1]],
  plots[[2]],
  plots[[3]],
  plots[[4]]
) %>%
  plotly::layout(
    title = "3D Scatterplots",
    scene = list( # needs to be scene and not scene1
      domain = list(x = c(0, 0.5), y = c(0.5, 1)),
      # xaxis=axx, yaxis=axx, zaxis=axx,
      aspectmode = "cube"
    ),
    scene2 = list(
      domain = list(x = c(0.5, 1), y = c(0.5, 1)),
      # xaxis=axx, yaxis=axx, zaxis=axx,
      aspectmode = "cube"
    ),
    scene3 = list(
      domain = list(x = c(0, 0.5), y = c(0, 0.5)),
      # xaxis=axx, yaxis=axx, zaxis=axx,
      aspectmode = "cube"
    ),
    scene4 = list(
      domain = list(x = c(0.5, 1), y = c(0, 0.5)),
      # xaxis=axx, yaxis=axx, zaxis=axx,
      aspectmode = "cube"
    )
  )
fig