library(tidyverse) # ggplot, lubridate, dplyr, stringr, readr...
library(praise)
library(reticulate)
use_python("/Users/jsh04747/miniforge3/bin/python3", required = TRUE)
Global Holidays
Some notes to myself about how to install Python packages.
To install a package in Python:
In R:
reticulate::py_install("matplotlib")
In bash:
pip install matplotlib
or
python3 -m pip install matplotlib
In Python (IPython/Jupyter only; the leading `!` is a shell escape):
!pip install matplotlib
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
The Data
This week we’re exploring how global holidays impact seasonal human mobility. We found the data via the article “Global holiday datasets for understanding seasonal human mobility and population dynamics” by Shengjie Lai et al. (thank you to @lgibson7 for finding the dataset).
Public and school holidays have important impacts on population mobility and dynamics across multiple spatial and temporal scales, subsequently affecting the transmission dynamics of infectious diseases and many socioeconomic activities. However, worldwide data on public and school holidays for understanding their changes across regions and years have not been assembled into a single, open-source and multitemporal dataset. To address this gap, an open access archive of data on public and school holidays in 2010–2019 across the globe at daily, weekly, and monthly timescales was constructed. Airline passenger volumes across 90 countries from 2010 to 2018 were also assembled to illustrate the usage of the holiday data for understanding the changing spatiotemporal patterns of population movements.
Sources:
Lai S., Sorichetta A. and WorldPop (2020). Global Public and School Holidays 2010-2019. Mapping seasonal denominator dynamics in low- and middle-income settings, and Exploring the seasonality of COVID-19, funded by The Bill and Melinda Gates Foundation.
Lai S., Sorichetta A. and WorldPop (2020). Monthly volume of airline passengers in 90 countries 2010-2018. Mapping seasonal denominator dynamics in low- and middle-income settings, and Exploring the seasonality of COVID-19, funded by The Bill and Melinda Gates Foundation.
In R
library(countrycode)
# Monthly airline passenger volumes per country (TidyTuesday, 2024-12-24).
monthly_passengers <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-12-24/monthly_passengers.csv"
)

# Lookup table from ISO3 country code to continent name, built from the
# code list that ships with the countrycode package.
iso3_codes <- unique(countrycode::codelist$iso3c)
iso_continent <- data.frame(
  ISO3 = iso3_codes,
  continent = countrycode(iso3_codes, "iso3c", "continent")
)

# Attach the continent to every row and build a Date for the first day of
# each month so the series can be plotted on a continuous time axis.
plot_data <- monthly_passengers |>
  left_join(iso_continent, by = "ISO3") |>
  mutate(month_year = ymd(paste(Year, Month, "01", sep = "-")))
# Line chart of monthly passenger totals: one line per country, coloured by
# continent, on a log10 y-axis.
# NOTE(review): the printed data later in this document shows `Total` missing
# for some countries (e.g. ALB, ZAF) while `Total_OS` is filled in; ggplot
# will presumably drop those rows -- confirm whether `Total_OS` should be
# used as a fallback.

# The x-axis starts six months before the first observation so the country
# code labels can sit in the left margin of the panel.
label_x <- ymd("2009-07-01")

plot_data |>
  ggplot(aes(x = month_year, y = Total, group = ISO3, color = continent)) +
  geom_point(alpha = 0.5) +
  geom_line() +
  # One ISO3 label per country, placed at the height of its January 2010 value.
  geom_text(
    data = filter(plot_data, Year == 2010, Month == 1),
    mapping = aes(label = ISO3, y = Total),
    x = label_x,
    size = 2
  ) +
  scale_y_log10() +
  xlim(c(label_x, ymd("2018-10-01"))) +
  labs(
    y = "Total number of air passengers in thousands",
    x = "Date",
    color = "Continent",
    title = "Monthly volume of airline passengers across \ndifferent countries 2010-2018"
  ) +
  theme_bw()
In Python
# Monthly airline passenger volumes per country (TidyTuesday, 2024-12-24).
monthly_passengers = pd.read_csv(
    'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-12-24/monthly_passengers.csv'
)

print(monthly_passengers)
ISO3 Year Month Total Domestic International Total_OS
0 ALB 2010 1 NaN NaN NaN 117.350
1 ALB 2010 2 NaN NaN NaN 86.535
2 ALB 2010 3 NaN NaN NaN 103.795
3 ALB 2010 4 NaN NaN NaN 102.038
4 ALB 2010 5 NaN NaN NaN 109.037
... ... ... ... ... ... ... ...
7237 ZAF 2017 8 NaN NaN NaN 3425.617
7238 ZAF 2017 9 NaN NaN NaN 3454.075
7239 ZAF 2017 10 NaN NaN NaN 3678.780
7240 ZAF 2017 11 NaN NaN NaN 3482.341
7241 ZAF 2017 12 NaN NaN NaN 3812.322
[7242 rows x 7 columns]
pip install pycountry pycountry-convert
import pycountry_convert as pc
import numpy as np
# Continent codes returned by pycountry_convert, mapped to display names.
# Built once here rather than on every call, since the function is applied
# to each row of the data.
_CONTINENT_NAMES = {
    "AF": "Africa",
    "AS": "Asia",
    "EU": "Europe",
    "NA": "North America",
    "SA": "South America",
    "OC": "Oceania",
    "AN": "Antarctica",
}


def get_continent(iso3):
    """Return the continent name for an ISO 3166-1 alpha-3 country code.

    Parameters
    ----------
    iso3 : str
        Three-letter country code, e.g. ``"ALB"``.

    Returns
    -------
    str
        The continent name, or ``"Unknown"`` if the code cannot be resolved
        at any step (conversion to alpha-2, continent lookup, or name
        mapping).
    """
    try:
        # Convert ISO3 to ISO2
        iso2 = pc.country_alpha3_to_country_alpha2(iso3)
        # Get continent code
        continent_code = pc.country_alpha2_to_continent_code(iso2)
        # Map continent codes to names
        return _CONTINENT_NAMES[continent_code]
    except Exception:
        # Deliberately best-effort: any unresolvable code becomes "Unknown"
        # so that applying this to a whole column never aborts.
        return "Unknown"
# Apply the function to the ISO3 column
monthly_passengers['Continent'] = monthly_passengers['ISO3'].apply(get_continent)

# Build a timestamp for the first day of each (Year, Month) pair so the
# series can be plotted on a continuous time axis.
monthly_passengers['month_year'] = pd.to_datetime(
    monthly_passengers['Year'].astype(str) + '-' + monthly_passengers['Month'].astype(str),
    format='%Y-%m',
)

print(monthly_passengers)
ISO3 Year Month Total ... International Total_OS Continent month_year
0 ALB 2010 1 NaN ... NaN 117.350 Europe 2010-01-01
1 ALB 2010 2 NaN ... NaN 86.535 Europe 2010-02-01
2 ALB 2010 3 NaN ... NaN 103.795 Europe 2010-03-01
3 ALB 2010 4 NaN ... NaN 102.038 Europe 2010-04-01
4 ALB 2010 5 NaN ... NaN 109.037 Europe 2010-05-01
... ... ... ... ... ... ... ... ... ...
7237 ZAF 2017 8 NaN ... NaN 3425.617 Africa 2017-08-01
7238 ZAF 2017 9 NaN ... NaN 3454.075 Africa 2017-09-01
7239 ZAF 2017 10 NaN ... NaN 3678.780 Africa 2017-10-01
7240 ZAF 2017 11 NaN ... NaN 3482.341 Africa 2017-11-01
7241 ZAF 2017 12 NaN ... NaN 3812.322 Africa 2017-12-01
[7242 rows x 9 columns]
# Pick one tab10 colour per continent present in the data.
categories = monthly_passengers['Continent'].unique()
colors = plt.cm.tab10(np.linspace(0, 1, len(categories)))

continent_to_color = {continent: i for i, continent in enumerate(categories)}
# Continents that already have a legend entry; only the first country drawn
# for each continent carries a label, so the legend lists each one once.
handled_continents = set()

plt.figure(figsize=(10, 6))

# One line per country, coloured by its continent.
for iso3 in monthly_passengers['ISO3'].unique():
    country_data = monthly_passengers[monthly_passengers['ISO3'] == iso3]
    continent = country_data['Continent'].iloc[0]
    color = colors[continent_to_color[continent]]

    label = continent if continent not in handled_continents else None

    plt.plot(country_data['month_year'],
             country_data['Total'],
             label=label,
             color=color,
             marker='o',
             markersize=3,
             alpha=0.5)

    handled_continents.add(continent)

plt.xlabel('Date')
plt.ylabel('Total number of air passengers in thousands')
plt.title('Monthly volume of airline passengers across \ndifferent countries 2010-2018')
plt.yscale('log')
# Place the legend outside the axes, to the right of the plot area.
plt.legend(title="Continent", loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0)

plt.tight_layout()
# bbox_inches='tight' keeps the outside legend inside the saved image.
plt.savefig("holidaytravel.png", dpi=300, bbox_inches='tight')

plt.show()
# Print an encouraging message from the praise package to close the session.
praise()
[1] "You are bee's knees!"