Code
library(tidyverse) # ggplot, lubridate, dplyr, stringr, readr...
library(praise)

This week we’re exploring Bird Sightings at Sea! The data this week comes from Te Papa Tongarewa, The Museum of New Zealand. It consists of log book entries of bird sightings at sea near New Zealand, from 1969 to 1990.
Thank you to David Hood for the dataset suggestion.
The data was recorded using guidelines for the Australasian Seabird Mapping Scheme and counts seabirds seen from a ship during a 10-minute period. The data includes geolocations of the sightings, bird species, numbers and behaviour, observer and ship name, and observation date and time.
species_common_name in the birds dataset always match up with the split columns such as species_scientific_name, age, wan_plumage_phase, plumage_phase, and sex.

beaufort_scale <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2026/2026-04-14/beaufort_scale.csv')
# Read the bird sightings, then add a `genus` column holding the first run of
# word characters found in `species_scientific_name`.
birds <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2026/2026-04-14/birds.csv"
)
birds <- mutate(
  birds,
  genus = stringr::str_extract(species_scientific_name, "\\w+")
)

# Companion `sea_states` table from the same TidyTuesday release.
sea_states <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2026/2026-04-14/sea_states.csv"
)
# Companion `ships` table from the same TidyTuesday release; it is joined to
# the sightings by `record_id` further down.
ships <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2026/2026-04-14/ships.csv"
)

# Tally sightings per genus, most frequent first. Rows without a genus are
# dropped before counting so that no NA group appears in the table.
birds |>
  drop_na(genus) |>
  count(genus, name = "count_genus", sort = TRUE)
# A tibble: 47 × 2
genus count_genus
<chr> <int>
1 Diomedea 15375
2 Puffinus 6238
3 Pterodroma 4995
4 Thalassarche 3231
5 Daption 2966
6 Macronectes 2526
7 Pachyptila 2457
8 Larus 2366
9 Procellaria 1786
10 Morus 1679
# ℹ 37 more rows
# Keep only sightings whose genus occurs more than 1000 times. The per-genus
# row count is stored in `count_genus`, and the result is left grouped by
# `genus`.
birds |>
  drop_na(genus) |>
  group_by(genus) |>
  mutate(count_genus = n()) |>
  filter(count_genus > 1000)
# A tibble: 43,619 × 28
# Groups: genus [10]
bird_observation_id record_id species_common_name species_scientific_n…¹
<dbl> <dbl> <chr> <chr>
1 1 1083001 Royal / Wandering albat… Diomedea epomophora /…
2 2 1083001 Black-browed albatross … Diomedea impavida / m…
3 3 1083001 Cape petrel Daption capense
4 4 1083001 Fairy prion Pachyptila turtur
5 5 1083001 Sooty shearwater Puffinus griseus
6 6 1084001 Royal albatross sensu l… Diomedea epomophora /…
7 7 1084001 Black-browed albatross … Diomedea impavida / m…
8 8 1084001 Sooty shearwater Puffinus griseus
9 9 1084002 Royal albatross sensu l… Diomedea epomophora /…
10 10 1084002 Black-browed albatross … Diomedea impavida / m…
# ℹ 43,609 more rows
# ℹ abbreviated name: ¹species_scientific_name
# ℹ 24 more variables: species_abbreviation <chr>, age <chr>,
# wan_plumage_phase <chr>, plumage_phase <chr>, sex <lgl>, count <dbl>,
# n_feeding <dbl>, feeding <lgl>, n_sitting_on_water <dbl>,
# sitting_on_water <lgl>, n_sitting_on_ice <dbl>, sitting_on_ice <lgl>,
# sitting_on_ship <lgl>, in_hand <lgl>, n_flying_past <dbl>, …
# Plot what Puffinus and Diomedea were doing when sighted, over time.
# Each point is one non-zero behaviour count from a sighting; its size is
# log10 of that count, its colour the genus, and its row the behaviour column
# the count came from.
birds |>
  # NOTE(review): 99999 looks like a sentinel for an unrecorded count --
  # confirm against the data dictionary.
  filter(count != 99999) |>
  # Keep only sightings where all six behaviour counts were recorded.
  # drop_na() states this directly; the previous `(!is.na(x) + 0)` sum only
  # worked because `!` binds more loosely than `+` in R.
  drop_na(
    n_feeding, n_sitting_on_water, n_sitting_on_ice,
    n_flying_past, n_accompanying, n_following_ship
  ) |>
  filter(genus %in% c("Puffinus", "Diomedea")) |>
  # One row per (sighting, behaviour) pair.
  pivot_longer(
    c(
      n_feeding, n_sitting_on_ice, n_sitting_on_water,
      n_flying_past, n_accompanying, n_following_ship
    ),
    values_to = "number", names_to = "where"
  ) |>
  # Zero counts are removed before taking logs (log10(0) is -Inf).
  filter(number > 0) |>
  mutate(log10count = log10(number)) |>
  # `date` is not created above, so it is expected to come from `ships`.
  left_join(ships, by = "record_id") |>
  # NOTE(review): assumes `date` was parsed as a date class, so the character
  # bounds are coerced for the comparison -- confirm the column type.
  filter(date > "1975-01-01", date < "1990-01-01") |>
  ggplot(aes(x = date, y = where, size = log10count, color = genus)) +
  # width = 0: jitter vertically only, leaving dates exact.
  geom_jitter(alpha = 0.5, width = 0) +
  scale_color_brewer(palette = "Set1")

praise()
#> [1] "You are unreal!"