The hardware and bandwidth for this mirror is donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.
The avilistr
package provides access to the AviList
Global Avian Checklist, the first unified global bird taxonomy. This
vignette demonstrates how to work with the data for common
ornithological and biodiversity analyses.
library(avilistr)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ forcats 1.0.0 ✔ stringr 1.5.1
#> ✔ lubridate 1.9.4 ✔ tibble 3.2.1
#> ✔ purrr 1.0.4 ✔ tidyr 1.3.1
#> ✔ readr 2.1.5
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
The package provides three main datasets:
# Load the datasets
data(avilist_2025) # Complete dataset (26 fields)
data(avilist_2025_short) # Essential fields (~12 fields)
data(avilist_metadata) # Field descriptions
# Check data dimensions
cat("Full dataset:", nrow(avilist_2025), "records,", ncol(avilist_2025), "fields\n")
#> Full dataset: 33684 records, 26 fields
cat("Short dataset:", nrow(avilist_2025_short), "records,", ncol(avilist_2025_short), "fields\n")
#> Short dataset: 33684 records, 14 fields
# Count species by order (top 10)
species_by_order <- avilist_2025_short %>%
filter(Taxon_rank == "species") %>%
count(Order, sort = TRUE) %>%
head(10)
print(species_by_order)
#> # A tibble: 10 × 2
#> Order n
#> <chr> <int>
#> 1 Passeriformes 6705
#> 2 Apodiformes 472
#> 3 Psittaciformes 406
#> 4 Charadriiformes 392
#> 5 Piciformes 385
#> 6 Columbiformes 350
#> 7 Galliformes 307
#> 8 Accipitriformes 252
#> 9 Strigiformes 245
#> 10 Coraciiformes 185
# Visualize most diverse orders
ggplot(species_by_order, aes(x = reorder(Order, n), y = n)) +
geom_col(fill = "steelblue", alpha = 0.8) +
coord_flip() +
labs(
title = "Most Species-Rich Bird Orders",
subtitle = "Top 10 orders by number of species",
x = "Order",
y = "Number of Species",
caption = "Data: AviList Global Avian Checklist v2025"
) +
theme_minimal()
# Most diverse bird families
family_richness <- avilist_2025_short %>%
filter(Taxon_rank == "species") %>%
count(Family, Family_English_name, sort = TRUE) %>%
head(15)
print(family_richness)
#> # A tibble: 15 × 3
#> Family Family_English_name n
#> <chr> <chr> <int>
#> 1 Tyrannidae Tyrant Flycatchers and Allies 441
#> 2 Thraupidae Tanagers and Allies 390
#> 3 Trochilidae Hummingbirds 363
#> 4 Muscicapidae Chats, Old World Flycatchers, and Allies 352
#> 5 Columbidae Doves and Pigeons 350
#> 6 Furnariidae Ovenbirds and Woodcreepers 321
#> 7 Accipitridae Kites, Old World Vultures, Eagles, and Hawks 250
#> 8 Thamnophilidae Antbirds, Antshrikes, Antwrens, and Antvireos 239
#> 9 Picidae Woodpeckers 237
#> 10 Fringillidae Finches, Euphonias, and Allies 236
#> 11 Strigidae Owls 228
#> 12 Psittaculidae Old World Parrots 200
#> 13 Meliphagidae Honeyeaters 195
#> 14 Turdidae Thrushes and Allies 194
#> 15 Phasianidae Partridges, Pheasants, Grouse, and Allies 187
# Visualize family diversity
ggplot(family_richness, aes(x = reorder(Family_English_name, n), y = n)) +
geom_col(fill = "darkgreen", alpha = 0.8) +
coord_flip() +
labs(
title = "Most Species-Rich Bird Families",
subtitle = "Top 15 families by number of species",
x = "Family",
y = "Number of Species",
caption = "Data: AviList Global Avian Checklist v2025"
) +
theme_minimal() +
theme(axis.text.y = element_text(size = 10))
# Get all thrush species
thrushes <- avilist_2025_short %>%
filter(Family == "Turdidae", Taxon_rank == "species") %>%
select(Scientific_name, English_name_AviList)
cat("Number of thrush species:", nrow(thrushes), "\n")
#> Number of thrush species: 194
head(thrushes)
#> # A tibble: 6 × 2
#> Scientific_name English_name_AviList
#> <chr> <chr>
#> 1 Grandala coelicolor Grandala
#> 2 Sialia sialis Eastern Bluebird
#> 3 Sialia mexicana Western Bluebird
#> 4 Sialia currucoides Mountain Bluebird
#> 5 Pinarornis plumosus Boulder Chat
#> 6 Stizorhina finschi Finsch's Rufous Thrush
# Get all raptors (birds of prey)
raptor_families <- c("Accipitridae", "Falconidae", "Strigidae", "Tytonidae")
raptors <- avilist_2025_short %>%
filter(Family %in% raptor_families, Taxon_rank == "species") %>%
count(Family, Family_English_name, sort = TRUE)
print(raptors)
#> # A tibble: 4 × 3
#> Family Family_English_name n
#> <chr> <chr> <int>
#> 1 Accipitridae Kites, Old World Vultures, Eagles, and Hawks 250
#> 2 Strigidae Owls 228
#> 3 Falconidae Falcons and Caracaras 65
#> 4 Tytonidae Bay Owls and Barn Owls 17
# Find species with "Robin" in their name
robins <- avilist_2025_short %>%
filter(str_detect(English_name_AviList, "Robin"), Taxon_rank == "species") %>%
select(Scientific_name, English_name_AviList, Family) %>%
arrange(Family)
print(robins)
#> # A tibble: 100 × 3
#> Scientific_name English_name_AviList Family
#> <chr> <chr> <chr>
#> 1 Tychaedon coryphoeus Karoo Scrub Robin Muscicapidae
#> 2 Tychaedon leucosticta Forest Scrub Robin Muscicapidae
#> 3 Tychaedon signata Brown Scrub Robin Muscicapidae
#> 4 Tychaedon quadrivirgata Bearded Scrub Robin Muscicapidae
#> 5 Tychaedon barbata Miombo Scrub Robin Muscicapidae
#> 6 Cercotrichas podobe Black Scrub Robin Muscicapidae
#> 7 Cercotrichas galactotes Rufous-tailed Scrub Robin Muscicapidae
#> 8 Cercotrichas paena Kalahari Scrub Robin Muscicapidae
#> 9 Cercotrichas hartlaubi Brown-backed Scrub Robin Muscicapidae
#> 10 Cercotrichas leucophrys White-browed Scrub Robin Muscicapidae
#> # ℹ 90 more rows
# Explore a specific genus (Turdus)
turdus_species <- avilist_2025_short %>%
filter(str_detect(Scientific_name, "^Turdus "), Taxon_rank == "species") %>%
select(Scientific_name, English_name_AviList) %>%
arrange(Scientific_name)
cat("Number of Turdus species:", nrow(turdus_species), "\n")
#> Number of Turdus species: 105
head(turdus_species, 10)
#> # A tibble: 10 × 2
#> Scientific_name English_name_AviList
#> <chr> <chr>
#> 1 Turdus abyssinicus Abyssinian Thrush
#> 2 Turdus albicollis White-necked Thrush
#> 3 Turdus albocinctus White-collared Blackbird
#> 4 Turdus amaurochalinus Creamy-bellied Thrush
#> 5 Turdus ardosiaceus Eastern Red-legged Thrush
#> 6 Turdus arthuri Campina Thrush
#> 7 Turdus assimilis White-throated Thrush
#> 8 Turdus atrogularis Black-throated Thrush
#> 9 Turdus aurantius White-chinned Thrush
#> 10 Turdus bewsheri Comoro Thrush
# Summary of data completeness
data_completeness <- avilist_2025 %>%
summarise(
total_records = n(),
missing_scientific_names = sum(is.na(Scientific_name)),
missing_families = sum(is.na(Family)),
missing_orders = sum(is.na(Order)),
missing_avilist_names = sum(is.na(English_name_AviList))
)
print(data_completeness)
#> # A tibble: 1 × 5
#> total_records missing_scientific_names missing_families missing_orders
#> <int> <int> <int> <int>
#> 1 33684 0 46 0
#> # ℹ 1 more variable: missing_avilist_names <int>
# Compare AviList vs Clements naming
name_comparison <- avilist_2025 %>%
filter(Taxon_rank == "species") %>%
summarise(
total_species = n(),
has_avilist_name = sum(!is.na(English_name_AviList)),
has_clements_name = sum(!is.na(English_name_Clements_v2024)),
has_both_names = sum(!is.na(English_name_AviList) & !is.na(English_name_Clements_v2024)),
names_differ = sum(English_name_AviList != English_name_Clements_v2024, na.rm = TRUE)
)
print(name_comparison)
#> # A tibble: 1 × 5
#> total_species has_avilist_name has_clements_name has_both_names names_differ
#> <int> <int> <int> <int> <int>
#> 1 11131 11131 11026 11026 1906
# Examples where names differ between sources
name_differences <- avilist_2025 %>%
filter(
Taxon_rank == "species",
!is.na(English_name_AviList),
!is.na(English_name_Clements_v2024),
English_name_AviList != English_name_Clements_v2024
) %>%
select(Scientific_name, English_name_AviList, English_name_Clements_v2024) %>%
head(10)
print(name_differences)
#> # A tibble: 10 × 3
#> Scientific_name English_name_AviList English_name_Clements_v…¹
#> <chr> <chr> <chr>
#> 1 Apteryx rowi Okarito Kiwi Okarito Brown Kiwi
#> 2 Eudromia formosa Quebracho Crested Tinamou Quebracho Crested-Tinamou
#> 3 Eudromia elegans Elegant Crested Tinamou Elegant Crested-Tinamou
#> 4 Tinamus tao Grey Tinamou Gray Tinamou
#> 5 Crypturellus duidae Grey-legged Tinamou Gray-legged Tinamou
#> 6 Dendrocygna guttata Spotted Whistling Duck Spotted Whistling-Duck
#> 7 Dendrocygna eytoni Plumed Whistling Duck Plumed Whistling-Duck
#> 8 Dendrocygna viduata White-faced Whistling Duck White-faced Whistling-Du…
#> 9 Dendrocygna autumnalis Black-bellied Whistling Duck Black-bellied Whistling-…
#> 10 Dendrocygna arborea West Indian Whistling Duck West Indian Whistling-Du…
#> # ℹ abbreviated name: ¹English_name_Clements_v2024
# For large analyses, use the short dataset when possible
system.time({
short_analysis <- avilist_2025_short %>%
filter(Taxon_rank == "species") %>%
count(Order)
})
#> user system elapsed
#> 0.011 0.001 0.012
# Filter early to reduce data size
songbirds <- avilist_2025_short %>%
filter(Order == "Passeriformes", Taxon_rank == "species")
cat("Songbird species:", nrow(songbirds), "\n")
#> Songbird species: 6705
# Select only needed columns to reduce memory usage
essential_fields <- avilist_2025 %>%
select(Scientific_name, English_name_AviList, Family, Order, Taxon_rank)
cat("Memory usage reduced from", ncol(avilist_2025), "to", ncol(essential_fields), "columns\n")
#> Memory usage reduced from 26 to 5 columns
taxize
library(taxize)
# Get a sample of species for validation
sample_species <- avilist_2025_short %>%
filter(Family == "Turdidae", Taxon_rank == "species") %>%
pull(Scientific_name) %>%
head(5)
# Validate names with GBIF (commented out to avoid API calls in vignette)
# gbif_validation <- get_gbifid(sample_species)
rebird
for eBird Integrationlibrary(rebird)
# Get Cornell Lab species codes from full dataset
thrush_codes <- avilist_2025 %>%
filter(Family == "Turdidae", Taxon_rank == "species") %>%
select(Scientific_name, Species_code_Cornell_Lab) %>%
filter(!is.na(Species_code_Cornell_Lab))
# Example: Get recent observations (commented out to avoid API calls)
# recent_thrushes <- ebirdregion("US-NY", species = thrush_codes$Species_code_Cornell_Lab[1])
# Find monotypic genera (genera with only one species)
monotypic_genera <- avilist_2025_short %>%
filter(Taxon_rank == "species") %>%
mutate(genus = str_extract(Scientific_name, "^[A-Z][a-z]+")) %>%
count(genus, Family) %>%
filter(n == 1) %>%
arrange(Family)
cat("Number of monotypic genera:", nrow(monotypic_genera), "\n")
#> Number of monotypic genera: 890
# Genera per family
monotypic_summary <- monotypic_genera %>%
count(Family, name = "monotypic_genera") %>%
arrange(desc(monotypic_genera)) %>%
head(10)
print(monotypic_summary)
#> # A tibble: 10 × 2
#> Family monotypic_genera
#> <chr> <int>
#> 1 Thraupidae 48
#> 2 Trochilidae 40
#> 3 Accipitridae 37
#> 4 Tyrannidae 35
#> 5 Anatidae 32
#> 6 Furnariidae 28
#> 7 Thamnophilidae 26
#> 8 Fringillidae 21
#> 9 Meliphagidae 20
#> 10 Sturnidae 20
# Analyze type localities (where species were first described)
type_localities <- avilist_2025 %>%
filter(Taxon_rank == "species", !is.na(Type_locality)) %>%
mutate(
continent = case_when(
str_detect(Type_locality, regex("Australia|New Zealand", ignore_case = TRUE)) ~ "Australasia",
str_detect(Type_locality, regex("Europe|European", ignore_case = TRUE)) ~ "Europe",
str_detect(Type_locality, regex("Africa|African", ignore_case = TRUE)) ~ "Africa",
str_detect(Type_locality, regex("Asia|Asian|China|Japan|India", ignore_case = TRUE)) ~ "Asia",
str_detect(Type_locality, regex("America|Brazil|Peru|Mexico|Canada|USA", ignore_case = TRUE)) ~ "Americas",
TRUE ~ "Other"
)
) %>%
count(continent, sort = TRUE)
print(type_localities)
#> # A tibble: 6 × 2
#> continent n
#> <chr> <int>
#> 1 Other 8233
#> 2 Americas 1459
#> 3 Asia 411
#> 4 Australasia 263
#> 5 Africa 257
#> 6 Europe 145
# Understand the available fields
print(avilist_metadata)
#> # A tibble: 26 × 6
#> field_name in_full_version in_short_version description data_type source
#> <chr> <lgl> <lgl> <chr> <chr> <chr>
#> 1 Authority TRUE TRUE Author and… character AviLi…
#> 2 AvibaseID TRUE TRUE Avibase da… character Aviba…
#> 3 Bibliographic_… TRUE TRUE Bibliograp… character AviLi…
#> 4 BirdLife_DataZ… TRUE FALSE BirdLife D… characte… BirdL…
#> 5 Birds_of_the_W… TRUE FALSE Birds of t… characte… Corne…
#> 6 Decision_summa… TRUE TRUE Data field… character AviLi…
#> 7 English_name_A… TRUE TRUE English co… character AviLi…
#> 8 English_name_B… TRUE FALSE Name field… character BirdL…
#> 9 English_name_C… TRUE FALSE English co… character Cleme…
#> 10 Extinct_or_pos… TRUE TRUE Data field… character AviLi…
#> # ℹ 16 more rows
# Fields available in short vs full dataset
cat("Fields in short dataset:\n")
#> Fields in short dataset:
short_fields <- avilist_metadata %>%
filter(in_short_version) %>%
pull(field_name)
cat(paste(short_fields, collapse = ", "), "\n\n")
#> Authority, AvibaseID, Bibliographic_details, Decision_summary, English_name_AviList, Extinct_or_possibly_extinct, Family, Family_English_name, IUCN_Red_List_Category, Order, Range, Scientific_name, Sequence, Taxon_rank
cat("Additional fields in full dataset:\n")
#> Additional fields in full dataset:
full_only_fields <- avilist_metadata %>%
filter(in_full_version & !in_short_version) %>%
pull(field_name)
cat(paste(full_only_fields, collapse = ", "), "\n")
#> BirdLife_DataZone_URL, Birds_of_the_World_URL, English_name_BirdLife_v9, English_name_Clements_v2024, Gender_of_genus, Original_description_URL, Proposal_number, Protonym, Species_code_Cornell_Lab, Title_of_original_description, Type_locality, Type_species_of_genus
The avilistr
package provides comprehensive access to
the unified AviList Global Avian Checklist. Key takeaways:
taxize
and rebird
for enhanced
functionalityFor more advanced functionality, future versions of the package may include dedicated search and validation functions.
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.