Search and retrieve data from the Global Biodiversity Information Facility (GBIF)
rgbif
is an R package to search and retrieve data from the Global Biodiversity Information Facility (GBIF). rgbif
wraps R code around the GBIF API to allow you to talk to GBIF from R.
Install from CRAN
install.packages("rgbif")
Or install the development version from GitHub
devtools::install_github("ropensci/rgbif")
Load rgbif
library(rgbif)
Search by type of record, all observational in this case
occ_count(basisOfRecord='OBSERVATION')
#> [1] 100384118
Records for Puma concolor with lat/long data (georeferenced) only. Note that hasCoordinate
in occ_search()
is the same as georeferenced
in occ_count()
.
occ_count(taxonKey=2435099, georeferenced=TRUE)
#> [1] 2766
All georeferenced records in GBIF
occ_count(georeferenced=TRUE)
#> [1] 449080881
Records from Denmark
denmark_code <- isocodes[grep("Denmark", isocodes$name), "code"]
occ_count(country=denmark_code)
#> [1] 8734543
Number of records in a particular dataset
occ_count(datasetKey='9e7ea106-0bf8-4087-bb61-dfe4f29e0f17')
#> [1] 4591
All records from 2012
occ_count(year=2012)
#> [1] 36162988
Records for a particular dataset, and only for observation records (basisOfRecord 'OBSERVATION')
occ_count(datasetKey='e707e6da-e143-445d-b41d-529c4a777e8b', basisOfRecord='OBSERVATION')
#> [1] 2120907
Get possible values to be used in taxonomic rank arguments in functions
taxrank()
#> [1] "kingdom" "phylum" "class" "order"
#> [5] "family" "genus" "species" "infraspecific"
name_lookup()
does full text search of name usages covering the scientific and vernacular name, the species description, distribution and the entire classification across all name usages of all or some checklists. Results are ordered by relevance as this search usually returns a lot of results.
By default name_lookup()
returns five slots of information: meta, data, facets, hierarchies, and names. The elements of the hierarchies and names slots are named by their matching GBIF key in the data.frame
in the data slot.
out <- name_lookup(query='mammalia')
names(out)
#> [1] "meta" "data" "facets" "hierarchies" "names"
out$meta
#> offset limit endOfRecords count
#> 1 0 100 FALSE 122151
head(out$data)
#> key nubKey parentKey parent phylum phylumKey classKey
#> 1 125798198 359 137006861 Chordata Chordata 137006861 125798198
#> 2 116665331 359 116842680 Chordata Chordata 116842680 116665331
#> 3 359 359 44 Chordata Chordata 44 359
#> 4 125826646 359 137006861 Chordata Chordata 137006861 125826646
#> 5 137066410 359 137066409 Macroscelidea Chordata 116842680 137066409
#> 6 102402290 359 102545028 Chordata Chordata 102545028 102402290
#> canonicalName authorship nameType rank numOccurrences kingdom
#> 1 Mammalia WELLFORMED CLASS 0 <NA>
#> 2 Mammalia Linnaeus, 1758 WELLFORMED CLASS 0 Animalia
#> 3 Mammalia Linnaeus, 1758 WELLFORMED CLASS 0 Animalia
#> 4 Mammalia Linnaeus, 1758 WELLFORMED CLASS 0 <NA>
#> 5 Mammalia WELLFORMED ORDER 0 Animalia
#> 6 Mammalia WELLFORMED CLASS 0 Animalia
#> kingdomKey order orderKey family familyKey genus genusKey
#> 1 NA <NA> NA <NA> NA <NA> NA
#> 2 116630539 <NA> NA <NA> NA <NA> NA
#> 3 1 <NA> NA <NA> NA <NA> NA
#> 4 NA <NA> NA <NA> NA <NA> NA
#> 5 116630539 Mammalia 137066410 <NA> NA <NA> NA
#> 6 101719444 <NA> NA <NA> NA <NA> NA
out$facets
#> NULL
out$hierarchies[1:2]
#> $`125798198`
#> rankkey name
#> 1 137006861 Chordata
#>
#> $`116665331`
#> rankkey name
#> 1 116630539 Animalia
#> 2 116842680 Chordata
out$names[2]
#> $`116665331`
#> vernacularName language
#> 1 Mammals ENGLISH
Search for a genus
head(name_lookup(query='Cnaemidophorus', rank="genus", return="data"))
#> key nubKey parentKey parent kingdom phylum
#> 1 116755723 1858636 110614854 Pterophoridae Animalia Arthropoda
#> 2 1858636 1858636 8863 Pterophoridae Animalia Arthropoda
#> 3 125802004 1858636 125793784 Pterophoridae <NA> <NA>
#> 4 134773282 1858636 NA <NA> <NA> <NA>
#> 5 127882857 1858636 127804516 Pterophoridae Animalia Arthropoda
#> 6 128073730 1858636 128015003 Viruses Viruses <NA>
#> order family genus kingdomKey phylumKey classKey
#> 1 Lepidoptera Pterophoridae Cnaemidophorus 116630539 116668755 116686069
#> 2 Lepidoptera Pterophoridae Cnaemidophorus 1 54 216
#> 3 Lepidoptera Pterophoridae Cnaemidophorus NA NA 137009267
#> 4 <NA> <NA> Cnaemidophorus NA NA NA
#> 5 Lepidoptera Pterophoridae Cnaemidophorus 127795487 127795488 127795683
#> 6 <NA> <NA> Cnaemidophorus 128015003 NA NA
#> orderKey familyKey genusKey canonicalName authorship nameType
#> 1 116843281 110614854 116755723 Cnaemidophorus Wallengren, 1862 WELLFORMED
#> 2 797 8863 1858636 Cnaemidophorus Wallengren, 1862 WELLFORMED
#> 3 125810165 125793784 125802004 Cnaemidophorus Wallengren, 1862 WELLFORMED
#> 4 NA NA 134773282 Cnaemidophorus WELLFORMED
#> 5 127795981 127804516 127882857 Cnaemidophorus WELLFORMED
#> 6 NA NA 128073730 Cnaemidophorus WELLFORMED
#> rank numOccurrences
#> 1 GENUS 0
#> 2 GENUS 0
#> 3 GENUS 0
#> 4 GENUS 0
#> 5 GENUS 0
#> 6 GENUS 0
Search for the class Mammalia
head(name_lookup(query='mammalia', return = 'data'))
#> key nubKey parentKey parent phylum phylumKey classKey
#> 1 125798198 359 137006861 Chordata Chordata 137006861 125798198
#> 2 116665331 359 116842680 Chordata Chordata 116842680 116665331
#> 3 359 359 44 Chordata Chordata 44 359
#> 4 125826646 359 137006861 Chordata Chordata 137006861 125826646
#> 5 137066410 359 137066409 Macroscelidea Chordata 116842680 137066409
#> 6 102402290 359 102545028 Chordata Chordata 102545028 102402290
#> canonicalName authorship nameType rank numOccurrences kingdom
#> 1 Mammalia WELLFORMED CLASS 0 <NA>
#> 2 Mammalia Linnaeus, 1758 WELLFORMED CLASS 0 Animalia
#> 3 Mammalia Linnaeus, 1758 WELLFORMED CLASS 0 Animalia
#> 4 Mammalia Linnaeus, 1758 WELLFORMED CLASS 0 <NA>
#> 5 Mammalia WELLFORMED ORDER 0 Animalia
#> 6 Mammalia WELLFORMED CLASS 0 Animalia
#> kingdomKey order orderKey family familyKey genus genusKey
#> 1 NA <NA> NA <NA> NA <NA> NA
#> 2 116630539 <NA> NA <NA> NA <NA> NA
#> 3 1 <NA> NA <NA> NA <NA> NA
#> 4 NA <NA> NA <NA> NA <NA> NA
#> 5 116630539 Mammalia 137066410 <NA> NA <NA> NA
#> 6 101719444 <NA> NA <NA> NA <NA> NA
Look up the species Helianthus annuus
head(name_lookup('Helianthus annuus', rank="species", return = 'data'))
#> key nubKey parentKey parent kingdom order
#> 1 116845199 3119195 116853573 Helianthus Plantae Asterales
#> 2 3119195 3119195 3119134 Helianthus Plantae Asterales
#> 3 125790787 3119195 125809269 Helianthus <NA> Asterales
#> 4 106239436 3119195 106239325 Helianthus Viridiplantae Asterales
#> 5 128399814 3119195 134815689 Helianthus <NA> <NA>
#> 6 111449704 3119195 111449703 Helianthus Plantae <NA>
#> family genus kingdomKey orderKey familyKey genusKey
#> 1 Asteraceae Helianthus 116668764 116852024 116856030 116853573
#> 2 Asteraceae Helianthus 6 414 3065 3119134
#> 3 Asteraceae Helianthus NA 137012188 125799038 125809269
#> 4 Asteraceae Helianthus 106147210 106237428 106237535 106239325
#> 5 <NA> Helianthus NA NA NA 134815689
#> 6 Compositae Helianthus 111449174 NA 111442813 111449703
#> canonicalName authorship nameType rank numOccurrences
#> 1 Helianthus annuus L. WELLFORMED SPECIES 0
#> 2 Helianthus annuus L. WELLFORMED SPECIES 0
#> 3 Helianthus annuus L. WELLFORMED SPECIES 0
#> 4 Helianthus annuus WELLFORMED SPECIES 0
#> 5 Helianthus annuus WELLFORMED SPECIES 0
#> 6 Helianthus annuus L. WELLFORMED SPECIES 0
#> phylum phylumKey classKey
#> 1 <NA> NA NA
#> 2 Magnoliophyta 49 220
#> 3 <NA> NA NA
#> 4 Streptophyta 106171079 NA
#> 5 <NA> NA NA
#> 6 Spermatophyta 111449175 111449177
The function name_usage()
works with lots of different name endpoints in GBIF, listed at http://www.gbif.org/developer/species#nameUsages.
library("plyr")
out <- name_usage(key=3119195, language="FRENCH", data='vernacularNames')
compact(lapply(out$results, function(x) if(x$language=="FRENCH") x else NULL))[1:2]
#> [[1]]
#> [[1]]$vernacularName
#> [1] "grand soleil"
#>
#> [[1]]$language
#> [1] "FRENCH"
#>
#> [[1]]$sourceTaxonKey
#> [1] 107001935
#>
#> [[1]]$preferred
#> [1] FALSE
#>
#>
#> [[2]]
#> [[2]]$vernacularName
#> [1] "grand soleil"
#>
#> [[2]]$language
#> [1] "FRENCH"
#>
#> [[2]]$sourceTaxonKey
#> [1] 100019171
#>
#> [[2]]$preferred
#> [1] FALSE
The function name_backbone()
is used to search against the GBIF backbone taxonomy
name_backbone(name='Helianthus', rank='genus', kingdom='plants')
#> $usageKey
#> [1] 3119134
#>
#> $scientificName
#> [1] "Helianthus L."
#>
#> $canonicalName
#> [1] "Helianthus"
#>
#> $rank
#> [1] "GENUS"
#>
#> $synonym
#> [1] FALSE
#>
#> $confidence
#> [1] 97
#>
#> $matchType
#> [1] "EXACT"
#>
#> $kingdom
#> [1] "Plantae"
#>
#> $phylum
#> [1] "Magnoliophyta"
#>
#> $order
#> [1] "Asterales"
#>
#> $family
#> [1] "Asteraceae"
#>
#> $genus
#> [1] "Helianthus"
#>
#> $kingdomKey
#> [1] 6
#>
#> $phylumKey
#> [1] 49
#>
#> $classKey
#> [1] 220
#>
#> $orderKey
#> [1] 414
#>
#> $familyKey
#> [1] 3065
#>
#> $genusKey
#> [1] 3119134
#>
#> $class
#> [1] "Magnoliopsida"
The function name_suggest()
is optimized for speed, and gives back suggested names based on query parameters.
head( name_suggest(q='Puma concolor') )
#> key canonicalName rank
#> 1 2435099 Puma concolor SPECIES
#> 2 6164589 Puma concolor anthonyi SUBSPECIES
#> 3 6164590 Puma concolor couguar SUBSPECIES
#> 4 6164591 Puma concolor kaibabensis SUBSPECIES
#> 5 6164592 Puma concolor oregonensis SUBSPECIES
#> 6 6164594 Puma concolor vancouverensis SUBSPECIES
Get data for a single occurrence. Note that the result can be returned as a list (with slots for the taxonomic hierarchy, media, and data), as just the hierarchy, or as just the data.
Just data
occ_get(key=766766824, return='data')
#> name key decimalLatitude decimalLongitude issues
#> 1 Corvus monedula 766766824 59.46 17.91 depunl,gass84
Just taxonomic hierarchy
occ_get(key=766766824, return='hier')
#> name key rank
#> 1 Animalia 1 kingdom
#> 2 Chordata 44 phylum
#> 3 Aves 212 class
#> 4 Passeriformes 729 order
#> 5 Corvidae 5235 family
#> 6 Corvus 2482468 genus
#> 7 Corvus monedula 2482473 species
All data, or leave return parameter blank
occ_get(key=766766824, return='all')
#> $hierarchy
#> name key rank
#> 1 Animalia 1 kingdom
#> 2 Chordata 44 phylum
#> 3 Aves 212 class
#> 4 Passeriformes 729 order
#> 5 Corvidae 5235 family
#> 6 Corvus 2482468 genus
#> 7 Corvus monedula 2482473 species
#>
#> $media
#> list()
#>
#> $data
#> name key decimalLatitude decimalLongitude issues
#> 1 Corvus monedula 766766824 59.46 17.91 depunl,gass84
Get many occurrences. occ_get
is vectorized
occ_get(key=c(766766824,101010,240713150,855998194,49819470), return='data')
#> name key decimalLatitude decimalLongitude
#> 1 Corvus monedula 766766824 59.46 17.91
#> 2 Platydoras costatus 101010 -4.35 -70.07
#> 3 none 240713150 -77.57 163.58
#> 4 Sciurus vulgaris 855998194 58.41 12.04
#> 5 Phlogophora meticulosa 49819470 55.72 13.28
#> issues
#> 1 depunl,gass84
#> 2 cucdmis,gass84
#> 3 cdround,gass84,txmatnon
#> 4 depunl,gass84
#> 5 cdround,gass84
By default occ_search()
returns a dplyr
like output summary in which the data printed expands based on how much data is returned, and the size of your window. You can search by scientific name:
occ_search(scientificName = "Ursus americanus", limit = 20)
#> Records found [8313]
#> Records returned [20]
#> No. unique hierarchies [1]
#> No. media records [20]
#> Args [scientificName=Ursus americanus, fields=all]
#> First 10 rows of data
#>
#> name key decimalLatitude decimalLongitude
#> 1 Ursus americanus 891034709 29.23 -103.29
#> 2 Ursus americanus 891045574 43.74 -72.53
#> 3 Ursus americanus 891041363 29.28 -103.29
#> 4 Ursus americanus 1024328693 34.21 -118.15
#> 5 Ursus americanus 891056344 29.27 -103.32
#> 6 Ursus americanus 1024182262 50.09 -117.46
#> 7 Ursus americanus 1024180980 34.57 -119.16
#> 8 Ursus americanus 911496466 29.28 -103.30
#> 9 Ursus americanus 1024328712 39.51 -120.16
#> 10 Ursus americanus 1024222560 59.86 -129.18
#> .. ... ... ... ...
#> Variables not shown: issues (chr), datasetKey (chr), publishingOrgKey
#> (chr), publishingCountry (chr), protocol (chr), lastCrawled (chr),
#> lastParsed (chr), extensions (chr), basisOfRecord (chr), taxonKey
#> (int), kingdomKey (int), phylumKey (int), classKey (int), orderKey
#> (int), familyKey (int), genusKey (int), speciesKey (int),
#> scientificName (chr), kingdom (chr), phylum (chr), order (chr),
#> family (chr), genus (chr), species (chr), genericName (chr),
#> specificEpithet (chr), taxonRank (chr), dateIdentified (chr), year
#> (int), month (int), day (int), eventDate (chr), modified (chr),
#> lastInterpreted (chr), references (chr), identifiers (chr), facts
#> (chr), relations (chr), geodeticDatum (chr), class (chr), countryCode
#> (chr), country (chr), verbatimEventDate (chr), rights (chr),
#> rightsHolder (chr), occurrenceID (chr), taxonID (chr), collectionCode
#> (chr), gbifID (chr), institutionCode (chr), catalogNumber (chr),
#> datasetName (chr), recordedBy (chr), eventTime (chr), identifier
#> (chr), identificationID (chr), verbatimLocality (chr),
#> occurrenceRemarks (chr), infraspecificEpithet (chr),
#> informationWithheld (chr)
Or to be more precise, you can search for names first, make sure you have the right name, then pass the GBIF key to the occ_search()
function:
key <- name_suggest(q='Helianthus annuus', rank='species')$key[1]
occ_search(taxonKey=key, limit=20)
#> Records found [20584]
#> Records returned [20]
#> No. unique hierarchies [1]
#> No. media records [10]
#> Args [taxonKey=3119195, fields=all]
#> First 10 rows of data
#>
#> name key decimalLatitude decimalLongitude
#> 1 Helianthus annuus 922042404 -3.281 37.524
#> 2 Helianthus annuus 899948224 1.279 103.799
#> 3 Helianthus annuus 891052261 24.826 -99.584
#> 4 Helianthus annuus 922039507 50.314 8.523
#> 5 Helianthus annuus 922044332 21.271 40.414
#> 6 Helianthus annuus 998785009 44.109 4.668
#> 7 Helianthus annuus 899969160 24.829 -99.583
#> 8 Helianthus annuus 899970378 32.540 -117.087
#> 9 Helianthus annuus 932352628 56.105 15.614
#> 10 Helianthus annuus 932108644 56.262 16.037
#> .. ... ... ... ...
#> Variables not shown: issues (chr), datasetKey (chr), publishingOrgKey
#> (chr), publishingCountry (chr), protocol (chr), lastCrawled (chr),
#> lastParsed (chr), extensions (chr), basisOfRecord (chr), taxonKey
#> (int), kingdomKey (int), phylumKey (int), classKey (int), orderKey
#> (int), familyKey (int), genusKey (int), speciesKey (int),
#> scientificName (chr), kingdom (chr), phylum (chr), order (chr),
#> family (chr), genus (chr), species (chr), genericName (chr),
#> specificEpithet (chr), taxonRank (chr), year (int), month (int), day
#> (int), eventDate (chr), lastInterpreted (chr), identifiers (chr),
#> facts (chr), relations (chr), geodeticDatum (chr), class (chr),
#> countryCode (chr), country (chr), gbifID (chr), institutionCode
#> (chr), catalogNumber (chr), recordedBy (chr), locality (chr),
#> collectionCode (chr), dateIdentified (chr), modified (chr),
#> references (chr), verbatimEventDate (chr), verbatimLocality (chr),
#> rights (chr), rightsHolder (chr), occurrenceID (chr), taxonID (chr),
#> occurrenceRemarks (chr), datasetName (chr), eventTime (chr),
#> identifier (chr), identificationID (chr), identifiedBy (chr),
#> coordinateAccuracy (dbl), elevation (dbl), elevationAccuracy (dbl),
#> depth (dbl), depthAccuracy (dbl), stateProvince (chr), county (chr)
Like many functions in rgbif
, you can choose what to return with the return
parameter, here, just returning the metadata:
occ_search(taxonKey=key, return='meta')
#> offset limit endOfRecords count
#> 1 320 180 FALSE 20584
You can choose what fields to return. This isn't passed on to the API query to GBIF as they don't allow that, but we filter out the columns before we give the data back to you.
occ_search(scientificName = "Ursus americanus", fields=c('name','basisOfRecord','protocol'), limit = 20)
#> Records found [8313]
#> Records returned [20]
#> No. unique hierarchies [1]
#> No. media records [20]
#> Args [scientificName=Ursus americanus, fields=name,basisOfRecord,protocol]
#> First 10 rows of data
#>
#> name protocol basisOfRecord
#> 1 Ursus americanus DWC_ARCHIVE HUMAN_OBSERVATION
#> 2 Ursus americanus DWC_ARCHIVE HUMAN_OBSERVATION
#> 3 Ursus americanus DWC_ARCHIVE HUMAN_OBSERVATION
#> 4 Ursus americanus DWC_ARCHIVE HUMAN_OBSERVATION
#> 5 Ursus americanus DWC_ARCHIVE HUMAN_OBSERVATION
#> 6 Ursus americanus DWC_ARCHIVE HUMAN_OBSERVATION
#> 7 Ursus americanus DWC_ARCHIVE HUMAN_OBSERVATION
#> 8 Ursus americanus DWC_ARCHIVE HUMAN_OBSERVATION
#> 9 Ursus americanus DWC_ARCHIVE HUMAN_OBSERVATION
#> 10 Ursus americanus DWC_ARCHIVE HUMAN_OBSERVATION
#> .. ... ... ...
Most parameters are vectorized, so you can pass in more than one value:
splist <- c('Cyanocitta stelleri', 'Junco hyemalis', 'Aix sponsa')
keys <- sapply(splist, function(x) name_suggest(x)$key[1], USE.NAMES=FALSE)
occ_search(taxonKey=keys, limit=5)
#> Occ. found [2482598 (356609), 2492010 (1945105), 2498387 (591867)]
#> Occ. returned [2482598 (20), 2492010 (20), 2498387 (20)]
#> No. unique hierarchies [2482598 (1), 2492010 (1), 2498387 (1)]
#> No. media records [2482598 (20), 2492010 (20), 2498387 (16)]
#> Args [taxonKey=2482598,2492010,2498387, fields=all]
#> First 10 rows of data from 2482598
#>
#> name key decimalLatitude decimalLongitude
#> 1 Cyanocitta stelleri 891781350 37.74 -122.49
#> 2 Cyanocitta stelleri 891046151 19.29 -98.66
#> 3 Cyanocitta stelleri 891047537 37.87 -122.24
#> 4 Cyanocitta stelleri 891042142 37.24 -121.96
#> 5 Cyanocitta stelleri 891051134 35.19 -111.64
#> 6 Cyanocitta stelleri 891056081 37.77 -122.47
#> 7 Cyanocitta stelleri 891040613 38.57 -122.68
#> 8 Cyanocitta stelleri 891051675 38.94 -119.97
#> 9 Cyanocitta stelleri 891051459 32.87 -116.42
#> 10 Cyanocitta stelleri 891051562 38.94 -119.97
#> .. ... ... ... ...
#> Variables not shown: issues (chr), datasetKey (chr), publishingOrgKey
#> (chr), publishingCountry (chr), protocol (chr), lastCrawled (chr),
#> lastParsed (chr), extensions (chr), basisOfRecord (chr), taxonKey
#> (int), kingdomKey (int), phylumKey (int), classKey (int), orderKey
#> (int), familyKey (int), genusKey (int), speciesKey (int),
#> scientificName (chr), kingdom (chr), phylum (chr), order (chr),
#> family (chr), genus (chr), species (chr), genericName (chr),
#> specificEpithet (chr), taxonRank (chr), dateIdentified (chr), year
#> (int), month (int), day (int), eventDate (chr), modified (chr),
#> lastInterpreted (chr), references (chr), identifiers (chr), facts
#> (chr), relations (chr), geodeticDatum (chr), class (chr), countryCode
#> (chr), country (chr), verbatimEventDate (chr), verbatimLocality
#> (chr), rights (chr), rightsHolder (chr), occurrenceID (chr), taxonID
#> (chr), collectionCode (chr), gbifID (chr), institutionCode (chr),
#> catalogNumber (chr), datasetName (chr), recordedBy (chr), eventTime
#> (chr), identifier (chr), identificationID (chr), occurrenceRemarks
#> (chr)
Static map using the ggplot2 package. Make a map of Puma concolor occurrences.
key <- name_backbone(name='Puma concolor')$speciesKey
dat <- occ_search(taxonKey=key, return='data', limit=300)
gbifmap(input=dat)