In the R language, datasets are usually contained in a data.frame() object, or in one of its modernized versions. For example, tibble::tibble() and data.table::data.table() objects inherit from the base data.frame().
This documentation has not yet been updated for the development version of the [dataset] package.
The base data.frame()
constructor, like most base R
types, is very flexible. It allows any kind of metadata to be
attached to the object as an attribute.
# Build a small two-column data frame.
foo <- data.frame(
  x = c(1, 2),
  y = c(3, 4)
)
# Attach a user-chosen attribute; it is stored alongside the built-in ones.
attr(foo, "Title") <- "My Foo Object"
# List every attribute of the object, including the custom "Title".
attributes(foo)
#> $names
#> [1] "x" "y"
#>
#> $class
#> [1] "data.frame"
#>
#> $row.names
#> [1] 1 2
#>
#> $Title
#> [1] "My Foo Object"
Metadata standardisation is essential for reproducible
research, publication, or linking resources on the web. The aim of the
dataset()
class is the creation of semantically enriched
data frames with as much interoperability as possible across various
sub-classes of the base R data.frame().
# Wrap mtcars in a dataset object carrying basic bibliographic metadata,
# then print only its first rows.
head(
  dataset(
    mtcars,
    title = "The Motor Trend [mtcar] Dataset",
    author = person("Motor Trend Magazine"),
    year = 1974,
    publisher = "Motor Trend Magazine"
  )
)
#> Motor Trend Magazine (1974). "The Motor Trend [mtcar] Dataset
#> [subset]."
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
#> Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
#> Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
#> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
#> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
#> Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
#> Further metadata: describe(x)
# Export mtcars as triples to a Turtle (.ttl) file and preview the result.

# Target file inside the session's temporary directory.
temp_ttl_file <- file.path(tempdir(), "temp_ttl.ttl")

# The same annotated dataset as in the previous example.
mtcars_dataset <- dataset(
  mtcars,
  title = "The Motor Trend [mtcar] Dataset",
  author = person("Motor Trend Magazine"),
  year = 1974,
  publisher = "Motor Trend Magazine"
)

# Keep only the rows of the namespace table whose prefix is listed here.
mtcars_namespace <- dataset_namespace[
  dataset_namespace$prefix %in% c("owl:", "rdf:", "rdfs:", "qb:", "eg:"),
]

# Add an identifier column, then reshape to triples keyed by "rowid".
# From here on, mtcars_dataset holds the triples, not the wide table.
mtcars_dataset <- id_to_column(mtcars_dataset, prefix = "eg:", ids = NULL)
mtcars_dataset <- dataset_to_triples(mtcars_dataset, idcol = "rowid")

# Prefix every predicate (p) and run the objects (o) through xsd_convert().
mtcars_dataset$p <- paste0("eg:mtcars#", mtcars_dataset$p)
mtcars_dataset$o <- xsd_convert(mtcars_dataset$o)

dataset_ttl_write(
  mtcars_dataset,
  ttl_namespace = mtcars_namespace,
  file_path = temp_ttl_file
)

# Show the first 25 lines of the written file.
readLines(temp_ttl_file, n = 25)
#> [1] "@prefix eg: <http://example.org/ns#> ."
#> [2] "@prefix owl: <http://www.w3.org/2002/07/owl#> ."
#> [3] "@prefix qb: <http://purl.org/linked-data/cube#> ."
#> [4] "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ."
#> [5] "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ."
#> [6] ""
#> [7] "# -- Observations -----------------------------------------"
#> [8] ""
#> [9] "eg:Mazda-RX4 a qb:Observation ;"
#> [10] " eg:mtcars#mpg \"21\"^^<xs:decimal> ;"
#> [11] " eg:mtcars#cyl \"6\"^^<xs:decimal> ;"
#> [12] " eg:mtcars#disp \"160\"^^<xs:decimal> ;"
#> [13] " eg:mtcars#hp \"110\"^^<xs:decimal> ;"
#> [14] " eg:mtcars#drat \"3.9\"^^<xs:decimal> ;"
#> [15] " eg:mtcars#wt \"2.62\"^^<xs:decimal> ;"
#> [16] " eg:mtcars#qsec \"16.46\"^^<xs:decimal> ;"
#> [17] " eg:mtcars#vs \"0\"^^<xs:decimal> ;"
#> [18] " eg:mtcars#am \"1\"^^<xs:decimal> ;"
#> [19] " eg:mtcars#gear \"4\"^^<xs:decimal> ;"
#> [20] " eg:mtcars#carb \"4\"^^<xs:decimal> ;"
#> [21] " ."
#> [22] "eg:Mazda-RX4-Wag a qb:Observation ;"
#> [23] " eg:mtcars#mpg \"21\"^^<xs:decimal> ;"
#> [24] " eg:mtcars#cyl \"6\"^^<xs:decimal> ;"
#> [25] " eg:mtcars#disp \"160\"^^<xs:decimal> ;"
library(tibble)
# The same metadata can be attached to a tibble instead of a base data frame.
ds_tibble <- dataset(
  as_tibble(mtcars),
  title = "The Motor Trend [mtcar] Dataset",
  author = person("Motor Trend Magazine"),
  year = 1974,
  publisher = "Motor Trend Magazine"
)
# Add a "rowid" column and print the result.
rowid_to_column(ds_tibble)
#> Motor Trend Magazine (1974). "The Motor Trend [mtcar] Dataset."
#> # A tibble: 12 × 12
#> mpg cyl disp hp drat wt qsec vs am gear carb rowid
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
#> 1 16.4 8 276. 180 3.07 4.07 17.4 0 0 3 3 12
#> 2 21 6 160 110 3.9 2.62 16.5 0 1 4 4 1
#> 3 21 6 160 110 3.9 2.88 17.0 0 1 4 4 2
#> 4 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 3
#> 5 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 4
#> 6 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 5
#> 7 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 6
#> 8 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 7
#> 9 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 8
#> 10 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 9
#> 11 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 10
#> 12 17.8 6 168. 123 3.92 3.44 18.9 1 0 4 4 11
#> Further metadata: describe(x)
library("nycflights13")
library("tsibble")
library("dplyr")
library("tidyr")
# Load the weather data and keep only the columns used in this example.
data("weather")
weather <- select(weather, origin, time_hour, temp, humid, precip)

# Convert to a tsibble: one series per origin, indexed by time_hour.
weather_tsbl <- as_tsibble(weather, key = origin, index = time_hour)

# Wrap the tsibble in a dataset object with bibliographic metadata.
ds_weather <- dataset(
  weather_tsbl,
  title = "Weather dataset",
  author = person("Jane", "Doe"),
  creator = person("Jane", "Doe")
)

# Print the bibliographic entry derived from that metadata.
dataset_bibentry(ds_weather)
#> Doe J (2024). "Weather dataset."
# Fill gaps in the series (precip is set to 0 for the added rows), then,
# within each key, fill temp and humid downwards.
full_weather <- ds_weather %>%
  fill_gaps(precip = 0) %>%
  group_by_key() %>%
  tidyr::fill(temp, humid, .direction = "down")

# Monthly aggregates per key: mean temperature and total precipitation.
full_weather %>%
  group_by_key() %>%
  index_by(year_month = ~ yearmonth(.)) %>%
  summarise(
    avg_temp = mean(temp, na.rm = TRUE),
    ttl_precip = sum(precip, na.rm = TRUE)
  )
#> # A tsibble: 36 x 4 [1M]
#> # Key: origin [3]
#> origin year_month avg_temp ttl_precip
#> <chr> <mth> <dbl> <dbl>
#> 1 EWR 2013 Jan 35.6 3.53
#> 2 EWR 2013 Feb 34.2 3.83
#> 3 EWR 2013 Mar 40.1 3
#> 4 EWR 2013 Apr 53.0 1.47
#> 5 EWR 2013 May 63.3 5.44
#> 6 EWR 2013 Jun 73.3 8.73
#> 7 EWR 2013 Jul 80.7 3.74
#> 8 EWR 2013 Aug 74.5 4.57
#> 9 EWR 2013 Sep 67.3 1.54
#> 10 EWR 2013 Oct 59.7 0.5
#> # ℹ 26 more rows
# Store the monthly aggregates as a dataset object with richer metadata.
# The aggregation pipeline is passed inline as the first argument.
ds_full_weather <- dataset(
  full_weather %>%
    group_by_key() %>%
    # monthly aggregates
    index_by(year_month = ~ yearmonth(.)) %>%
    summarise(
      avg_temp = mean(temp, na.rm = TRUE),
      ttl_precip = sum(precip, na.rm = TRUE)
    ),
  title = "Weather tsibble",
  author = person("Jane", "Doe"),
  creator = person("Jane", "Doe"),
  language = "eng",
  description = "A replication of the tsibble README example."
)

# Print the as_datacite() result using the "citation" style.
print(as_datacite(ds_full_weather), "citation")
#> Doe J (2024). "Weather tsibble."
#>
#> A BibTeX entry for LaTeX users is
#>
#> @Misc{,
#> title = {Weather tsibble},
#> author = {Jane Doe},
#> identifier = {:tba},
#> publisher = {:unas},
#> year = {2024},
#> date = {:tba},
#> language = {eng},
#> alternateidentifier = {:unas},
#> relatedidentifier = {:unas},
#> format = {application/r-rds},
#> version = {0.1.0},
#> rights = {:unas},
#> description = {A replication of the tsibble README example.},
#> geolocation = {:unas},
#> fundingreference = {:unas},
#> }
# Export the monthly weather aggregates as triples to a Turtle (.ttl) file.

# Target file inside the session's temporary directory.
temp_weather_file <- file.path(tempdir(), "temp_weather.ttl")

# Keep only the rows of the namespace table whose prefix is listed here.
weather_namespace <- dataset_namespace[
  dataset_namespace$prefix %in% c("owl:", "rdf:", "rdfs:", "qb:", "eg:"),
]

# Note: ds_weather is reused here and no longer holds the hourly dataset.
ds_weather <- id_to_column(ds_full_weather, prefix = "eg:", ids = NULL)

# NOTE(review): in the printed output every literal, including the numeric
# avg_temp and ttl_precip values, is typed xs:string, and year_month appears
# as a bare number. The dataset-level conversion below is disabled; confirm
# whether it should run before dataset_to_triples().
#ds_weather <- xsd_convert(ds_weather, idcol = "rowid")
ds_weather <- dataset_to_triples(ds_weather, idcol = "rowid")

# Prefix every predicate (p) and run the objects (o) through xsd_convert().
ds_weather$p <- paste0("eg:weather#", ds_weather$p)
ds_weather$o <- xsd_convert(ds_weather$o)

dataset_ttl_write(
  ds_weather,
  ttl_namespace = weather_namespace,
  file_path = temp_weather_file
)

# Show the first 25 lines of the written file.
readLines(temp_weather_file, n = 25)
#> [1] "@prefix eg: <http://example.org/ns#> ."
#> [2] "@prefix owl: <http://www.w3.org/2002/07/owl#> ."
#> [3] "@prefix qb: <http://purl.org/linked-data/cube#> ."
#> [4] "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> ."
#> [5] "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> ."
#> [6] ""
#> [7] "# -- Observations -----------------------------------------"
#> [8] ""
#> [9] "eg:1 a qb:Observation ;"
#> [10] " eg:weather#origin \"EWR\"^^<xs:string> ;"
#> [11] " eg:weather#year_month \"15706\"^^<xs:string> ;"
#> [12] " eg:weather#avg_temp \"35.5694751009421\"^^<xs:string> ;"
#> [13] " eg:weather#ttl_precip \"3.53\"^^<xs:string> ;"
#> [14] " ."
#> [15] "eg:2 a qb:Observation ;"
#> [16] " eg:weather#origin \"EWR\"^^<xs:string> ;"
#> [17] " eg:weather#year_month \"15737\"^^<xs:string> ;"
#> [18] " eg:weather#avg_temp \"34.2266964285714\"^^<xs:string> ;"
#> [19] " eg:weather#ttl_precip \"3.83\"^^<xs:string> ;"
#> [20] " ."
#> [21] "eg:3 a qb:Observation ;"
#> [22] " eg:weather#origin \"EWR\"^^<xs:string> ;"
#> [23] " eg:weather#year_month \"15765\"^^<xs:string> ;"
#> [24] " eg:weather#avg_temp \"40.1186541049798\"^^<xs:string> ;"
#> [25] " eg:weather#ttl_precip \"3\"^^<xs:string> ;"