In this vignette we compare computational requirements (time and memory) of common operations using data.table and tidyverse functions.

Setup

library(data.table)
#> data.table 1.14.7 IN DEVELOPMENT built 2022-11-29 19:23:51 UTC using 1 threads (see ?getDTthreads).  Latest news: r-datatable.com
#> **********
#> This development version of data.table was built more than 4 weeks ago. Please update: data.table::update_dev_pkg()
#> **********
time.begin <- Sys.time() # start time, reported in Total time at the end
hostname <- system("hostname",intern=TRUE)
as.integer(Sys.getenv("SLURM_JOB_CPUS_PER_NODE", "1")) # informational: CPUs allocated by SLURM, if any
#> [1] 1
max.threads <- parallel::detectCores()
threads.vec <- unique(as.integer(c(1, max.threads/2, max.threads)))
seconds.limit <- 1 # atime stops timing an expression at larger N once its median time exceeds this
cache.list <- list()
cache <- function(symbol, code){
  cache.symb <- substitute(symbol)
  cache.dir <- "~/R/atime-cache"
  cache.rds <- file.path(cache.dir, paste0(cache.symb, ".RDS"))
  if(file.exists(cache.rds)){
    value <- readRDS(cache.rds)
  }else{
    to.eval <- substitute(code)
    value <- eval(to.eval)
    value$hostname <- hostname
    if(dir.exists(cache.dir))saveRDS(value, cache.rds)
  }
  cache.list[[paste(cache.symb)]] <<- value
  assign(paste(cache.symb), value, parent.frame())
}
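## Usage sketch (my.result is a hypothetical name): cache(my.result, slow_code())
## evaluates slow_code() once, saves the result to ~/R/atime-cache/my.result.RDS
## (if that directory exists), and simply re-reads that RDS on later knits.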
aplot <- function(atime.list, my.title, xmax, max.seconds, xlab, color.vec=NULL){
  best.list <- atime::references_best(atime.list)
  blank.dt <- data.table(x=best.list$meas$N[1], y=max.seconds, unit="seconds")
  if(require(ggplot2)){
    hline.df <- with(atime.list, data.frame(seconds.limit, unit="seconds"))
    gg <- ggplot()+
      ggtitle(paste(my.title,"on",hostname))+
      theme_bw()+
      geom_blank(aes(
        x, y),
        data=blank.dt)+
      facet_grid(unit ~ ., scales="free")+
      geom_hline(aes(
        yintercept=seconds.limit),
        color="grey",
        data=hline.df)+
      geom_line(aes(
        N, empirical, color=expr.name),
        data=best.list$meas)+
      geom_ribbon(aes(
        N, ymin=q25, ymax=q75, fill=expr.name),
        data=best.list$meas[unit=="seconds"],
        alpha=0.5)+
      scale_x_log10(xlab)+
      scale_y_log10("median line, quartiles band")
    if(!is.null(color.vec)){
      gg <- gg+
        scale_color_manual(values=color.vec)+
        scale_fill_manual(values=color.vec)
    }
    if(require(directlabels)){
      gg+
        directlabels::geom_dl(aes(
          N, empirical, color=expr.name, label=expr.name),
          method="right.polygons",
          data=best.list$meas)+
        theme(legend.position="none")+
        coord_cartesian(xlim=c(NA,xmax))
    }else{
      gg
    }
  }
}
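## aplot() is used after each benchmark below: it plots median time/memory
## against N with quartile bands, the grey line showing the seconds limit.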

Writing CSV

First we define a function which will be used in all of the write benchmarks,

atime_write <- function(make.mat.fun, fmt){
  grid.args <- list(
    list(THREADS=threads.vec),
    "data.table::fwrite"=quote({
      data.table::setDTthreads(THREADS)
      data.table::fwrite(dt, name.list$fwrite, showProgress = FALSE)
    }))
  if(requireNamespace("readr"))grid.args[["readr::write_csv"]] <- quote({
    readr::write_csv(
      dt, name.list$write_csv, progress = FALSE, num_threads = THREADS)
  })
  expr.list <- do.call(atime::atime_grid, grid.args)
  atime::atime(
    N=as.integer(10^seq(0, 6, by=0.5)),
    setup={
      mat <- make.mat.fun(N)
      name.list <- list()
      for(fun in c("fwrite", "write_csv", "write.csv")){
        name.list[[fun]] <- file.path(
          tempdir(), sprintf(fmt, fun, N))
      }
      dt <- data.table(mat)
    },
    seconds.limit = seconds.limit,
    expr.list=expr.list,
    "utils::write.csv"=utils::write.csv(dt, name.list$write.csv))
}
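## atime_grid() above expands THREADS into one expression per value, producing
## names like "data.table::fwrite THREADS=1"; the color vector defined below
## must be named to match those generated expression names.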
if(FALSE){
  RColorBrewer::display.brewer.all()
  dput(RColorBrewer::brewer.pal(Inf, "Set2"))
  dput(RColorBrewer::brewer.pal(Inf, "RdGy"))
}
write.colors <- c(
  "#67001F",#dark red
  "#B2182B", "#D6604D", "#F4A582", "#FDDBC7",
  "#FFFFFF",#white
  "#E0E0E0", "#BABABA", "#878787", "#4D4D4D",
  "#1A1A1A",#almost black
  "utils::write.csv"="deepskyblue")
some <- function(...)c(...)[1:length(threads.vec)] # first length(threads.vec) indices, so the assignments below work for any number of thread counts
names(write.colors)[some(3,2,1)] <- paste0(
  "data.table::fwrite THREADS=",threads.vec)
names(write.colors)[some(9,10,11)] <- paste0(
  "readr::write_csv THREADS=",threads.vec)
write.colors <- write.colors[names(write.colors)!=""]

The code below is for real numbers with a constant number of columns, and a variable number of rows.

random_real <- function(N.rows, N.cols){
  set.seed(1)
  matrix(rnorm(N.rows*N.cols), N.rows, N.cols)
}
cache(write.real.vary.rows, atime_write(
  function(N.rows, N.cols=10)random_real(N.rows, N.cols),
  "10_real_cols_%s_%d.csv"))
aplot(write.real.vary.rows, "Write CSV with 10 random normal real columns", 1e9, 1e1, "Number of rows", write.colors)
#> Loading required package: ggplot2
#> Loading required package: directlabels
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis

The code below writes real numbers with a constant number of rows, and a variable number of columns.

cache(write.real.vary.cols, atime_write(
  function(N.cols, N.rows=10)random_real(N.rows, N.cols),
  "10_real_rows_%s_%d.csv"))
aplot(write.real.vary.cols, "Write CSV with 10 random normal real rows", 1e9, 1e1, "Number of columns", write.colors)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis

The code below is for character data with a constant number of columns, and a variable number of rows.

chr_mat <- function(N.rows, N.cols){
  data.vec <- paste0("'quoted", c(" ", "_"), "data'")
  matrix(data.vec, N.rows, N.cols)
}
cache(write.chr.vary.rows, atime_write(
  function(N.rows,N.cols=10)chr_mat(N.rows, N.cols),
  "10_chr_cols_%s_%d.csv"))
aplot(write.chr.vary.rows, "Write CSV with 10 character columns", 1e9, 1e1, "Number of rows", write.colors)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis

The code below is for character data with a constant number of rows, and a variable number of columns.

cache(write.chr.vary.cols, atime_write(
  function(N.cols, N.rows=10)chr_mat(N.rows, N.cols),
  "10_chr_rows_%s_%d.csv"))
aplot(write.chr.vary.cols, "Write CSV with 10 character rows", 1e9, 1e1, "Number of columns", write.colors)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis

The figure above shows that data.table::fwrite clearly has a smaller slope (linear complexity in the number of columns) than the other methods (quadratic complexity), as confirmed in the plot below, which shows the best asymptotic reference lines above and below each empirical measurement.

best.list <- atime::references_best(write.chr.vary.cols)
one.thread <- function(DT)DT[grepl("utils|THREADS=1$", expr.name)] # keep utils and single-threaded results only
meas.dt <- one.thread(best.list$meas)
ref.dt <- one.thread(best.list$ref)[
  fun.name %in% c("N^2","N") & N >= 1e2]
if(require(ggplot2)){
  hline.df <- with(write.chr.vary.cols, data.frame(
    seconds.limit, unit="seconds"))
  gg <- ggplot()+
    ggtitle("Write CSV with 10 character rows, asymptotic complexity")+
    theme_bw()+
    facet_grid(unit ~ expr.name, scales="free")+
    geom_hline(aes(
      yintercept=seconds.limit),
      color="grey",
      data=hline.df)+
    geom_line(aes(
      N, reference, group=paste(expr.name, fun.name)),
      linewidth=2,
      data=ref.dt)+
    geom_line(aes(
      N, empirical, color=expr.name),
      linewidth=1,
      data=meas.dt)+
    geom_ribbon(aes(
      N, ymin=q25, ymax=q75, fill=expr.name),
      data=meas.dt[unit=="seconds"],
      alpha=0.5)+
    scale_x_log10("N = Number of columns")+
    scale_y_log10("median line, quartiles band")+
    scale_color_manual(values=write.colors)+
    scale_fill_manual(values=write.colors)
  if(require(directlabels)){
    gg+
      directlabels::geom_dl(aes(
        N, reference,
        label.group=paste(expr.name, fun.name),
        label=fun.name),
        method="left.polygons",
        data=ref.dt)+
      theme(legend.position="none")
  }else{
    gg
  }
}
#> Warning: Transformation introduced infinite values in continuous
#> y-axis

The comparisons above show significant advantages for data.table for writing CSV data with a large number of columns: asymptotically less time and memory (linear rather than quadratic in number of columns).

Reading CSV

First we define expression lists and a helper function which we will use for all of the read benchmarks,

read.expr.list <- c(
  if(requireNamespace("readr"))atime::atime_grid(
    list(LAZY=c(TRUE, FALSE), THREADS=threads.vec),
    "readr::read_csv"={
      readr::read_csv(
        f.csv, num_threads = THREADS, lazy = LAZY,
        show_col_types=FALSE, progress=FALSE)
    }),
  atime::atime_grid(
    list(THREADS=threads.vec),
    "data.table::fread"={
      data.table::setDTthreads(THREADS)
      data.table::fread(f.csv, showProgress=FALSE)
    }),
  atime::atime_grid(
    "utils::read.csv"={
      utils::read.csv(f.csv)
    }))
#> Loading required namespace: readr
read.compute.expr.list <- list()
for(expr.name in names(read.expr.list)){
  lang.list <- as.list(read.expr.list[[expr.name]])
  LAST <- length(lang.list)
  lang.list[[LAST]] <- as.call(c(
    quote(`<-`),
    quote(DF),
    lang.list[[LAST]]))
  read.compute.expr.list[[expr.name]] <- as.call(c(
    lang.list,
    quote(apply(DF, 1, paste, collapse=","))))
}
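## For example, the loop above should turn the utils::read.csv expression into
## something like the block below, which forces all of the values to be read:
## read.compute.expr.list[["utils::read.csv"]]
## {
##     DF <- utils::read.csv(f.csv)
##     apply(DF, 1, paste, collapse = ",")
## }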
atime_read <- function(glob, compute=FALSE){  
  fmt <- sub("[*]", "%d", glob)
  csv.dt <- nc::capture_first_vec(
    Sys.glob(file.path(tempdir(), glob)),
    N="[0-9]+", as.integer,
    ".csv")[order(N)]
  atime::atime(
    N=csv.dt$N,
    setup={
      f.csv <- file.path(tempdir(), sprintf(fmt, N))
    },
    seconds.limit = seconds.limit,
    expr.list=if(compute)read.compute.expr.list else read.expr.list)
}
PRGn <- c(
  "#40004B", "#762A83", "#9970AB", "#C2A5CF", "#E7D4E8",#5
  "#F7F7F7", #6
  "#D9F0D3", "#A6DBA0", "#5AAE61", "#1B7837", "#00441B",#11
  "#67001F",#dark red 12
  "#B2182B", "#D6604D", "#F4A582", "#FDDBC7",
  "#FFFFFF",#white 17
  "#E0E0E0", "#BABABA", "#878787", "#4D4D4D",
  "#1A1A1A",#almost black 22
  "utils::write.csv"="deepskyblue")
names(PRGn)[c(
  some(3,2,1),
  some(9,10,11),
  some(14,13,12),
  23)] <- names(read.expr.list)
read.colors <- PRGn[names(PRGn)!=""]

Below we read real numbers with a constant number of columns, and a variable number of rows.

cache(read.real.vary.rows, atime_read("10_real_cols_fwrite_*.csv"))
aplot(read.real.vary.rows, "Read CSV with 10 real columns", 1e9, 1e1, "Number of rows", read.colors)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis

It can be seen in the plot above that the green results (read_csv with LAZY=TRUE) are fastest, which is normal because lazy reading does not actually read the data values into memory. A fairer comparison computes a text string for every row after reading the CSV, which forces all of the values to be read.
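
A minimal sketch of that forcing computation (f.csv is assumed to be one of the CSV files written above; an illustration, not part of the benchmark):

system.time(lazy.df <- readr::read_csv(f.csv, lazy=TRUE, show_col_types=FALSE, progress=FALSE))
system.time(apply(lazy.df, 1, paste, collapse=","))  # forces values to be read

The benchmark below applies the same row-collapsing computation after every read method,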

cache(compute.real.vary.rows, atime_read("10_real_cols_fwrite_*.csv", compute=TRUE))
aplot(compute.real.vary.rows, "Read CSV with 10 real columns, then collapse each row", 1e9, 1e1, "Number of rows", read.colors)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis

Below we read real numbers with a constant number of rows, and a variable number of columns.

cache(read.real.vary.cols, atime_read("10_real_rows_fwrite_*.csv"))
aplot(read.real.vary.cols, "Read CSV with 10 real rows", 1e8, 1e1, "Number of columns", read.colors)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis

Below we read character data with a constant number of columns, and a variable number of rows.

cache(read.chr.vary.rows, atime_read("10_chr_cols_fwrite_*.csv"))
aplot(read.chr.vary.rows, "Read CSV with 10 character columns", 1e9, 1e1, "Number of rows", read.colors)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis

As with the previous result for real data, the green results above (read_csv with LAZY=TRUE) are fastest, which is normal because lazy reading does not actually read the data values into memory. A fairer comparison is below, which computes a text string for every row after reading the CSV,

cache(compute.chr.vary.rows, atime_read("10_chr_cols_fwrite_*.csv", compute=TRUE))
aplot(compute.chr.vary.rows, "Read CSV with 10 character columns, then collapse each row", 1e9, 1e1, "Number of rows", read.colors)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis

Below we read character data with a constant number of rows, and a variable number of columns.

cache(read.chr.vary.cols, atime_read("10_chr_rows_fwrite_*.csv"))
aplot(read.chr.vary.cols, "Read CSV with 10 character rows", 1e8, 1e1, "Number of columns", read.colors)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis

From the comparisons above, it can be seen that for a small number of columns and a large number of rows, all the methods are about the same (differences are constant factors; using more than one thread also gives small constant-factor speedups). However, for a small number of rows and a large number of columns, data.table::fread is clearly the most efficient.

Summarize by group

The next problem is motivated by a common operation in machine learning code: computing the mean/SD over cross-validation folds.
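
For example, the desired result has one row per (set, epoch) combination, with columns giving the number, mean, and SD of the loss values in that group. A minimal sketch with toy data (not the benchmark data), which should yield four rows, each with loss_length=2,

library(data.table)
toy <- data.table(
  set=rep(c("subtrain","validation"), each=4),
  epoch=rep(1:2, 4),
  loss=1:8)
toy[, .(loss_length=.N, loss_mean=mean(loss), loss_sd=sd(loss)), by=.(set, epoch)]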

summary.expr.list <- c(atime::atime_grid(
  list(THREADS=threads.vec),
  "[.data.table"={
    data.table::setDTthreads(THREADS)
    loss.dt[, .(
      loss_length=.N,
      loss_mean=mean(loss),
      loss_sd=sd(loss)
    ), by=.(set, epoch)]
  }),
  atime::atime_grid(
    "base::by"={
      base::by(
        loss.dt$loss, 
        list(loss.dt$set, loss.dt$epoch), 
        function(values)c(
          loss_length=length(values),
          loss_mean=mean(values), 
          loss_sd=sd(values)))
    },
    "base::tapply"={
      base::tapply(
        loss.dt$loss, 
        list(loss.dt$set, loss.dt$epoch), 
        function(values)c(
          loss_length=length(values),
          loss_mean=mean(values), 
          loss_sd=sd(values)))
    }, 
    "stats::aggregate"={
      res <- stats::aggregate(
        loss ~ set + epoch, 
        loss.dt, 
        function(values)list(c(
          loss_length=length(values),
          loss_mean=mean(values), 
          loss_sd=sd(values))))
      data.frame(
        subset(res, select=-loss), 
        do.call(rbind, res$loss))
    },
    "data.table::dcast"={
      dcast(
        loss.dt,
        set + epoch ~ .,
        list(length, mean, sd),
        value.var="loss")
    }),
  if(requireNamespace("dplyr"))atime::atime_grid("dplyr::summarise"={
    loss.dt |> 
      dplyr::group_by(set, epoch) |> 
      dplyr::summarise(
        loss_length=length(loss),
        loss_mean=mean(loss), 
        loss_sd=sd(loss))
  }),
  if(requireNamespace("tidyr"))atime::atime_grid("tidyr::pivot_wider"={
    loss.dt |> 
      tidyr::pivot_wider(
        id_cols = c(set,epoch), 
        values_from=loss, 
        names_from=name, 
        values_fn=function(values)list(c(
          loss_length=length(values),
          loss_mean=mean(values), 
          loss_sd=sd(values)))) |> 
      tidyr::unnest_wider(loss)
  }))
#> Loading required namespace: tidyr
summary.colors <- c(
  some("#D6604D", "#B2182B", "#67001F"),#dark red
  "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", 
  "#E5C494", "#B3B3B3")
names(summary.colors)[1:length(summary.expr.list)] <- names(summary.expr.list)
summary.colors <- summary.colors[names(summary.colors)!=""]
options(dplyr.summarise.inform=FALSE) # suppress dplyr::summarise grouping messages
cache(summary.atime.list, atime::atime(
  N=as.integer(10^seq(0, 7, by=0.5)),
  setup={
    n.folds <- 10
    loss.dt <- data.table(
      name="loss", 
      fold=rep(1:n.folds, each=2*N),
      loss=rnorm(2*N*n.folds),
      set=rep(c("subtrain","validation"),each=N),
      epoch=1:N,
      key=c("set","epoch","fold"))
  },
  seconds.limit=seconds.limit,
  expr.list=summary.expr.list))
aplot(summary.atime.list, "Length, Mean, SD over 10 folds for each epoch and set", 1e7, 1e1, "Number of epochs", summary.colors)

The comparison above shows that [.data.table is by far the most efficient method (by constant factors) for computing the mean and SD over folds.

Join and compute sum
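
The next benchmark joins a large keyed table (DT below) with a randomly sampled query table (select.dt), then sums two columns. The data.table idiom DT[select.dt, x+y] returns one element of x+y per row of select.dt; a minimal sketch with toy data (X and Y are hypothetical names, not the benchmark data),

library(data.table)
X <- data.table(id=c("a","b"), x=c(10, 20), key="id")
Y <- data.table(id=c("b","a","a"), y=c(1, 2, 3), key="id")
X[Y, x+y] # one element per row of Y (sorted by key), should be 12 13 21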

i <- 1:100
DT <- CJ(letter=LETTERS, i)[, x := rnorm(.N)]
setkey(DT, letter, i)
DF <- data.frame(DT)
rownames(DF) <- with(DF, paste0(letter, i))
expr.list <- if(requireNamespace("dplyr"))atime::atime_grid(
  "dplyr::inner_join"=with(
    dplyr::inner_join(DT, select.dt, by=c('letter','i')), x+y))
cache(atime.join.sum, atime::atime(
  N=10^seq(1, 7),
  setup={
    select.dt <- data.table(
      letter=sample(LETTERS, N, replace=TRUE),
      i=sample(i, N, replace=TRUE),
      y=rnorm(N))
    setkey(select.dt, letter, i)
    select.df <- data.frame(select.dt)
  },
  seconds.limit=seconds.limit,
  "data.table::`[.data.table`"=DT[select.dt, x+y],
  "data.table::merge"=data.table::merge.data.table(DT,select.dt)[, x+y],
  "base::merge.data.frame"=with(base::merge.data.frame(DF, select.df, by=c('letter','i')), x+y),
  "[+paste0"=with(select.df, DF[paste0(letter,i),"x"]+y),
  expr.list=expr.list))
aplot(atime.join.sum, "Join and sum", 1e9, 1e1, "Size of output vector")

Join and summarize
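
The next benchmark joins and then summarizes the matches for each row. In data.table this can be done in one step with by=.EACHI, which evaluates j once for each row of the table inside the square brackets; a minimal sketch with toy data (X and Y are hypothetical names), which should give rows=2, diff=-8.5 for a, and rows=1, diff=-17 for b,

library(data.table)
X <- data.table(id=c("a","b"), x=c(10, 20), key="id")
Y <- data.table(id=c("a","a","b"), y=c(1, 2, 3), key="id")
Y[X, .(rows=.N, diff=mean(y)-x), by=.EACHI]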

i <- 1:100
DT <- CJ(letter=LETTERS, i)[, x := rnorm(.N)]
setkey(DT, letter, i)
DF <- data.frame(DT)
rownames(DF) <- with(DF, paste0(letter, i))
cache(atime.join.summarize, atime::atime(
  N=as.integer(10^seq(0, 7, by=0.5)),
  setup={
    select.dt <- data.table(
      letter=sample(LETTERS, N, replace=TRUE),
      i=sample(i, N, replace=TRUE),
      y=rnorm(N))
    setkey(select.dt, letter, i)
    select.df <- data.frame(select.dt)
  },
  seconds.limit=seconds.limit,
  "data.table::`[.data.table`"={
    select.dt[DT, .(rows=.N, diff=mean(y)-x), by=.EACHI, nomatch=0L]
  },
  "base::by"={
    do.call(rbind, base::by(
      select.df, 
      with(select.df, paste0(letter, i)), 
      function(sdf){
        srow <- sdf[1,]
        data.frame(
          srow[,c("letter","i")],
          rows=nrow(sdf), 
          diff=mean(sdf$y)-DF[with(srow,paste0(letter,i)),"x"])
      }))
  }, 
  "dplyr::inner_join"={
    dplyr::inner_join(DT, select.dt, by=c('letter','i')) |> 
      dplyr::group_by(letter, i) |> 
      dplyr::summarise(rows=length(y), diff=mean(y)-x[1])
  }))
aplot(atime.join.summarize, "Join and summarize", 1e10, 1e1, "Rows in join table")

Rolling join

This situation arises when you want to compute averages on a regular grid, over some irregularly spaced numbers.
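
For example, with roll="nearest", a join matches each irregularly spaced value to the nearest grid point; a minimal sketch with toy data (irreg.toy and reg.toy are hypothetical names, not the benchmark data),

library(data.table)
irreg.toy <- data.table(X=c(0.03, 0.12, 0.49), Y=c(1, 2, 3), key="X")
reg.toy <- data.table(grid=c(0.05, 0.15, 0.45), X=c(0.05, 0.15, 0.45), key="X")
reg.toy[irreg.toy, roll="nearest"] # each X gets the nearest grid value: 0.05, 0.15, 0.45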

digits <- 1
grid.space <- 10^(-digits)
offset <- grid.space/2
cache(atime.roll, atime::atime(
  N=10^seq(1, 7),
  setup={
    set.seed(1)
    X <- runif(N)
    Y <- 10*X+rnorm(N)
  },
  seconds.limit=seconds.limit,
  "data.table::[roll=nearest"={
    irreg.dt <- data.table(X, Y, key="X")
    grid <- seq(offset, 1-offset, by=grid.space)
    reg.dt <- data.table(grid, X=grid, key="X")
    join.dt <- reg.dt[irreg.dt, roll="nearest"]
    join.dt[, .(Y.N=.N, Y.mean=mean(Y), Y.sd=sd(Y)), by=grid]
  },
  "round,data.table"={
    data.table(
      grid=round(X+offset, digits=digits)-offset,
      Y
    )[, .(
      Y.N=.N, 
      Y.mean=mean(Y), 
      Y.sd=sd(Y)
    ), by=grid]
  },
  "round,aggregate"={
    grid <- round(X+offset, digits=digits)-offset
    aggregate(Y ~ grid, FUN=function(values)c(
      N=length(values),
      mean=mean(values),
      sd=sd(values)))
  }))
aplot(atime.roll, "Rolling join", 1e10, 1e1, "Rows in join table")

Total time

The knitting time below can be much smaller than the total time over all of the timings (further below), because most results are read from the cache defined in Setup rather than re-computed.

Sys.time() - time.begin
#> Time difference of 1.324017 mins
seconds.vec <- sapply(cache.list, function(L){
  do.call(sum, L$meas$time)
})
sum(seconds.vec)+time.begin-time.begin # add then subtract a time, so the total seconds print as a time difference
#> Time difference of 34.26997 mins

Session info

sessionInfo()
#> R version 4.2.3 (2023-03-15)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 18.04.6 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=fr_FR.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=fr_FR.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=fr_FR.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] stats     graphics  utils     datasets  grDevices methods  
#> [7] base     
#> 
#> other attached packages:
#> [1] directlabels_2021.2.24 ggplot2_3.4.0         
#> [3] data.table_1.14.7     
#> 
#> loaded via a namespace (and not attached):
#>  [1] highr_0.9          bslib_0.4.1        compiler_4.2.3    
#>  [4] pillar_1.8.1       jquerylib_0.1.4    tools_4.2.3       
#>  [7] digest_0.6.31      jsonlite_1.8.4     evaluate_0.19     
#> [10] lifecycle_1.0.3    tibble_3.1.8       gtable_0.3.1      
#> [13] lattice_0.20-45    pkgconfig_2.0.3    rlang_1.0.6       
#> [16] cli_3.4.1          parallel_4.2.3     xfun_0.35         
#> [19] fastmap_1.1.0      withr_2.5.0        stringr_1.5.0     
#> [22] dplyr_1.0.10       knitr_1.41         hms_1.1.2         
#> [25] generics_0.1.3     vctrs_0.5.1        sass_0.4.4        
#> [28] tidyselect_1.2.0   grid_4.2.3         glue_1.6.2        
#> [31] atime_2023.1.24    R6_2.5.1           fansi_1.0.3       
#> [34] rmarkdown_2.18     purrr_0.3.5        tidyr_1.2.1       
#> [37] tzdb_0.3.0         readr_2.1.3        farver_2.1.1      
#> [40] magrittr_2.0.3     ellipsis_0.3.2     scales_1.2.1      
#> [43] htmltools_0.5.4    colorspace_2.0-3   quadprog_1.5-8    
#> [46] utf8_1.2.2         stringi_1.7.8.9001 munsell_0.5.0     
#> [49] cachem_1.0.6