The hardware and bandwidth for this mirror is donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.

Performance benchmark

Daijiang Li

2020-12-17

Benchmark for PSV family functions

psv

library(phyr)

# simulate data
nspp = 500
nsite = 100
tree_sim = ape::rtree(n = nspp)
comm_sim = matrix(rbinom(nspp * nsite, size = 1, prob = 0.6), 
                  nrow = nsite, ncol = nspp)
row.names(comm_sim) = paste0("site_", 1:nsite)
colnames(comm_sim) = paste0("t", 1:nspp)
comm_sim = comm_sim[, tree_sim$tip.label]
# about 40 times faster
rbenchmark::benchmark(
  "picante" = {picante::psv(comm_sim, tree_sim)},
  "phyr R" = {phyr::psv(comm_sim, tree_sim, cpp = FALSE)},
  "phyr c++" = {phyr::psv(comm_sim, tree_sim, cpp = TRUE)},
  replications = 10,
  columns = c("test", "replications", "elapsed",
              "relative", "user.self", "sys.self"))
#>       test replications elapsed relative user.self sys.self
#> 3 phyr c++           10   0.339    1.000     0.298    0.030
#> 2   phyr R           10   3.287    9.696     2.907    0.303
#> 1  picante           10  16.265   47.979    14.824    0.795

pse

comm_sim = matrix(rpois(nspp * nsite, 3), nrow = nsite, ncol = nspp)
row.names(comm_sim) = paste0("site_", 1:nsite)
colnames(comm_sim) = paste0("t", 1:nspp)
comm_sim = comm_sim[, tree_sim$tip.label]
# about 2-3 times faster
rbenchmark::benchmark(
  "picante" = {picante::pse(comm_sim, tree_sim)},
  "phyr R" = {phyr::pse(comm_sim, tree_sim, cpp = FALSE)},
  "phyr c++" = {phyr::pse(comm_sim, tree_sim, cpp = TRUE)},
  replications = 20,
  columns = c("test", "replications", "elapsed",
              "relative", "user.self", "sys.self"))
#>       test replications elapsed relative user.self sys.self
#> 3 phyr c++           20   1.456    1.000     1.329    0.105
#> 2   phyr R           20   4.233    2.907     3.453    0.555
#> 1  picante           20   3.858    2.650     3.319    0.475

pcd

# pcd is about 20 times faster
rbenchmark::benchmark(
  "phyr" = {phyr::pcd(comm = comm_a, tree = phylotree, reps = 1000, verbose = FALSE)},
  "picante" = {picante::pcd(comm = comm_a, tree = phylotree, reps = 1000)},
  replications = 10,
  columns = c("test", "replications", "elapsed",
              "relative", "user.self", "sys.self"))
#>      test replications elapsed relative user.self sys.self
#> 1    phyr           10   0.214    1.000     0.192    0.012
#> 2 picante           10   4.516   21.103     4.043    0.074

Benchmark for community PGLMM (pglmm)

To compare the cpp version and R version, and the version from the pez package.

library(dplyr)
comm = comm_a
comm$site = row.names(comm)
dat = tidyr::gather(comm, key = "sp", value = "freq", -site) %>% 
  left_join(envi, by = "site") %>% 
  left_join(traits, by = "sp")
dat$pa = as.numeric(dat$freq > 0)
# data prep for pez::communityPGLMM, not necessary for phyr::pglmm
dat = arrange(dat, site, sp)
dat = filter(dat, sp %in% sample(unique(dat$sp), 10),
             site %in% sample(unique(dat$site), 5))
nspp = n_distinct(dat$sp)
nsite = n_distinct(dat$site)

dat$site = as.factor(dat$site)
dat$sp = as.factor(dat$sp)

tree = ape::drop.tip(phylotree, setdiff(phylotree$tip.label, unique(dat$sp)))
Vphy <- ape::vcv(tree)
Vphy <- Vphy/max(Vphy)
Vphy <- Vphy/exp(determinant(Vphy)$modulus[1]/nspp)
Vphy = Vphy[levels(dat$sp), levels(dat$sp)]

# prepare random effects
re.site <- list(1, site = dat$site, covar = diag(nsite))
re.sp <- list(1, sp = dat$sp, covar = diag(nspp))
re.sp.phy <- list(1, sp = dat$sp, covar = Vphy)
# sp is nested within site
re.nested.phy <- list(1, sp = dat$sp, covar = Vphy, site = dat$site)
re.nested.rep <- list(1, sp = dat$sp, covar = solve(Vphy), site = dat$site) # equal to sp__@site
# can be named 
re = list(re.sp = re.sp, re.sp.phy = re.sp.phy, re.nested.phy = re.nested.phy, re.site = re.site)

# about 4-10 times faster for a small dataset
rbenchmark::benchmark(
  "phyr c++ bobyqa" = {phyr::pglmm(freq ~ 1 + shade + (1|sp__) + (1|site) + (1|sp__@site), 
              dat, cov_ranef = list(sp = phylotree), REML = FALSE, 
              cpp = TRUE, optimizer = "bobyqa")},
  "phyr c++ Nelder-Mead" = {phyr::pglmm(freq ~ 1 + shade + (1|sp__) + (1|site) + (1|sp__@site), 
              dat, cov_ranef = list(sp = phylotree), REML = FALSE, 
              cpp = TRUE, optimizer = "Nelder-Mead")},
  "phyr R Nelder-Mead" = {phyr::pglmm(freq ~ 1 + shade + (1|sp__) + (1|site) + (1|sp__@site), 
              dat, cov_ranef = list(sp = phylotree), REML = FALSE, 
              cpp = FALSE, optimizer = "Nelder-Mead")},
  "pez R Nelder-Mead" = {pez::communityPGLMM(freq ~ 1 + shade, data = dat, sp = dat$sp, site = dat$site, 
                      random.effects = re, REML = FALSE)},
  replications = 5,
  columns = c("test", "replications", "elapsed",
              "relative", "user.self", "sys.self")
)
#>                   test replications elapsed relative user.self sys.self
#> 4    pez R Nelder-Mead            5  32.214   88.989    30.821    0.374
#> 1      phyr c++ bobyqa            5   0.362    1.000     0.342    0.006
#> 2 phyr c++ Nelder-Mead            5   1.156    3.193     1.115    0.015
#> 3   phyr R Nelder-Mead            5  33.281   91.936    31.198    0.480

# about 6 times faster for a small dataset
rbenchmark::benchmark(
  "phyr c++ bobyqa" = {phyr::pglmm(pa ~ 1 + shade + (1|sp__) + (1|site) + (1|sp__@site), 
              dat, family = "binomial", cov_ranef = list(sp = phylotree), REML = FALSE, 
              cpp = TRUE, optimizer = "bobyqa")},
  "phyr c++ Nelder-Mead" = {phyr::pglmm(pa ~ 1 + shade + (1|sp__) + (1|site) + (1|sp__@site), 
              dat, family = "binomial", cov_ranef = list(sp = phylotree), REML = FALSE, 
              cpp = TRUE, optimizer = "Nelder-Mead")},
  "phyr R Nelder-Mead" = {phyr::pglmm(pa ~ 1 + shade + (1|sp__) + (1|site) + (1|sp__@site), 
              dat, family = "binomial", cov_ranef = list(sp = phylotree), REML = FALSE, 
              cpp = FALSE, optimizer = "Nelder-Mead")},
  "pez R Nelder-Mead" = {pez::communityPGLMM(pa ~ 1 + shade, data = dat, sp = dat$sp, 
                                             family = "binomial", site = dat$site, 
                      random.effects = re, REML = FALSE)},
  replications = 5,
  columns = c("test", "replications", "elapsed",
              "relative", "user.self", "sys.self")
)
#>                   test replications elapsed relative user.self sys.self
#> 4    pez R Nelder-Mead            5  49.296   42.314    45.731    0.604
#> 1      phyr c++ bobyqa            5   1.840    1.579     1.750    0.033
#> 2 phyr c++ Nelder-Mead            5   1.165    1.000     1.093    0.021
#> 3   phyr R Nelder-Mead            5  24.355   20.906    23.024    0.317

Benchmark for cor_phylo()

library(ape)
# Set up parameter values for simulating data
n <- 50
phy <- rcoal(n, tip.label = 1:n)
trt_names <- paste0("par", 1:2)

R <- matrix(c(1, 0.7, 0.7, 1), nrow = 2, ncol = 2)
d <- c(0.3, 0.95)
B2 <- 1

Se <- c(0.2, 1)
M <- matrix(Se, nrow = n, ncol = 2, byrow = TRUE)
colnames(M) <- trt_names

# Set up needed matrices for the simulations
p <- length(d)

star <- stree(n)
star$edge.length <- array(1, dim = c(n, 1))
star$tip.label <- phy$tip.label

Vphy <- vcv(phy)
Vphy <- Vphy/max(Vphy)
Vphy <- Vphy/exp(determinant(Vphy)$modulus[1]/n)

tau <- matrix(1, nrow = n, ncol = 1) %*% diag(Vphy) - Vphy
C <- matrix(0, nrow = p * n, ncol = p * n)
for (i in 1:p) for (j in 1:p) {
  Cd <- (d[i]^tau * (d[j]^t(tau)) * (1 - (d[i] * d[j])^Vphy))/(1 - d[i] * d[j])
  C[(n * (i - 1) + 1):(i * n), (n * (j - 1) + 1):(j * n)] <- R[i, j] * Cd
}
MM <- matrix(M^2, ncol = 1)
V <- C + diag(as.numeric(MM))

iD <- t(chol(V))

XX <- iD %*% rnorm(2 * n)
X <- matrix(XX, n, p)
colnames(X) <- trt_names
rownames(X) <- phy$tip.label
rownames(M) <- phy$tip.label

U <- list(cbind(rnorm(n, mean = 2, sd = 10)))
names(U) <- trt_names[2]

X[,2] <- X[,2] + B2[1] * U[[1]][,1] - B2[1] * mean(U[[1]][,1])

z <- cor_phylo(variates = X,
               covariates = U,
               meas_errors = M,
               phy = phy,
               species = phy$tip.label)


U2 <- list(NULL, matrix(rnorm(n, mean = 2, sd = 10), nrow = n, ncol = 1))
rownames(U2[[2]]) <- phy$tip.label
colnames(U2[[2]]) <- "par2"
X2 = X
X2[,2] <- X2[,2] + B2[1] * U2[[2]][,1] - B2[1] * mean(U2[[2]][,1])

z_r <- corphylo(X = X2, SeM = M, U = U2, phy = phy, method = "Nelder-Mead")

rbenchmark::benchmark(
  "cor_phylo" = {cor_phylo(variates = X, covariates = U, meas_errors = M,
                           phy = phy, species = phy$tip.label)},
  "corphylo" = {corphylo(X = X2, SeM = M, U = U2, phy = phy, method = "Nelder-Mead")},
  replications = 5,
  columns = c("test", "replications", "elapsed",
              "relative", "user.self", "sys.self")
)
#>        test replications elapsed relative user.self sys.self
#> 1 cor_phylo            5   4.511    1.000     4.329    0.062
#> 2  corphylo            5  16.190    3.589    13.863    1.369

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.