The hardware and bandwidth for this mirror is donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.
This vignette introduces the SpaCOAP workflow for the analysis of spatial multi-omics dataset.
We demonstrate the use of SpaCOAP to one Spleen SPOTS data that are here, which can be downloaded to the current working path by the following command:
url1 <- "https://github.com/feiyoung/SpaCOAP/tree/master/vignettes_data/"
rna_object <- "seu_rna_over_Spleen.RDS?raw=true"
download.file(paste0(url1, rna_object),"seu_rna_over_Spleen.RDS",mode='wb')
protein_object <- "seu_adt_over_Spleen.RDS?raw=true"
download.file(paste0(url1,protein_object), 'seu_adt_over_Spleen.RDS', mode='wb')
## download  annotation
download.file(paste0(url1, "cell_clusters_anno.rds?raw=true"), "cell_clusters_anno.rds", "wb")Then load to R. Here, we only focus one section.
seu_rna_over <- readRDS("./seu_rna_over_Spleen.RDS")
seu_adt_over <- readRDS("./seu_adt_over_Spleen.RDS")
load("./cell_clusters_anno.rds")The package can be loaded with the command:
Define some functions
searchRadius <- function(pos, lower.med=8, upper.med=10, radius.upper= NULL){
  if (!inherits(pos, "matrix"))
    stop("method is only for  matrix object!")
  
  
  ## Automatically determine the upper radius
  n_spots <- nrow(pos)
  idx <- sample(n_spots, min(100, n_spots))
  dis <- dist(pos[idx,])
  if(is.null(radius.upper)){
    #radius.upper <- max(dis)
    radius.upper <- sort(dis)[20] ## select the nearest 20 spots.
  }
  radius.lower <- min(dis[dis>0])
  Adj_sp <- DR.SC:::getneighborhood_fast(pos, radius=radius.upper)
  Med <- summary(Matrix::rowSums(Adj_sp))['Median']
  if(Med < lower.med) stop("The radius.upper is too smaller that cannot find median neighbors greater than 4.")
  start.radius <- 1
  Med <- 0
  message("Find the adjacency matrix by bisection method...")
  maxIter <- 30
  k <- 1
  while(!(Med >= lower.med && Med <=upper.med)){ # ensure that each spot has about 4~6 neighborhoods in median.
    
    Adj_sp <- DR.SC:::getneighborhood_fast(pos, radius=start.radius)
    Med <- summary(Matrix::rowSums(Adj_sp))['Median']
    if(Med < lower.med){
      radius.lower <- start.radius
      start.radius <- (radius.lower + radius.upper)/2
    }else if(Med >upper.med){
      radius.upper <- start.radius
      start.radius <- (radius.lower + radius.upper)/2
    }
    message("Current radius is ", round(start.radius, 2))
    message("Median of neighborhoods is ", Med)
    if(k > maxIter) {
      message("Reach the maximum iteration but can not find a proper radius!")
      break;
    }
    k <- k + 1
  }
  
  return(start.radius)
}
acc_fun <- function(y1, y2){
  n1 <- length(unique(y1))
  n2 <- length(unique(y2))
  if(n1<n2){ ## ensure n1> n2
    a <- y1
    y1 <- y2
    y2 <- a
    n1 <- length(unique(y1))
    n2 <- length(unique(y2))
  }
  cm <- as.matrix(table(Actual = y1, Predicted = y2))
  rnames <-row.names(cm)
  cnames <- colnames(cm)
  union_names <- union(rnames, cnames)
  n <- length(union_names)
  cm_new <- matrix(0, n, n)
  row.names(cm_new) <- colnames(cm_new) <- union_names
  for(r in 1:n2){
    cm_new[rnames,cnames[r]] <- cm[rnames,cnames[r]]
  }
  
  sum(diag(cm_new)) / length(y1)
}
kappa_fun <- function(y1, y2){
  require(irr)
  dat <- data.frame(y1, y2)
  k_res <- kappa2(dat)
  k_res$value
}Wrap the data matrix from the SeuratObject, including the RNA
expression count matrix X_count, covraite matrix
H associated with protein markters, control variables (here
only an intercept term) Z, and spatial coordinate matrix
pos.
X_count <- Matrix::t(seu_rna_over[["RNA"]][seu_rna_over[['RNA']]@var.features,])
X_count <-  as.matrix(X_count)
H <- t(as.matrix(seu_adt_over[["ADT"]]@data))
Z <- matrix(1, nrow(H), 1)
pos <- cbind(seu_rna_over$X0, seu_rna_over$X1)
radius_use <- searchRadius(pos, radius.upper = NULL)
set.seed(1)
n_spots <- nrow(pos)
idx <- sample(n_spots, min(100, n_spots))
dis <- dist(pos[idx,])
Adj_sp <-  SpaCOAP:::getneighbor_weightmat(pos, radius = radius_use, width=median(dis))Next, we select the number of factors and the rank of regreesion coefficient matrix using the proposed criterion.
q_max <- 20
d <- ncol(H)
rank_max <- d
tic <- proc.time()
reslist_max <- SpaCOAP(X_count, Adj_sp,  H,  Z,
                   rank_use = rank_max, q=q_max, epsELBO = 1e-8,  maxIter = 30)
toc <- proc.time()
time_spacoap_max <- toc[3] - tic[3]We apply the criterion, taking into account that the real data does not necessarily originate from our model. Therefore, we opt for a conservative approach that retains more information.
threshold=c(1e-15, 1e-20)
thre1 <- threshold[1]
beta_svalues <- svd(reslist_max$bbeta)$d
beta_svalues <- beta_svalues[beta_svalues>thre1]
ratio1 <- beta_svalues[-length(beta_svalues)] / beta_svalues[-1]
ratio1[1:10]
## Here, we set hr = 9 rather 1 to retain more information
thre2 <- threshold[2]
B_svalues <- svd(reslist_max$B)$d
B_svalues <- B_svalues[B_svalues>thre2]
ratio_fac <- B_svalues[-length(B_svalues)] / B_svalues[-1]
ratio_fac[1:6]
# [1] 6.123909e+00 2.749414e+00 1.376498e+00 1.314487e+00 3.310699e+14
# Here, we choose q=5 since huge decrease of singular values happen in q=5.
hq <- 5First, we run the proposed SpaCOAP method.
hr <- 9;hq <- 5
featureList <- list()
tic <- proc.time()
reslist <- SpaCOAP(X_count, Adj_sp, H=H, Z= Z,  
                   rank_use = hr, q=hq, epsELBO = 1e-8)
toc <- proc.time()
time_spacoap <- toc[3] - tic[3]
Matrix::rankMatrix(reslist$bbeta)
svd_x <- svd(reslist$bbeta, nu = hr, nv=hr)
H_spacoap <- H %*% svd_x$v
(R2_spacoap <- ProFAST::get_r2_mcfadden(embeds= cbind(H_spacoap, reslist$F), y=as.factor(cell_clusters)))
featureList[['SpaCOAP']] <- cbind(H_spacoap, reslist$F)First, we run COAP and calculate the MacFadden’s R-square.
##COAP
library(COAP)
tic <- proc.time()
res_coap <- RR_COAP(X_count, Z = cbind(Z, H), rank_use= 2+hr, q=hq, 
                    epsELBO = 1e-7, maxIter = 30)
toc <- proc.time()
time_coap <- toc[3] - tic[3]
save(res_coap, time_coap, file='reslist_time_coap.rds')
svd_x <- svd(res_coap$bbeta[,-c(1)], nu = hr, nv=hr)
H_coap <- H %*% svd_x$v
(R2_coap <- ProFAST::get_r2_mcfadden(embeds= cbind(res_coap$H, H_coap), y=as.factor(cell_clusters)))
featureList[['COAP']] <- cbind(res_coap$H, H_coap)Second, we run MRRR and calculate the MacFadden’s R-square.
## MRRR
mrrr_run <- function(Y, X, rank0, q=NULL, family=list(poisson()),
                     familygroup=rep(1,ncol(Y)), epsilon = 1e-4, sv.tol = 1e-2,
                     maxIter = 2000, trace=TRUE, truncflag=FALSE, trunc=500){
  # epsilon = 1e-4; sv.tol = 1e-2; maxIter = 30; trace=TRUE
  # Y <- X_count; X <- cbind(Z, H); rank0 = r + ncol(Z)
  
  require(rrpack)
  
  n <- nrow(Y); p <- ncol(Y)
  
  if(!is.null(q)){
    rank0 <- rank0+q
    X <- cbind(X, diag(n))
  }
  if(truncflag){
    ## Trunction
    Y[Y>trunc] <- trunc
    
  }
  
  svdX0d1 <- svd(X)$d[1]
  init1 = list(kappaC0 = svdX0d1 * 5)
  offset = NULL
  control = list(epsilon = epsilon, sv.tol = sv.tol, maxit = maxIter,
                 trace = trace, gammaC0 = 1.1, plot.cv = TRUE,
                 conv.obj = TRUE)
  fit.mrrr <- mrrr(Y=Y, X=X[,-1], family = family, familygroup = familygroup,
                   penstr = list(penaltySVD = "rankCon", lambdaSVD = 1),
                   control = control, init = init1, maxrank = rank0)
  
  return(fit.mrrr)
}
tic <- proc.time()
res_mrrr <- mrrr_run(X_count, cbind(Z,H), rank0=hr+ncol(Z), q=hq)
str(res_mrrr)
toc <- proc.time()
time_mrrr <- toc[3] - tic[3]
hbbeta_mrrr <-t(res_mrrr$coef[1:ncol(cbind(Z,H)), ])
svd_x <- svd(hbbeta_mrrr[,-c(1)], nu = hr, nv=hr)
H_mrrr <- H %*% svd_x$v
Theta_hb <- (res_mrrr$coef[(ncol(cbind(Z,H))+1): (nrow(cbind(Z,H))+ncol(cbind(Z,H))), ])
svdTheta <- svd(Theta_hb, nu=hq, nv=hq)
F_mrrr <- svdTheta$u
(R2_mrrr <- ProFAST::get_r2_mcfadden(embeds= cbind(H_mrrr, F_mrrr), y=as.factor(cell_clusters)))
featureList[['MRRR']] <- cbind(H_mrrr, F_mrrr)Finally, we execute FAST and compute the MacFadden’s R-squared. It is worth noting that we refrain from comparing with PLNPCA in this demo due to its time-consuming nature.
fast_run <- function(X_count, Adj_sp, q, verbose=TRUE, epsELBO=1e-8){
  require(ProFAST)
  reslist <- FAST_run(XList = list(X_count), 
                      AdjList = list(Adj_sp), q = q, fit.model = 'poisson', 
                      verbose=verbose, epsLogLik=epsELBO)
  reslist$hV <- reslist$hV[[1]]
  return(reslist)
}
tic <- proc.time()
res_fast <- fast_run(X_count, Adj_sp, q=hq, verbose=TRUE, epsELBO=1e-8)
toc <- proc.time()
time_fast <- toc - tic
(R2_fast <- ProFAST::get_r2_mcfadden(embeds= res_fast$hV, y=as.factor(cell_clusters)))
featureList[['FAST']] <- res_fast$hVWe evaluate the correlation between the low-dimensional representations learned by SpaCOAP and other methods with the annotated cell clusters using two metrics. The first metric is the adjusted MacFadden’s \(\mathrm{R}^2\), referred to as \(\mathrm{MacR}^2\). This measure quantifies the amount of biological information captured by the representations, where a higher value signifies superior performance in representation learning.
R2List <- list()
cell_label <- cell_clusters
for(im in 1: length(featureList)){
  message("im = ", im)
  R2List[[im]] <- ProFAST::get_r2_mcfadden(embeds= featureList[[im]], y=cell_label)
}
names(R2List) <- names(featureList)Finally, we employ a classification model trained through randomForest, leveraging the cell clusters and the learned low-dimensional representations by SpaCOAP and other methods. We then compare the prediction performance of this model. To achieve this, we randomly divide the samples into training and testing data with a ratio of \(7:3\). We evaluate the model’s performance using prediction accuracy (ACC) and Cohen’s Kappa on the testing data. In contrast of ACC, Kappa offers a more robust measure of accuracy, as it takes into account the possibility that the agreement occurs by chance. We repeat this division process ten times to ensure the reliability of our findings.
N <- 10
n <- length(cell_label)
methodNames <- c("SpaCOAP", "COAP",  "MRRR", "FAST")
n_methods <- length(methodNames)
metricList <- list(ACC = matrix(NA,N, n_methods), Kappa = matrix(NA,N, n_methods))
for(ii in 1: length(metricList)) colnames(metricList[[ii]]) <- methodNames
library(randomForest)
for(i in 1:N){
  # i <- 1
  message("i = ", i)
  set.seed(i)
  idx_train <- sort(sample(n, round(n*0.7)))
  idx_test <- sort(setdiff(1:n, idx_train))
  rf_spacoap <- randomForest(featureList[['SpaCOAP']][idx_train,], y=cell_label[idx_train])
  hy_spacoap <- predict(rf_spacoap, newdata=featureList[['SpaCOAP']][idx_test,])
  metricList$ACC[i,1] <- acc_fun(hy_spacoap, cell_label[idx_test])
  metricList$Kappa[i,1] <- kappa_fun(hy_spacoap, cell_label[idx_test])
  
  rf_coap <- randomForest(featureList[['COAP']][idx_train,], y=cell_label[idx_train])
  hy_coap <- predict(rf_coap, newdata=featureList[['COAP']][idx_test,])
  metricList$ACC[i,2] <- acc_fun(hy_coap, cell_label[idx_test])
  metricList$Kappa[i,2] <- kappa_fun(hy_coap, cell_label[idx_test])
 
  colnames(featureList[['MRRR']]) <- paste0("MRRR", 1:ncol(featureList[['MRRR']]))
  rf_MRRR <- randomForest(featureList[['MRRR']][idx_train,], y=cell_label[idx_train])
  hy_MRRR <- predict(rf_MRRR, newdata=featureList[['MRRR']][idx_test,])
  metricList$ACC[i,3] <- acc_fun(hy_MRRR, cell_label[idx_test])
  metricList$Kappa[i,3] <- kappa_fun(hy_MRRR, cell_label[idx_test])
  colnames(featureList[['FAST']]) <- paste0("FAST", 1:ncol(featureList[['FAST']]))
  rf_FAST <- randomForest(featureList[['FAST']][idx_train,], y=cell_label[idx_train])
  hy_FAST <- predict(rf_FAST, newdata=featureList[['FAST']][idx_test,])
  metricList$ACC[i,4] <- acc_fun(hy_FAST, cell_label[idx_test])
  metricList$Kappa[i,4] <- kappa_fun(hy_FAST, cell_label[idx_test])
}
DT::datatable(t(round(sapply(metricList, colMeans, na.rm=T),2) ))sessionInfo()
#> R version 4.4.1 (2024-06-14 ucrt)
#> Platform: x86_64-w64-mingw32/x64
#> Running under: Windows 11 x64 (build 26100)
#> 
#> Matrix products: default
#> 
#> 
#> locale:
#> [1] LC_COLLATE=C                               
#> [2] LC_CTYPE=Chinese (Simplified)_China.utf8   
#> [3] LC_MONETARY=Chinese (Simplified)_China.utf8
#> [4] LC_NUMERIC=C                               
#> [5] LC_TIME=Chinese (Simplified)_China.utf8    
#> 
#> time zone: Asia/Shanghai
#> tzcode source: internal
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> loaded via a namespace (and not attached):
#>  [1] digest_0.6.37     R6_2.5.1          fastmap_1.2.0     xfun_0.47        
#>  [5] cachem_1.1.0      knitr_1.48        htmltools_0.5.8.1 rmarkdown_2.28   
#>  [9] lifecycle_1.0.4   cli_3.6.3         sass_0.4.9        jquerylib_0.1.4  
#> [13] compiler_4.4.1    rstudioapi_0.16.0 tools_4.4.1       evaluate_1.0.0   
#> [17] bslib_0.8.0       yaml_2.3.10       rlang_1.1.4       jsonlite_1.8.9These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.