The hardware and bandwidth for this mirror is donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.
library(RScelestial)
# We load igraph for drawing trees. If you do not want to draw,
# there is no need to import igraph.
library(igraph)
#>
#> Attaching package: 'igraph'
#> The following objects are masked from 'package:stats':
#>
#> decompose, spectrum
#> The following object is masked from 'package:base':
#>
#> union
The RScelestial package could be installed easily as follows
Here we show a simulation. We build a data set with following command.
# Following command generates ten samples with 20 loci.
# Rate of mutations on each edge of the evolutionary tree is 1.5.
D = synthesis(10, 20, 5, seed = 7)
D
#> $seqeunce
#> C1 C2 C3 C4 C5 C6 C7 C8 C9 C10
#> L1 0 0 0 3 0 1 0 3 0 0
#> L2 3 3 3 3 3 3 3 3 0 3
#> L3 0 3 3 0 3 1 0 0 0 3
#> L4 1 0 3 3 1 3 0 1 0 3
#> L5 0 3 3 1 3 3 0 0 0 0
#> L6 3 3 0 3 0 3 3 3 3 3
#> L7 0 0 0 0 3 0 3 0 3 0
#> L8 3 1 0 1 0 3 1 3 3 1
#> L9 0 0 3 3 3 3 3 0 0 3
#> L10 3 3 0 1 3 0 3 3 1 0
#> L11 0 3 0 0 0 0 3 1 3 0
#> L12 3 3 3 0 0 0 3 0 3 3
#> L13 3 1 1 3 0 3 0 0 0 3
#> L14 3 0 0 0 1 3 0 3 3 0
#> L15 0 3 0 3 0 0 3 3 1 0
#> L16 3 0 3 0 3 0 3 0 3 0
#> L17 3 3 0 3 3 3 0 3 1 3
#> L18 0 0 3 3 3 0 1 3 0 3
#> L19 3 3 0 1 3 0 0 3 0 3
#> L20 3 1 3 0 1 3 3 3 3 0
#>
#> $true.sequence
#> C1 C2 C3 C4 C5 C6 C7 C8 C9 C10
#> L1 0 0 0 0 0 0 0 0 0 0
#> L2 0 0 0 0 0 0 0 0 0 0
#> L3 0 0 0 0 0 0 0 0 0 0
#> L4 0 0 0 0 0 0 0 0 0 0
#> L5 0 0 0 0 0 0 0 0 0 0
#> L6 0 0 0 0 0 0 0 0 0 0
#> L7 0 0 0 0 0 0 0 0 0 0
#> L8 0 0 0 1 0 0 1 0 1 1
#> L9 0 0 0 0 0 0 0 0 0 0
#> L10 0 0 0 0 0 0 0 0 0 0
#> L11 0 0 0 0 0 0 0 0 0 0
#> L12 0 0 0 0 0 0 0 0 0 0
#> L13 0 0 0 0 0 0 0 0 0 0
#> L14 0 0 0 0 0 0 0 0 0 0
#> L15 0 0 0 0 0 0 0 0 0 0
#> L16 0 0 0 0 0 0 0 0 0 0
#> L17 0 0 0 0 0 0 0 0 0 0
#> L18 0 0 0 0 0 0 0 0 0 0
#> L19 0 0 0 0 0 0 0 0 0 0
#> L20 0 0 0 0 0 0 0 0 0 0
#>
#> $true.clone
#> $true.clone$N1
#> [1] "C2" "C6"
#>
#> $true.clone$N2
#> [1] "C5"
#>
#> $true.clone$N3
#> [1] "C1" "C8"
#>
#> $true.clone$N4
#> [1] "C3"
#>
#> $true.clone$N6
#> [1] "C4" "C7" "C9" "C10"
#>
#>
#> $true.tree
#> src dest len
#> 1 N2 N1 1
#> 2 N3 N2 1
#> 3 N4 N2 1
#> 4 N6 N3 1
seq = as.ten.state.matrix(D$seqeunce)
SP = scelestial(seq, return.graph = TRUE)
SP
#> $input
#> V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19
#> C1 A/A ./. A/A C/C A/A ./. A/A ./. A/A ./. A/A ./. ./. ./. A/A ./. ./. A/A ./.
#> C10 A/A ./. ./. ./. A/A ./. A/A C/C ./. A/A A/A ./. ./. A/A A/A A/A ./. ./. ./.
#> C2 A/A ./. ./. A/A ./. ./. A/A C/C A/A ./. ./. ./. C/C A/A ./. A/A ./. A/A ./.
#> C3 A/A ./. ./. ./. ./. A/A A/A A/A ./. A/A A/A ./. C/C A/A A/A ./. A/A ./. A/A
#> C4 ./. ./. A/A ./. C/C ./. A/A C/C ./. C/C A/A A/A ./. A/A ./. A/A ./. ./. C/C
#> C5 A/A ./. ./. C/C ./. A/A ./. A/A ./. ./. A/A A/A A/A C/C A/A ./. ./. ./. ./.
#> C6 C/C ./. C/C ./. ./. ./. A/A ./. ./. A/A A/A A/A ./. ./. A/A A/A ./. A/A A/A
#> C7 A/A ./. A/A A/A A/A ./. ./. C/C ./. ./. ./. ./. A/A A/A ./. ./. A/A C/C A/A
#> C8 ./. ./. A/A C/C A/A ./. A/A ./. A/A ./. C/C A/A A/A ./. ./. A/A ./. ./. ./.
#> C9 A/A A/A A/A A/A A/A ./. ./. ./. A/A C/C ./. ./. A/A ./. C/C ./. C/C A/A A/A
#> V20
#> C1 ./.
#> C10 A/A
#> C2 C/C
#> C3 ./.
#> C4 A/A
#> C5 C/C
#> C6 ./.
#> C7 ./.
#> C8 ./.
#> C9 ./.
#>
#> $sequence
#> V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19
#> C1 A/A A/A A/A C/C A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A
#> C10 A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A
#> C2 A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A
#> C3 A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A
#> C4 A/A A/A A/A A/A C/C A/A A/A C/C A/A C/C A/A A/A A/A A/A A/A A/A A/A A/A C/C
#> C5 A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A A/A A/A A/A
#> C6 C/C A/A C/C A/A A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A A/A
#> C7 A/A A/A A/A A/A A/A A/A A/A C/C A/A C/C A/A A/A A/A A/A C/C A/A A/A C/C A/A
#> C8 A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A C/C A/A A/A A/A A/A A/A A/A A/A A/A
#> C9 A/A A/A A/A A/A A/A A/A A/A A/A A/A C/C A/A A/A A/A A/A C/C A/A C/C A/A A/A
#> V20
#> C1 A/A
#> C10 A/A
#> C2 C/C
#> C3 A/A
#> C4 A/A
#> C5 C/C
#> C6 A/A
#> C7 A/A
#> C8 A/A
#> C9 A/A
#>
#> $tree
#> src dest len
#> 1 C3 C10 6.00013
#> 2 C1 C8 6.00014
#> 3 C1 C10 6.00015
#> 4 C4 C10 6.25012
#> 5 C7 C9 6.50012
#> 6 C2 C10 6.50014
#> 7 C6 C10 6.50014
#> 8 C1 C5 6.75016
#> 9 C1 C9 7.00013
#>
#> $graph
#> IGRAPH fd2a474 UNW- 10 9 --
#> + attr: name (v/c), weight (e/n)
#> + edges from fd2a474 (vertex names):
#> [1] C3 --C10 C1 --C8 C10--C1 C10--C4 C7 --C9 C10--C2 C10--C6 C1 --C5
#> [9] C1 --C9
You can draw the graph with following command
Also, we can make a rooted tree with cell “C8” as the root of the tree as follows:
SP = scelestial(seq, root.assign.method = "fix", root = "C8", return.graph = TRUE)
#> [1] "C8 -1"
#> [1] "C1 C8"
#> [1] "C10 C1"
#> [1] "C3 C10"
#> [1] "C4 C10"
#> [1] "C2 C10"
#> [1] "C6 C10"
#> [1] "C5 C1"
#> [1] "C9 C1"
#> [1] "C7 C9"
tree.plot(SP, vertex.size = 30)
Setting root.assign.method to “balance” lets the algorithm decide for a root that produces minimum height tree.
SP = scelestial(seq, root.assign.method = "balance", return.graph = TRUE)
#> [1] "C1 -1"
#> [1] "C8 C1"
#> [1] "C10 C1"
#> [1] "C3 C10"
#> [1] "C4 C10"
#> [1] "C2 C10"
#> [1] "C6 C10"
#> [1] "C5 C1"
#> [1] "C9 C1"
#> [1] "C7 C9"
#> [1] "C1 -1"
#> [1] "C8 C1"
#> [1] "C10 C1"
#> [1] "C3 C10"
#> [1] "C4 C10"
#> [1] "C2 C10"
#> [1] "C6 C10"
#> [1] "C5 C1"
#> [1] "C9 C1"
#> [1] "C7 C9"
tree.plot(SP, vertex.size = 30)
Following command calculates the distance array between pairs of samples.
D.distance.matrix <- distance.matrix.true.tree(D)
D.distance.matrix
#> C2 C6 C5 C1 C8 C3
#> C2 0.000000000 0.000000000 0.006849315 0.013698630 0.013698630 0.013698630
#> C6 0.000000000 0.000000000 0.006849315 0.013698630 0.013698630 0.013698630
#> C5 0.006849315 0.006849315 0.000000000 0.006849315 0.006849315 0.006849315
#> C1 0.013698630 0.013698630 0.006849315 0.000000000 0.000000000 0.013698630
#> C8 0.013698630 0.013698630 0.006849315 0.000000000 0.000000000 0.013698630
#> C3 0.013698630 0.013698630 0.006849315 0.013698630 0.013698630 0.000000000
#> C4 0.020547945 0.020547945 0.013698630 0.006849315 0.006849315 0.020547945
#> C7 0.020547945 0.020547945 0.013698630 0.006849315 0.006849315 0.020547945
#> C9 0.020547945 0.020547945 0.013698630 0.006849315 0.006849315 0.020547945
#> C10 0.020547945 0.020547945 0.013698630 0.006849315 0.006849315 0.020547945
#> C4 C7 C9 C10
#> C2 0.020547945 0.020547945 0.020547945 0.020547945
#> C6 0.020547945 0.020547945 0.020547945 0.020547945
#> C5 0.013698630 0.013698630 0.013698630 0.013698630
#> C1 0.006849315 0.006849315 0.006849315 0.006849315
#> C8 0.006849315 0.006849315 0.006849315 0.006849315
#> C3 0.020547945 0.020547945 0.020547945 0.020547945
#> C4 0.000000000 0.000000000 0.000000000 0.000000000
#> C7 0.000000000 0.000000000 0.000000000 0.000000000
#> C9 0.000000000 0.000000000 0.000000000 0.000000000
#> C10 0.000000000 0.000000000 0.000000000 0.000000000
SP.distance.matrix <- distance.matrix.scelestial(SP)
SP.distance.matrix
#> C1 C10 C2 C3 C4 C5
#> C1 0.000000000 0.004528317 0.009433976 0.009056619 0.009245286 0.005094350
#> C10 0.004528317 0.000000000 0.004905660 0.004528302 0.004716969 0.009622667
#> C2 0.009433976 0.004905660 0.000000000 0.009433961 0.009622629 0.014528326
#> C3 0.009056619 0.004528302 0.009433961 0.000000000 0.009245271 0.014150968
#> C4 0.009245286 0.004716969 0.009622629 0.009245271 0.000000000 0.014339636
#> C5 0.005094350 0.009622667 0.014528326 0.014150968 0.014339636 0.000000000
#> C6 0.009433976 0.004905660 0.009811319 0.009433961 0.009622629 0.014528326
#> C7 0.010188647 0.014716964 0.019622623 0.019245265 0.019433933 0.015282997
#> C8 0.004528309 0.009056626 0.013962286 0.013584928 0.013773595 0.009622659
#> C9 0.005283002 0.009811319 0.014716979 0.014339621 0.014528288 0.010377352
#> C6 C7 C8 C9
#> C1 0.009433976 0.010188647 0.004528309 0.005283002
#> C10 0.004905660 0.014716964 0.009056626 0.009811319
#> C2 0.009811319 0.019622623 0.013962286 0.014716979
#> C3 0.009433961 0.019245265 0.013584928 0.014339621
#> C4 0.009622629 0.019433933 0.013773595 0.014528288
#> C5 0.014528326 0.015282997 0.009622659 0.010377352
#> C6 0.000000000 0.019622623 0.013962286 0.014716979
#> C7 0.019622623 0.000000000 0.014716956 0.004905644
#> C8 0.013962286 0.014716956 0.000000000 0.009811312
#> C9 0.014716979 0.004905644 0.009811312 0.000000000
## Difference between normalized distance matrices
vertices <- rownames(SP.distance.matrix)
sum(abs(D.distance.matrix[vertices,vertices] - SP.distance.matrix))
#> [1] 0.5453399
Given a multiple sequence alignment, Scelestial infers the phylogeny of them. Here we present a simple example. First we load libraries to load a multiple alignment.
library(stringr)
if (!require("seqinr")) install.packages("seqinr")
#> Loading required package: seqinr
library(seqinr)
In this example, we load a multiple alignment from seqinr package.
Then we clean the data and build a zero-one matrix representing taxa and characters. Note that Scelestial accept matrices with taxa as its columns and characters as its rows.
# Removing non-informative columns and duplicate rows.
mcb <- toupper(t(sapply(seq(phylip$seq), function(i) unlist(strsplit(phylip$seq[[i]], '')))))
ccb <- as.character(phylip$seq)
occb <- order(ccb)
cbColMask <- sapply(seq(ncol(mcb)), function(j) length(levels(as.factor(mcb[,j]))) == 1)
cbRowMask <- rep(TRUE, length(ccb))
for (i in seq(length(ccb))) {
if (i == 1 || ccb[occb[i]] != ccb[occb[i-1]]) {
cbRowMask[occb[i]] <- FALSE
}
}
mcbRows <- apply(mcb[!cbRowMask, !cbColMask], MARGIN = 1, FUN = function(a) paste0(str_replace(a, "-", "X"), collapse = ""))
Executing Scelestial on the input matrix.
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.