The hardware and bandwidth for this mirror is donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.
library(tip)
#> Registered S3 method overwritten by 'GGally':
#> method from
#> +.gg ggplot2
# A function to generate random matrices from a matrix normal distribution
<- function(mu, num_rows, num_cols){
random_mat_normal ::rmatrixnorm(M = matrix(mu,
LaplacesDemonnrow = num_rows,
ncol = num_cols),
U = diag(num_rows),
V = diag(num_cols))
}
# Generate 3 clusters of matrices
<- 5
p <- 3
m <- lapply(1:10, function(x) random_mat_normal(mu = 0, num_rows = m, num_cols = p))
c1 <- lapply(1:10, function(x) random_mat_normal(mu = 5, num_rows = m, num_cols = p))
c2 <- lapply(1:10, function(x) random_mat_normal(mu = -5, num_rows = m, num_cols = p))
c3
# Put all the data into a list
<- c(c1,c2,c3)
data_list
# Create a vector of true labels. True labels are only necessary
# for constructing network graphs that incorporate the true labels;
# this is often useful for research.
<- c(rep("Cluster 1", length(c1)),
true_labels rep("Cluster 2", length(c2)),
rep("Cluster 3", length(c3)))
<- matrix(NA,
distance_matrix nrow = length(true_labels),
ncol = length(true_labels))
# Distance matrix
for(i in 1:length(true_labels)){
for(j in i:length(true_labels)){
<- SMFilter::FDist2(mX = data_list[[i]],
distance_matrix[i,j] mY = data_list[[j]])
<- distance_matrix[i,j]
distance_matrix[j,i]
}
}
# Compute the temperature parameter estiamte
<- 1/median(distance_matrix[upper.tri(distance_matrix)])
temperature
# For each subject, compute the point estimate for the number of similar
# subjects using univariate multiple change point detection (i.e.)
= get_cpt_neighbors(.distance_matrix = distance_matrix)
init_num_neighbors #> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
# Set the number of burn-in iterations in the Gibbs samlper
# RECOMMENDATION: burn >= 1000
<- 10
burn
# Set the number of sampling iterations in the Gibbs sampler
# RECOMMENDATION: samples >= 1000
<- 10
samples
# Set the subject names
<- paste(1:dim(distance_matrix)[1])
names_subjects
# Run TIP clustering using only the prior
# --> That is, the likelihood function is constant
<- tip(.data = data_list,
tip1 .burn = burn,
.samples = samples,
.similarity_matrix = exp(-1.0*temperature*distance_matrix),
.init_num_neighbors = init_num_neighbors,
.likelihood_model = "CONSTANT",
.subject_names = names_subjects,
.num_cores = 1)
#> Bayesian Clustering: Table Invitation Prior Gibbs Sampler
#> burn-in: 10
#> samples: 10
#> Likelihood Model: CONSTANT
#>
|
| | 0%
|
|==== | 6%
|
|======== | 11%
|
|============ | 17%
|
|================ | 22%
|
|=================== | 28%
|
|======================= | 33%
|
|=========================== | 39%
|
|=============================== | 44%
|
|=================================== | 50%
|
|======================================= | 56%
|
|=========================================== | 61%
|
|=============================================== | 67%
|
|=================================================== | 72%
|
|====================================================== | 78%
|
|========================================================== | 83%
|
|============================================================== | 89%
|
|================================================================== | 94%
|
|======================================================================| 100%
# Produce plots for the Bayesian Clustering Model
<- plot(tip1) tip_plots
# View the posterior distribution of the number of clusters
$histogram_posterior_number_of_clusters tip_plots
# View the trace plot with respect to the posterior number of clusters
$trace_plot_posterior_number_of_clusters tip_plots
# Extract posterior cluster assignments using the Posterior Expected Adjusted Rand (PEAR) index
<- mcclust::maxpear(psm = tip1@posterior_similarity_matrix)$cl
cluster_assignments
# If the true labels are available, then show the cluster result via a contigency table
table(data.frame(true_label = true_labels,
cluster_assignment = cluster_assignments))
#> cluster_assignment
#> true_label 1 2 3 4
#> Cluster 1 4 5 1 0
#> Cluster 2 7 1 1 1
#> Cluster 3 0 9 0 1
# Create the one component graph with minimum entropy
<- partition_undirected_graph(.graph_matrix = tip1@posterior_similarity_matrix,
partition_list .num_components = 1,
.step_size = 0.001)
# Associate class labels and colors for the plot
<- c("Cluster 1" = "blue",
class_palette_colors "Cluster 2" = 'green',
"Cluster 3" = "red")
# Associate class labels and shapes for the plot
<- c("Cluster 1" = 19,
class_palette_shapes "Cluster 2" = 18,
"Cluster 3" = 17)
# Visualize the posterior similarity matrix by constructing a graph plot of
# the one-cluster graph. The true labels are used here (below they are not).
ggnet2_network_plot(.matrix_graph = partition_list$partitioned_graph_matrix,
.subject_names = NA,
.subject_class_names = true_labels,
.class_colors = class_palette_colors,
.class_shapes = class_palette_shapes,
.node_size = 2,
.add_node_labels = FALSE)
#> Warning: Duplicated override.aes is ignored.
# If true labels are not available, then construct a network plot
# of the one-cluster graph without any class labels.
# Note: Subject labels may be suppressed using .add_node_labels = FALSE.
ggnet2_network_plot(.matrix_graph = partition_list$partitioned_graph_matrix,
.subject_names = names_subjects,
.node_size = 2,
.add_node_labels = TRUE)
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.