When ground truth labels are available, autoFlagR can
evaluate the performance of anomaly detection algorithms using standard
metrics. This is essential for validation studies and publication.
library(autoFlagR)

set.seed(123)
# Create an example dataset
n <- 1000
data <- data.frame(
  id = 1:n,
  feature1 = rnorm(n, 100, 15),
  feature2 = rnorm(n, 50, 10),
  feature3 = rpois(n, 5),
  category = sample(c("A", "B", "C"), n, replace = TRUE)
)
# Introduce known anomalies (ground truth)
anomaly_indices <- c(1:20, 50:55, 100:110) # 20 + 6 + 11 = 37 rows
data$feature1[anomaly_indices] <- data$feature1[anomaly_indices] * 5 # Extreme values
data$feature2[anomaly_indices] <- data$feature2[anomaly_indices] * 3
data$feature3[anomaly_indices] <- data$feature3[anomaly_indices] * 10
# Create ground truth labels and add to data
data$is_error <- rep(FALSE, n)
data$is_error[anomaly_indices] <- TRUE
cat("Total anomalies in ground truth:", sum(data$is_error), "\n")
#> Total anomalies in ground truth: 37

# Score anomalies with ground truth for benchmarking
scored_data <- score_anomaly(
  data,
  method = "iforest",
  contamination = 0.05,
  ground_truth_col = "is_error"
)
#> Warning in (function (data, sample_size = min(nrow(data), 10000L), ntrees =
#> 500, : Attempting to use more than 1 thread, but package was compiled without
#> OpenMP support. See
#> https://github.com/david-cortes/installing-optimized-libraries#4-macos-install-and-enable-openmp
# Check if benchmarking was performed
if (!is.null(attr(scored_data, "benchmark_metrics"))) {
  cat("Benchmarking metrics available!\n")
}
#> Benchmarking metrics available!

# Extract benchmark metrics
metrics <- extract_benchmark_metrics(scored_data)
# Display metrics
cat("AUC-ROC:", round(metrics$auc_roc, 4), "\n")
#> AUC-ROC: 1
cat("AUC-PR:", round(metrics$auc_pr, 4), "\n")
#> AUC-PR: 0.0187
cat("Contamination Rate:", round(metrics$contamination_rate * 100, 2), "%\n")
#> Contamination Rate: 5 %
cat("\nTop-K Recall:\n")
#>
#> Top-K Recall:
for (k_name in names(metrics$top_k_recall)) {
  k_value <- gsub("top_", "", k_name)
  recall <- metrics$top_k_recall[[k_name]]
  cat(" Top", k_value, ":", round(recall * 100, 2), "%\n")
}
#> Top 10 : 0 %
#> Top 50 : 0 %
#> Top 100 : 0 %
#> Top 500 : 0 %

# Compare Isolation Forest vs LOF
methods <- c("iforest", "lof")
results <- list()
for (method in methods) {
  scored <- score_anomaly(
    data,
    method = method,
    contamination = 0.05,
    ground_truth_col = "is_error"
  )
  metrics <- extract_benchmark_metrics(scored)
  results[[method]] <- metrics
}
#> Warning in (function (data, sample_size = min(nrow(data), 10000L), ntrees =
#> 500, : Attempting to use more than 1 thread, but package was compiled without
#> OpenMP support. See
#> https://github.com/david-cortes/installing-optimized-libraries#4-macos-install-and-enable-openmp
# Create comparison table
comparison <- data.frame(
  Method = c("Isolation Forest", "Local Outlier Factor"),
  AUC_ROC = c(results$iforest$auc_roc, results$lof$auc_roc),
  AUC_PR = c(results$iforest$auc_pr, results$lof$auc_pr),
  Top_10_Recall = c(results$iforest$top_k_recall$top_10, results$lof$top_k_recall$top_10)
)
comparison
#>                 Method   AUC_ROC     AUC_PR Top_10_Recall
#> 1     Isolation Forest 1.0000000 0.01873248           0.0
#> 2 Local Outlier Factor 0.4900508 0.04805959           0.1

- AUC-ROC: Area under the ROC curve. Higher is better (max = 1.0). Measures the ability to distinguish between normal and anomalous records.
- AUC-PR: Area under the Precision-Recall curve. Higher is better (max = 1.0). More informative than AUC-ROC when classes are imbalanced.
- Top-K Recall: Percentage of true anomalies found in the top K highest-scoring records. Useful for prioritizing manual review.
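To make these definitions concrete, the sketch below computes AUC-ROC (via the rank-based Mann-Whitney formula) and Top-K recall by hand from a generic score vector and logical labels. It is independent of autoFlagR's internals; the simulated scores and labels are illustrative only.

# Hand-computed versions of two of the metrics above, from generic inputs
set.seed(1)
labels <- c(rep(TRUE, 5), rep(FALSE, 95))  # 5 true anomalies out of 100 records
scores <- c(rnorm(5, 3), rnorm(95, 0))     # anomalies tend to score higher

# AUC-ROC via the rank-based (Mann-Whitney) formula
auc_roc <- function(scores, labels) {
  r <- rank(scores)
  n_pos <- sum(labels)
  n_neg <- sum(!labels)
  (sum(r[labels]) - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg)
}

# Top-K recall: share of true anomalies captured by the K highest-scoring records
top_k_recall <- function(scores, labels, k) {
  top_k <- order(scores, decreasing = TRUE)[seq_len(k)]
  sum(labels[top_k]) / sum(labels)
}

auc_roc(scores, labels)
top_k_recall(scores, labels, k = 10)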
Benchmarking with ground truth labels allows you to:

- Quantitatively evaluate algorithm performance
- Compare different methods and parameters (see the sketch below)
- Report publication-quality metrics
- Validate detection capabilities before deployment
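The method comparison above extends naturally to parameter comparison. Here is a minimal sketch, assuming score_anomaly() and extract_benchmark_metrics() behave exactly as in the calls shown earlier; the contamination grid is illustrative only.

# Sweep the contamination parameter for Isolation Forest and collect metrics
contamination_grid <- c(0.01, 0.05, 0.10)

sweep <- lapply(contamination_grid, function(cont) {
  scored <- score_anomaly(
    data,
    method = "iforest",
    contamination = cont,
    ground_truth_col = "is_error"
  )
  m <- extract_benchmark_metrics(scored)
  data.frame(Contamination = cont, AUC_ROC = m$auc_roc, AUC_PR = m$auc_pr)
})

# One row of metrics per contamination setting
do.call(rbind, sweep)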
For more details, see the Function Reference.