All mosum
code is commented out due to [#4].
All mcp
code is commented out due to link
1 and link
2.
All bcp
code is commented out due to [#5]
for (
package in c(
"ggplot2", "mvtnorm",
# "bcp",
"breakfast", "changepoint", "cpm", "CptNonPar", "ecp", "fpop", "gfpop",
"InspectChangepoint", "jointseg", "mcp", "mosum", "not", "Rbeast",
"segmented", "stepR", "strucchange", "VARDetect", "wbs"
)
) {
if (!requireNamespace(package, quietly = TRUE)) utils::install.packages(
package, repos = "https://cloud.r-project.org", quiet = TRUE
)
}
# Univariate mean change
set.seed(1)
p <- 1
mean_data_1 <- rbind(
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
mvtnorm::rmvnorm(400, mean = rep(50, p), sigma = diag(100, p)),
mvtnorm::rmvnorm(300, mean = rep(2, p), sigma = diag(100, p))
)
plot.ts(mean_data_1)
# Univariate mean and/or variance change
set.seed(1)
p <- 1
mv_data_1 <- rbind(
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(50, p)),
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(300, mean = rep(10, p), sigma = diag(50, p))
)
plot.ts(mv_data_1)
# Multivariate mean change
set.seed(1)
p <- 3
mean_data_3 <- rbind(
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
mvtnorm::rmvnorm(400, mean = rep(50, p), sigma = diag(100, p)),
mvtnorm::rmvnorm(300, mean = rep(2, p), sigma = diag(100, p))
)
plot.ts(mean_data_3)
# Multivariate mean and/or variance change
set.seed(1)
p <- 3
mv_data_3 <- rbind(
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(50, p)),
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(300, mean = rep(10, p), sigma = diag(50, p))
)
plot.ts(mv_data_3)
# Linear regression
set.seed(1)
n <- 300
p <- 4
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- rbind(c(1, 3.2, -1, 0), c(-1, -0.5, 2.5, -2), c(0.8, 0, 1, 2))
y <- c(
x[1:100, ] %*% theta_0[1, ] + rnorm(100, 0, 3),
x[101:200, ] %*% theta_0[2, ] + rnorm(100, 0, 3),
x[201:n, ] %*% theta_0[3, ] + rnorm(100, 0, 3)
)
lm_data <- data.frame(y = y, x = x)
plot.ts(lm_data)
# Logistic regression
set.seed(1)
n <- 500
p <- 4
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta <- rbind(rnorm(p, 0, 1), rnorm(p, 2, 1))
y <- c(
rbinom(300, 1, 1 / (1 + exp(-x[1:300, ] %*% theta[1, ]))),
rbinom(200, 1, 1 / (1 + exp(-x[301:n, ] %*% theta[2, ])))
)
binomial_data <- data.frame(y = y, x = x)
plot.ts(binomial_data)
# Poisson regression
set.seed(1)
n <- 1100
p <- 3
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
delta <- rnorm(p)
theta_0 <- c(1, 0.3, -1)
y <- c(
rpois(500, exp(x[1:500, ] %*% theta_0)),
rpois(300, exp(x[501:800, ] %*% (theta_0 + delta))),
rpois(200, exp(x[801:1000, ] %*% theta_0)),
rpois(100, exp(x[1001:1100, ] %*% (theta_0 - delta)))
)
poisson_data <- data.frame(y = y, x = x)
plot.ts(log(poisson_data$y))
# Lasso
set.seed(1)
n <- 480
p_true <- 6
p <- 50
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- rbind(
runif(p_true, -5, -2),
runif(p_true, -3, 3),
runif(p_true, 2, 5),
runif(p_true, -5, 5)
)
theta_0 <- cbind(theta_0, matrix(0, ncol = p - p_true, nrow = 4))
y <- c(
x[1:80, ] %*% theta_0[1, ] + rnorm(80, 0, 1),
x[81:200, ] %*% theta_0[2, ] + rnorm(120, 0, 1),
x[201:320, ] %*% theta_0[3, ] + rnorm(120, 0, 1),
x[321:n, ] %*% theta_0[4, ] + rnorm(160, 0, 1)
)
lasso_data <- data.frame(y = y, x = x)
plot.ts(lasso_data[, seq_len(p_true + 1)])
# AR(3)
set.seed(1)
n <- 1000
x <- rep(0, n + 3)
for (i in 1:600) {
x[i + 3] <- 0.6 * x[i + 2] - 0.2 * x[i + 1] + 0.1 * x[i] + rnorm(1, 0, 3)
}
for (i in 601:1000) {
x[i + 3] <- 0.3 * x[i + 2] + 0.4 * x[i + 1] + 0.2 * x[i] + rnorm(1, 0, 3)
}
ar_data <- x[-seq_len(3)]
plot.ts(ar_data)
# GARCH(1, 1)
set.seed(1)
n <- 400
sigma_2 <- rep(1, n + 1)
x <- rep(0, n + 1)
for (i in seq_len(200)) {
sigma_2[i + 1] <- 20 + 0.5 * x[i]^2 + 0.1 * sigma_2[i]
x[i + 1] <- rnorm(1, 0, sqrt(sigma_2[i + 1]))
}
for (i in 201:400) {
sigma_2[i + 1] <- 1 + 0.1 * x[i]^2 + 0.5 * sigma_2[i]
x[i + 1] <- rnorm(1, 0, sqrt(sigma_2[i + 1]))
}
garch_data <- x[-1]
plot.ts(garch_data)
# VAR(2)
set.seed(1)
n <- 800
p <- 2
theta_1 <- matrix(c(-0.3, 0.6, -0.5, 0.4, 0.2, 0.2, 0.2, -0.2), nrow = p)
theta_2 <- matrix(c(0.3, -0.4, 0.1, -0.5, -0.5, -0.2, -0.5, 0.2), nrow = p)
x <- matrix(0, n + 2, p)
for (i in 1:500) {
x[i + 2, ] <- theta_1 %*% c(x[i + 1, ], x[i, ]) + rnorm(p, 0, 1)
}
for (i in 501:n) {
x[i + 2, ] <- theta_2 %*% c(x[i + 1, ], x[i, ]) + rnorm(p, 0, 1)
}
var_data <- x[-seq_len(2), ]
plot.ts(var_data)
The true change points are 300 and 700. Some methods are plotted due to the un-retrievable change points.
# Slow
strucchange::breakpoints(y ~ 1, data = data.frame(y = mean_data_1))$breakpoints
#> [1] 300 700
# Data need to be processed. `G` is selected based on the example
if (interactive()) {
mosum::mosum(c(mean_data_1), G = 40)$cpts.info$cpts
}
gfpop::gfpop(
data = mean_data_1,
mygraph = gfpop::graph(
penalty = 2 * log(nrow(mean_data_1)) * gfpop::sdDiff(mean_data_1) ^ 2,
type = "updown"
),
type = "mean"
)$changepoints
#> [1] 300 700 1000
invisible(
suppressMessages(
capture.output(
result_InspectChangepoint <- InspectChangepoint::inspect(
t(mean_data_1),
threshold = InspectChangepoint::compute.threshold(
nrow(mean_data_1), ncol(mean_data_1)
)
)
)
)
)
result_InspectChangepoint$changepoints[, "location"]
#> [1] 300 700
Rbeast::beast(
mean_data_1, season = "none", print.progress = FALSE, quiet = TRUE
)$trend$cp
#> [1] 701 301 NaN NaN NaN NaN NaN NaN NaN NaN
segmented::segmented(
lm(y ~ 1 + x, data.frame(y = mean_data_1, x = seq_len(nrow(mean_data_1)))),
seg.Z = ~ x
)$psi[, "Est."]
#> [1] 495
# Slowest
if (interactive()) {
plot(
mcp::mcp(
list(y ~ 1, ~ 1, ~ 1),
data = data.frame(y = mean_data_1, x = seq_len(nrow(mean_data_1))),
par_x = "x"
)
)
}
The true change points are 300, 700, 1000, 1300 and 1700. Some methods are plotted due to the un-retrievable change points.
cpm::processStream(mv_data_1, cpmType = "GLR")$changePoints
#> [1] 293 300 403 408 618 621 696 1000 1021 1024 1293 1300 1417 1693 1981
invisible(
suppressMessages(
capture.output(
result_InspectChangepoint <- InspectChangepoint::inspect(
t(mv_data_1),
threshold = InspectChangepoint::compute.threshold(
nrow(mv_data_1), ncol(mv_data_1)
)
)
)
)
)
result_InspectChangepoint$changepoints[, "location"]
#> [1] 300 700 702 704 708 712 715 716 717 718 721 722 723 726 727
#> [16] 732 734 736 740 742 744 746 748 750 753 755 756 757 759 764
#> [31] 765 768 771 772 774 791 794 798 799 801 802 803 807 809 810
#> [46] 813 815 817 819 826 827 828 829 831 833 835 836 837 838 840
#> [61] 841 842 843 845 848 849 852 854 860 862 864 866 868 870 872
#> [76] 875 879 881 884 886 887 888 889 896 897 898 901 912 913 915
#> [91] 919 922 923 927 932 934 936 940 944 945 947 948 958 961 962
#> [106] 963 964 966 967 968 972 974 976 978 979 1300 1700 1702 1703 1704
#> [121] 1708 1710 1712 1714 1716 1717 1718 1721 1723 1725 1726 1731 1733 1735 1736
#> [136] 1737 1739 1745 1752 1754 1756 1758 1759 1768 1770 1771 1778 1785 1790 1793
#> [151] 1795 1796 1797 1799 1800 1805 1806 1807 1808 1821 1828 1833 1835 1837 1840
#> [166] 1841 1842 1848 1851 1852 1854 1855 1857 1876 1879 1880 1882 1883 1884 1887
#> [181] 1889 1894 1898 1899 1905 1906 1907 1908 1909 1912 1919 1926 1927 1930 1933
#> [196] 1934 1935 1936 1940 1952 1955 1956 1960 1962 1963 1966 1967 1969 1970 1976
#> [211] 1977 1978 1980 1985 1987 1990
Rbeast::beast(
mv_data_1, season = "none", print.progress = FALSE, quiet = TRUE
)$trend$cp
#> [1] 301 1301 1794 701 1855 1986 1981 1769 1859 1779
# Slower
if (interactive()) {
plot(
mcp::mcp(
list(y ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1),
data = data.frame(y = mv_data_1, x = seq_len(nrow(mv_data_1))),
par_x = "x"
)
)
}
#> Press [enter] to continue
The true change points are 300 and 700. Some methods are plotted due to the un-retrievable change points.
invisible(
suppressMessages(
capture.output(
result_InspectChangepoint <- InspectChangepoint::inspect(
t(mean_data_3),
threshold = InspectChangepoint::compute.threshold(
nrow(mean_data_3), ncol(mean_data_3)
)
)
)
)
)
result_InspectChangepoint$changepoints[, "location"]
#> [1] 300 700
invisible(
capture.output(
result_Rbeast <- Rbeast::beast123(
mean_data_3,
metadata = list(whichDimIsTime = 1),
season = "none"
)
)
)
result_Rbeast$trend$cp
#> [,1] [,2] [,3]
#> [1,] 701 301 701
#> [2,] 301 701 301
#> [3,] 617 NaN 318
#> [4,] 158 NaN NaN
#> [5,] NaN NaN NaN
#> [6,] NaN NaN NaN
#> [7,] NaN NaN NaN
#> [8,] NaN NaN NaN
#> [9,] NaN NaN NaN
#> [10,] NaN NaN NaN
The true change points are 300, 700, 1000, 1300 and 1700. Some methods are plotted due to the un-retrievable change points.
invisible(
suppressMessages(
capture.output(
result_InspectChangepoint <- InspectChangepoint::inspect(
t(mv_data_3),
threshold = InspectChangepoint::compute.threshold(
nrow(mv_data_3), ncol(mv_data_3)
)
)
)
)
)
result_InspectChangepoint$changepoints[, "location"]
#> [1] 300 700 701 703 704 705 707 708 710 711 712 714 715 716 717
#> [16] 719 721 722 723 725 726 728 729 730 731 732 734 735 736 738
#> [31] 739 740 741 742 744 745 747 748 749 752 753 754 756 757 759
#> [46] 761 762 763 764 766 768 769 770 772 773 774 776 777 778 780
#> [61] 781 783 784 786 788 789 791 793 795 797 799 800 801 803 805
#> [76] 806 808 809 811 813 814 816 817 819 821 822 824 825 826 828
#> [91] 829 830 831 832 834 835 837 838 839 841 842 843 845 846 847
#> [106] 852 853 855 857 860 861 862 864 865 866 868 869 870 871 873
#> [121] 876 877 878 880 882 883 884 886 888 891 893 895 896 898 900
#> [136] 901 903 904 905 906 908 909 910 911 914 915 916 919 920 921
#> [151] 923 924 926 927 928 930 931 933 934 936 938 939 940 942 944
#> [166] 945 946 948 949 951 952 954 956 957 959 960 961 962 964 965
#> [181] 966 968 969 970 971 973 974 976 977 979 980 982 983 984 985
#> [196] 986 988 989 990 992 993 995 996 998 1000 1300 1700 1701 1702 1705
#> [211] 1708 1710 1711 1712 1713 1715 1716 1717 1718 1720 1721 1724 1725 1726 1728
#> [226] 1729 1730 1731 1733 1734 1735 1737 1738 1740 1742 1743 1745 1747 1749 1750
#> [241] 1752 1753 1754 1755 1756 1758 1759 1760 1762 1763 1764 1766 1767 1769 1770
#> [256] 1771 1773 1774 1776 1777 1779 1780 1782 1783 1785 1787 1788 1790 1791 1792
#> [271] 1793 1794 1796 1798 1800 1801 1803 1805 1809 1811 1812 1813 1814 1816 1817
#> [286] 1819 1820 1821 1823 1824 1826 1828 1830 1832 1833 1834 1836 1838 1839 1841
#> [301] 1842 1843 1846 1847 1850 1851 1853 1854 1856 1857 1859 1861 1862 1864 1865
#> [316] 1866 1869 1870 1872 1874 1876 1878 1879 1881 1882 1883 1885 1887 1888 1889
#> [331] 1890 1892 1894 1896 1898 1899 1900 1902 1904 1905 1907 1908 1909 1911 1913
#> [346] 1914 1916 1918 1919 1920 1922 1923 1925 1926 1928 1930 1931 1933 1935 1936
#> [361] 1937 1939 1940 1942 1943 1945 1947 1948 1950 1951 1952 1953 1955 1956 1957
#> [376] 1959 1960 1961 1963 1964 1966 1967 1968 1970 1972 1974 1976 1977 1978 1979
#> [391] 1982 1983 1985 1987 1990 1992 1993 1994 1996 1997
invisible(
capture.output(
result_Rbeast <- Rbeast::beast123(
mv_data_3,
metadata = list(whichDimIsTime = 1),
season = "none"
)
)
)
result_Rbeast$trend$cp
#> [,1] [,2] [,3]
#> [1,] 301 301 301
#> [2,] 1301 701 1301
#> [3,] 701 1301 702
#> [4,] 1994 807 757
#> [5,] 1978 881 764
#> [6,] 1986 887 1924
#> [7,] 1972 830 1942
#> [8,] 823 1960 769
#> [9,] 1825 1993 1825
#> [10,] 894 820 1831
The true change points are 100 and 200.
The true change point is 300.
The true change points are 500, 800 and 1000.
The true change points are 80, 200 and 320.
The true change point is 600. Some methods are plotted due to the un-retrievable change points.
The true change point is 200.
The true change points is 500.
VARDetect::tbss(var_data)
#> [1] "first.brk.points:"
#> [1] 140 196 252 308 364 420 476 532 588 644 700
#> [1] "selected lambda1:"
#> [1] 0.2107081
#> [1] "selected lambda2:"
#> [1] 0.02943525
#> [1] "second.brk.points:"
#> [1] 308 364 588
#> [1] "second.brk.points:"
#> [1] 308 476 588
#> [1] "second.brk.points:"
#> [1] 476 532
#> [1] "second.brk.points:"
#> [1] 476 532
#> [1] "second.brk.points:"
#> [1] 476 532
#> no refit for the parameter estimation
#> Estimated change points are: 501
well_log
result <- list(
fastcpd = fastcpd.mean(well_log, trim = 0.003)@cp_set,
changepoint = changepoint::cpt.mean(well_log)@cpts,
CptNonPar =
CptNonPar::np.mojo(well_log, G = floor(length(well_log) / 6))$cpts,
strucchange = strucchange::breakpoints(
y ~ 1, data = data.frame(y = well_log)
)$breakpoints,
ecp = ecp::e.divisive(matrix(well_log))$estimates,
breakfast = breakfast::breakfast(well_log)$cptmodel.list[[6]]$cpts,
wbs = wbs::wbs(well_log)$cpt$cpt.ic$mbic.penalty,
mosum = mosum::mosum(c(well_log), G = 40)$cpts.info$cpts,
# fpop = fpop::Fpop(well_log, length(well_log))$t.est, # meaningless
gfpop = gfpop::gfpop(
data = well_log,
mygraph = gfpop::graph(
penalty = 2 * log(length(well_log)) * gfpop::sdDiff(well_log) ^ 2,
type = "updown"
),
type = "mean"
)$changepoints,
InspectChangepoint = InspectChangepoint::inspect(
well_log,
threshold = InspectChangepoint::compute.threshold(length(well_log), 1)
)$changepoints[, "location"],
jointseg = jointseg::jointSeg(well_log, K = 12)$bestBkp,
Rbeast = Rbeast::beast(
well_log, season = "none", print.progress = FALSE, quiet = TRUE
)$trend$cp,
stepR = stepR::stepFit(well_log, alpha = 0.5)$rightEnd
)
package_list <- sort(names(result), decreasing = TRUE)
comparison_table <- NULL
for (package_index in seq_along(package_list)) {
package <- package_list[[package_index]]
comparison_table <- rbind(
comparison_table,
data.frame(
change_point = result[[package]],
package = package,
y_offset = (package_index - 1) * 400
)
)
}
most_selected <- sort(table(comparison_table$change_point), decreasing = TRUE)
most_selected <- sort(as.numeric(names(most_selected[most_selected >= 4])))
for (i in seq_len(length(most_selected) - 1)) {
if (most_selected[i + 1] - most_selected[i] < 2) {
most_selected[i] <- NA
most_selected[i + 1] <- most_selected[i + 1] - 0.5
}
}
(most_selected <- most_selected[!is.na(most_selected)])
#> [1] 6.0 314.0 434.0 704.0 776.0 1021.0 1057.0 1347.0 1405.0 1502.0 1661.0 1842.0 2023.0 2202.0
#> [15] 2384.5 2445.0 2507.0 2567.5 2738.0 2921.0 3094.0 3251.0 3464.0 3499.0 3622.0 3709.0 3820.0 3976.0
ggplot2::ggplot() +
ggplot2::geom_point(
data = data.frame(x = seq_along(well_log), y = c(well_log)),
ggplot2::aes(x = x, y = y)
) +
ggplot2::geom_vline(
xintercept = most_selected,
color = "black",
linetype = "dashed",
alpha = 0.2
) +
ggplot2::geom_point(
data = comparison_table,
ggplot2::aes(x = change_point, y = 95000 + y_offset, color = package),
shape = 17,
size = 1.75
) +
ggplot2::geom_hline(
data = comparison_table,
ggplot2::aes(yintercept = 95000 + y_offset, color = package),
linetype = "dashed",
alpha = 0.1
) +
ggplot2::coord_cartesian(
ylim = c(95000 - 500, max(well_log) + 1000),
xlim = c(-200, length(well_log) + 200),
expand = FALSE
) +
ggplot2::theme(
panel.background = ggplot2::element_blank(),
panel.border = ggplot2::element_rect(colour = "black", fill = NA),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank()
) +
ggplot2::xlab(NULL) + ggplot2::ylab(NULL)
well_log
datawell_log
microbenchmark_result <- microbenchmark::microbenchmark(
fastcpd = fastcpd::fastcpd.mean(well_log, trim = 0.003, r.progress = FALSE),
changepoint = changepoint::cpt.mean(well_log, method = "PELT"),
CptNonPar = CptNonPar::np.mojo(well_log, G = floor(length(well_log) / 6)),
strucchange =
strucchange::breakpoints(y ~ 1, data = data.frame(y = well_log)),
ecp = ecp::e.divisive(matrix(well_log)),
breakfast = breakfast::breakfast(well_log),
wbs = wbs::wbs(well_log),
mosum = mosum::mosum(c(well_log), G = 40),
fpop = fpop::Fpop(well_log, nrow(well_log)),
gfpop = gfpop::gfpop(
data = well_log,
mygraph = gfpop::graph(
penalty = 2 * log(length(well_log)) * gfpop::sdDiff(well_log) ^ 2,
type = "updown"
),
type = "mean"
),
InspectChangepoint = InspectChangepoint::inspect(
well_log,
threshold = InspectChangepoint::compute.threshold(length(well_log), 1)
),
jointseg = jointseg::jointSeg(well_log, K = 12),
Rbeast = Rbeast::beast(
well_log, season = "none", print.progress = FALSE, quiet = TRUE
),
stepR = stepR::stepFit(well_log, alpha = 0.5),
# mcp = mcp::mcp(
# list(y ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1),
# data = data.frame(y = well_log, x = seq_along(well_log)),
# par_x = "x"
# ),
not = not::not(well_log, contrast = "pcwsConstMean"),
times = 10
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> fastcpd 4.010279e+02 4.129134e+02 4.171979e+02 4.148564e+02 4.196057e+02 4.350300e+02 10
#> changepoint 3.195488e+01 3.230887e+01 3.743290e+01 3.352390e+01 4.418598e+01 4.980063e+01 10
#> CptNonPar 1.809139e+04 1.867986e+04 1.904000e+04 1.876936e+04 1.947816e+04 2.076009e+04 10
#> strucchange 5.813672e+04 5.831023e+04 5.963604e+04 5.952739e+04 6.139957e+04 6.173251e+04 10
#> ecp 7.534981e+05 7.642983e+05 7.742041e+05 7.661485e+05 7.880130e+05 8.003981e+05 10
#> breakfast 9.231426e+03 9.261763e+03 9.514283e+03 9.492325e+03 9.630395e+03 1.001344e+04 10
#> wbs 1.133042e+02 1.168601e+02 1.189637e+02 1.187244e+02 1.201765e+02 1.281057e+02 10
#> mosum 1.082397e+00 1.167387e+00 1.213233e+00 1.209105e+00 1.286479e+00 1.320333e+00 10
#> fpop 2.407630e+00 2.481348e+00 2.641625e+00 2.551970e+00 2.618455e+00 3.328585e+00 10
#> gfpop 5.918783e+01 5.959448e+01 6.081621e+01 6.006626e+01 6.048699e+01 6.725864e+01 10
#> InspectChangepoint 1.550181e+02 1.885944e+02 2.103973e+02 2.030795e+02 2.262247e+02 3.025476e+02 10
#> jointseg 1.082510e+01 1.082948e+01 1.174498e+01 1.110779e+01 1.221183e+01 1.424236e+01 10
#> Rbeast 6.311987e+02 6.634019e+02 6.702594e+02 6.680217e+02 6.846772e+02 7.048629e+02 10
#> stepR 2.812109e+01 2.836331e+01 2.864408e+01 2.862293e+01 2.897236e+01 2.933441e+01 10
#> not 9.350182e+01 9.469137e+01 9.684097e+01 9.647117e+01 9.763828e+01 1.031233e+02 10
well_log
dataknitr::opts_chunk$set(
collapse = TRUE, comment = "#>", eval = TRUE, cache = FALSE,
warning = FALSE, fig.width = 8, fig.height = 5
)
for (
package in c(
"ggplot2", "mvtnorm",
# "bcp",
"breakfast", "changepoint", "cpm", "CptNonPar", "ecp", "fpop", "gfpop",
"InspectChangepoint", "jointseg", "mcp", "mosum", "not", "Rbeast",
"segmented", "stepR", "strucchange", "VARDetect", "wbs"
)
) {
if (!requireNamespace(package, quietly = TRUE)) utils::install.packages(
package, repos = "https://cloud.r-project.org", quiet = TRUE
)
}
# Univariate mean change
set.seed(1)
p <- 1
mean_data_1 <- rbind(
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
mvtnorm::rmvnorm(400, mean = rep(50, p), sigma = diag(100, p)),
mvtnorm::rmvnorm(300, mean = rep(2, p), sigma = diag(100, p))
)
plot.ts(mean_data_1)
# Univariate mean and/or variance change
set.seed(1)
p <- 1
mv_data_1 <- rbind(
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(50, p)),
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(300, mean = rep(10, p), sigma = diag(50, p))
)
plot.ts(mv_data_1)
# Multivariate mean change
set.seed(1)
p <- 3
mean_data_3 <- rbind(
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
mvtnorm::rmvnorm(400, mean = rep(50, p), sigma = diag(100, p)),
mvtnorm::rmvnorm(300, mean = rep(2, p), sigma = diag(100, p))
)
plot.ts(mean_data_3)
# Multivariate mean and/or variance change
set.seed(1)
p <- 3
mv_data_3 <- rbind(
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(50, p)),
mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
mvtnorm::rmvnorm(300, mean = rep(10, p), sigma = diag(50, p))
)
plot.ts(mv_data_3)
# Linear regression
set.seed(1)
n <- 300
p <- 4
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- rbind(c(1, 3.2, -1, 0), c(-1, -0.5, 2.5, -2), c(0.8, 0, 1, 2))
y <- c(
x[1:100, ] %*% theta_0[1, ] + rnorm(100, 0, 3),
x[101:200, ] %*% theta_0[2, ] + rnorm(100, 0, 3),
x[201:n, ] %*% theta_0[3, ] + rnorm(100, 0, 3)
)
lm_data <- data.frame(y = y, x = x)
plot.ts(lm_data)
# Logistic regression
set.seed(1)
n <- 500
p <- 4
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta <- rbind(rnorm(p, 0, 1), rnorm(p, 2, 1))
y <- c(
rbinom(300, 1, 1 / (1 + exp(-x[1:300, ] %*% theta[1, ]))),
rbinom(200, 1, 1 / (1 + exp(-x[301:n, ] %*% theta[2, ])))
)
binomial_data <- data.frame(y = y, x = x)
plot.ts(binomial_data)
# Poisson regression
set.seed(1)
n <- 1100
p <- 3
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
delta <- rnorm(p)
theta_0 <- c(1, 0.3, -1)
y <- c(
rpois(500, exp(x[1:500, ] %*% theta_0)),
rpois(300, exp(x[501:800, ] %*% (theta_0 + delta))),
rpois(200, exp(x[801:1000, ] %*% theta_0)),
rpois(100, exp(x[1001:1100, ] %*% (theta_0 - delta)))
)
poisson_data <- data.frame(y = y, x = x)
plot.ts(log(poisson_data$y))
plot.ts(poisson_data[, -1])
# Lasso
set.seed(1)
n <- 480
p_true <- 6
p <- 50
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- rbind(
runif(p_true, -5, -2),
runif(p_true, -3, 3),
runif(p_true, 2, 5),
runif(p_true, -5, 5)
)
theta_0 <- cbind(theta_0, matrix(0, ncol = p - p_true, nrow = 4))
y <- c(
x[1:80, ] %*% theta_0[1, ] + rnorm(80, 0, 1),
x[81:200, ] %*% theta_0[2, ] + rnorm(120, 0, 1),
x[201:320, ] %*% theta_0[3, ] + rnorm(120, 0, 1),
x[321:n, ] %*% theta_0[4, ] + rnorm(160, 0, 1)
)
lasso_data <- data.frame(y = y, x = x)
plot.ts(lasso_data[, seq_len(p_true + 1)])
# AR(3)
set.seed(1)
n <- 1000
x <- rep(0, n + 3)
for (i in 1:600) {
x[i + 3] <- 0.6 * x[i + 2] - 0.2 * x[i + 1] + 0.1 * x[i] + rnorm(1, 0, 3)
}
for (i in 601:1000) {
x[i + 3] <- 0.3 * x[i + 2] + 0.4 * x[i + 1] + 0.2 * x[i] + rnorm(1, 0, 3)
}
ar_data <- x[-seq_len(3)]
plot.ts(ar_data)
# GARCH(1, 1)
set.seed(1)
n <- 400
sigma_2 <- rep(1, n + 1)
x <- rep(0, n + 1)
for (i in seq_len(200)) {
sigma_2[i + 1] <- 20 + 0.5 * x[i]^2 + 0.1 * sigma_2[i]
x[i + 1] <- rnorm(1, 0, sqrt(sigma_2[i + 1]))
}
for (i in 201:400) {
sigma_2[i + 1] <- 1 + 0.1 * x[i]^2 + 0.5 * sigma_2[i]
x[i + 1] <- rnorm(1, 0, sqrt(sigma_2[i + 1]))
}
garch_data <- x[-1]
plot.ts(garch_data)
# VAR(2)
set.seed(1)
n <- 800
p <- 2
theta_1 <- matrix(c(-0.3, 0.6, -0.5, 0.4, 0.2, 0.2, 0.2, -0.2), nrow = p)
theta_2 <- matrix(c(0.3, -0.4, 0.1, -0.5, -0.5, -0.2, -0.5, 0.2), nrow = p)
x <- matrix(0, n + 2, p)
for (i in 1:500) {
x[i + 2, ] <- theta_1 %*% c(x[i + 1, ], x[i, ]) + rnorm(p, 0, 1)
}
for (i in 501:n) {
x[i + 2, ] <- theta_2 %*% c(x[i + 1, ], x[i, ]) + rnorm(p, 0, 1)
}
var_data <- x[-seq_len(2), ]
plot.ts(var_data)
fastcpd::fastcpd.mean(mean_data_1, r.progress = FALSE)@cp_set
CptNonPar::np.mojo(mean_data_1, G = floor(length(mean_data_1) / 6))$cpts
# Slow
strucchange::breakpoints(y ~ 1, data = data.frame(y = mean_data_1))$breakpoints
# Slower
ecp::e.divisive(mean_data_1)$estimates
# Data need to be processed
changepoint::cpt.mean(c(mean_data_1))@cpts
breakfast::breakfast(mean_data_1)$cptmodel.list[[6]]$cpts
wbs::wbs(mean_data_1)$cpt$cpt.ic$mbic.penalty
# Data need to be processed. `G` is selected based on the example
if (interactive()) {
mosum::mosum(c(mean_data_1), G = 40)$cpts.info$cpts
}
fpop::Fpop(mean_data_1, nrow(mean_data_1))$t.est
gfpop::gfpop(
data = mean_data_1,
mygraph = gfpop::graph(
penalty = 2 * log(nrow(mean_data_1)) * gfpop::sdDiff(mean_data_1) ^ 2,
type = "updown"
),
type = "mean"
)$changepoints
invisible(
suppressMessages(
capture.output(
result_InspectChangepoint <- InspectChangepoint::inspect(
t(mean_data_1),
threshold = InspectChangepoint::compute.threshold(
nrow(mean_data_1), ncol(mean_data_1)
)
)
)
)
)
result_InspectChangepoint$changepoints[, "location"]
jointseg::jointSeg(mean_data_1, K = 2)$bestBkp
Rbeast::beast(
mean_data_1, season = "none", print.progress = FALSE, quiet = TRUE
)$trend$cp
stepR::stepFit(mean_data_1, alpha = 0.5)$rightEnd
cpm::processStream(mean_data_1, cpmType = "Student")$changePoints
segmented::segmented(
lm(y ~ 1 + x, data.frame(y = mean_data_1, x = seq_len(nrow(mean_data_1)))),
seg.Z = ~ x
)$psi[, "Est."]
# Slowest
if (interactive()) {
plot(
mcp::mcp(
list(y ~ 1, ~ 1, ~ 1),
data = data.frame(y = mean_data_1, x = seq_len(nrow(mean_data_1))),
par_x = "x"
)
)
}
plot(not::not(mean_data_1, contrast = "pcwsConstMean"))
# plot(bcp::bcp(mean_data_1))
fastcpd::fastcpd.mv(mv_data_1, r.progress = FALSE)@cp_set
# Slow
ecp::e.divisive(mv_data_1)$estimates
# Data need to be processed
changepoint::cpt.meanvar(c(mv_data_1))@cpts
CptNonPar::np.mojo(mv_data_1, G = floor(length(mv_data_1) / 6))$cpts
cpm::processStream(mv_data_1, cpmType = "GLR")$changePoints
invisible(
suppressMessages(
capture.output(
result_InspectChangepoint <- InspectChangepoint::inspect(
t(mv_data_1),
threshold = InspectChangepoint::compute.threshold(
nrow(mv_data_1), ncol(mv_data_1)
)
)
)
)
)
result_InspectChangepoint$changepoints[, "location"]
Rbeast::beast(
mv_data_1, season = "none", print.progress = FALSE, quiet = TRUE
)$trend$cp
# Slower
if (interactive()) {
plot(
mcp::mcp(
list(y ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1),
data = data.frame(y = mv_data_1, x = seq_len(nrow(mv_data_1))),
par_x = "x"
)
)
}
plot(not::not(mv_data_1, contrast = "pcwsConstMeanVar"))
fastcpd::fastcpd.mean(mean_data_3, r.progress = FALSE)@cp_set
CptNonPar::np.mojo(mean_data_3, G = floor(nrow(mean_data_3) / 6))$cpts
invisible(
suppressMessages(
capture.output(
result_InspectChangepoint <- InspectChangepoint::inspect(
t(mean_data_3),
threshold = InspectChangepoint::compute.threshold(
nrow(mean_data_3), ncol(mean_data_3)
)
)
)
)
)
result_InspectChangepoint$changepoints[, "location"]
jointseg::jointSeg(mean_data_3, K = 2)$bestBkp
invisible(
capture.output(
result_Rbeast <- Rbeast::beast123(
mean_data_3,
metadata = list(whichDimIsTime = 1),
season = "none"
)
)
)
result_Rbeast$trend$cp
# Slow
strucchange::breakpoints(
cbind(y.1, y.2, y.3) ~ 1, data = data.frame(y = mean_data_3)
)$breakpoints
# Slower
ecp::e.divisive(mean_data_3)$estimates
# plot(bcp::bcp(mean_data_3))
fastcpd::fastcpd.mv(mv_data_3, r.progress = FALSE)@cp_set
# Slow
ecp::e.divisive(mv_data_3)$estimates
invisible(
suppressMessages(
capture.output(
result_InspectChangepoint <- InspectChangepoint::inspect(
t(mv_data_3),
threshold = InspectChangepoint::compute.threshold(
nrow(mv_data_3), ncol(mv_data_3)
)
)
)
)
)
result_InspectChangepoint$changepoints[, "location"]
invisible(
capture.output(
result_Rbeast <- Rbeast::beast123(
mv_data_3,
metadata = list(whichDimIsTime = 1),
season = "none"
)
)
)
result_Rbeast$trend$cp
fastcpd::fastcpd.lm(lm_data, r.progress = FALSE)@cp_set
strucchange::breakpoints(y ~ . - 1, data = lm_data)$breakpoints
segmented::segmented(
lm(
y ~ . - 1,
data.frame(y = lm_data$y, x = lm_data[, -1], index = seq_len(nrow(lm_data)))
),
seg.Z = ~ index
)$psi[, "Est."]
fastcpd::fastcpd.binomial(binomial_data, r.progress = FALSE)@cp_set
strucchange::breakpoints(y ~ . - 1, data = binomial_data)$breakpoints
fastcpd::fastcpd.poisson(poisson_data, r.progress = FALSE)@cp_set
# Slow
strucchange::breakpoints(y ~ . - 1, data = poisson_data)$breakpoints
fastcpd::fastcpd.lasso(lasso_data, r.progress = FALSE)@cp_set
# Slow
strucchange::breakpoints(y ~ . - 1, data = lasso_data)$breakpoints
fastcpd::fastcpd.ar(ar_data, 3, r.progress = FALSE)@cp_set
CptNonPar::np.mojo(ar_data, G = floor(length(ar_data) / 6))$cpts
segmented::segmented(
lm(
y ~ x + 1, data.frame(y = ar_data, x = seq_along(ar_data))
),
seg.Z = ~ x
)$psi[, "Est."]
# Slow
if (interactive()) {
plot(
mcp::mcp(
list(y ~ 1 + ar(3), ~ 0 + ar(3)),
data = data.frame(y = ar_data, x = seq_along(ar_data)),
par_x = "x"
)
)
}
fastcpd::fastcpd.garch(garch_data, c(1, 1), r.progress = FALSE)@cp_set
CptNonPar::np.mojo(garch_data, G = floor(length(garch_data) / 6))$cpts
strucchange::breakpoints(x ~ 1, data = data.frame(x = garch_data))$breakpoints
fastcpd::fastcpd.var(
var_data, 2, cost_adjustment = NULL, r.progress = FALSE
)@cp_set
VARDetect::tbss(var_data)
result <- list(
fastcpd = fastcpd.mean(well_log, trim = 0.003)@cp_set,
changepoint = changepoint::cpt.mean(well_log)@cpts,
CptNonPar =
CptNonPar::np.mojo(well_log, G = floor(length(well_log) / 6))$cpts,
strucchange = strucchange::breakpoints(
y ~ 1, data = data.frame(y = well_log)
)$breakpoints,
ecp = ecp::e.divisive(matrix(well_log))$estimates,
breakfast = breakfast::breakfast(well_log)$cptmodel.list[[6]]$cpts,
wbs = wbs::wbs(well_log)$cpt$cpt.ic$mbic.penalty,
mosum = mosum::mosum(c(well_log), G = 40)$cpts.info$cpts,
# fpop = fpop::Fpop(well_log, length(well_log))$t.est, # meaningless
gfpop = gfpop::gfpop(
data = well_log,
mygraph = gfpop::graph(
penalty = 2 * log(length(well_log)) * gfpop::sdDiff(well_log) ^ 2,
type = "updown"
),
type = "mean"
)$changepoints,
InspectChangepoint = InspectChangepoint::inspect(
well_log,
threshold = InspectChangepoint::compute.threshold(length(well_log), 1)
)$changepoints[, "location"],
jointseg = jointseg::jointSeg(well_log, K = 12)$bestBkp,
Rbeast = Rbeast::beast(
well_log, season = "none", print.progress = FALSE, quiet = TRUE
)$trend$cp,
stepR = stepR::stepFit(well_log, alpha = 0.5)$rightEnd
)
package_list <- sort(names(result), decreasing = TRUE)
comparison_table <- NULL
for (package_index in seq_along(package_list)) {
package <- package_list[[package_index]]
comparison_table <- rbind(
comparison_table,
data.frame(
change_point = result[[package]],
package = package,
y_offset = (package_index - 1) * 400
)
)
}
most_selected <- sort(table(comparison_table$change_point), decreasing = TRUE)
most_selected <- sort(as.numeric(names(most_selected[most_selected >= 4])))
for (i in seq_len(length(most_selected) - 1)) {
if (most_selected[i + 1] - most_selected[i] < 2) {
most_selected[i] <- NA
most_selected[i + 1] <- most_selected[i + 1] - 0.5
}
}
(most_selected <- most_selected[!is.na(most_selected)])
#> [1] 6.0 314.0 434.0 704.0 776.0 1021.0 1057.0 1347.0 1405.0 1502.0 1661.0 1842.0 2023.0 2202.0
#> [15] 2384.5 2445.0 2507.0 2567.5 2738.0 2921.0 3094.0 3251.0 3464.0 3499.0 3622.0 3709.0 3820.0 3976.0
ggplot2::ggplot() +
ggplot2::geom_point(
data = data.frame(x = seq_along(well_log), y = c(well_log)),
ggplot2::aes(x = x, y = y)
) +
ggplot2::geom_vline(
xintercept = most_selected,
color = "black",
linetype = "dashed",
alpha = 0.2
) +
ggplot2::geom_point(
data = comparison_table,
ggplot2::aes(x = change_point, y = 95000 + y_offset, color = package),
shape = 17,
size = 1.75
) +
ggplot2::geom_hline(
data = comparison_table,
ggplot2::aes(yintercept = 95000 + y_offset, color = package),
linetype = "dashed",
alpha = 0.1
) +
ggplot2::coord_cartesian(
ylim = c(95000 - 500, max(well_log) + 1000),
xlim = c(-200, length(well_log) + 200),
expand = FALSE
) +
ggplot2::theme(
panel.background = ggplot2::element_blank(),
panel.border = ggplot2::element_rect(colour = "black", fill = NA),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank()
) +
ggplot2::xlab(NULL) + ggplot2::ylab(NULL)
microbenchmark_result <- microbenchmark::microbenchmark(
fastcpd = fastcpd::fastcpd.mean(well_log, trim = 0.003, r.progress = FALSE),
changepoint = changepoint::cpt.mean(well_log, method = "PELT"),
CptNonPar = CptNonPar::np.mojo(well_log, G = floor(length(well_log) / 6)),
strucchange =
strucchange::breakpoints(y ~ 1, data = data.frame(y = well_log)),
ecp = ecp::e.divisive(matrix(well_log)),
breakfast = breakfast::breakfast(well_log),
wbs = wbs::wbs(well_log),
mosum = mosum::mosum(c(well_log), G = 40),
fpop = fpop::Fpop(well_log, nrow(well_log)),
gfpop = gfpop::gfpop(
data = well_log,
mygraph = gfpop::graph(
penalty = 2 * log(length(well_log)) * gfpop::sdDiff(well_log) ^ 2,
type = "updown"
),
type = "mean"
),
InspectChangepoint = InspectChangepoint::inspect(
well_log,
threshold = InspectChangepoint::compute.threshold(length(well_log), 1)
),
jointseg = jointseg::jointSeg(well_log, K = 12),
Rbeast = Rbeast::beast(
well_log, season = "none", print.progress = FALSE, quiet = TRUE
),
stepR = stepR::stepFit(well_log, alpha = 0.5),
# mcp = mcp::mcp(
# list(y ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1),
# data = data.frame(y = well_log, x = seq_along(well_log)),
# par_x = "x"
# ),
not = not::not(well_log, contrast = "pcwsConstMean"),
times = 10
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> fastcpd 4.010279e+02 4.129134e+02 4.171979e+02 4.148564e+02 4.196057e+02 4.350300e+02 10
#> changepoint 3.195488e+01 3.230887e+01 3.743290e+01 3.352390e+01 4.418598e+01 4.980063e+01 10
#> CptNonPar 1.809139e+04 1.867986e+04 1.904000e+04 1.876936e+04 1.947816e+04 2.076009e+04 10
#> strucchange 5.813672e+04 5.831023e+04 5.963604e+04 5.952739e+04 6.139957e+04 6.173251e+04 10
#> ecp 7.534981e+05 7.642983e+05 7.742041e+05 7.661485e+05 7.880130e+05 8.003981e+05 10
#> breakfast 9.231426e+03 9.261763e+03 9.514283e+03 9.492325e+03 9.630395e+03 1.001344e+04 10
#> wbs 1.133042e+02 1.168601e+02 1.189637e+02 1.187244e+02 1.201765e+02 1.281057e+02 10
#> mosum 1.082397e+00 1.167387e+00 1.213233e+00 1.209105e+00 1.286479e+00 1.320333e+00 10
#> fpop 2.407630e+00 2.481348e+00 2.641625e+00 2.551970e+00 2.618455e+00 3.328585e+00 10
#> gfpop 5.918783e+01 5.959448e+01 6.081621e+01 6.006626e+01 6.048699e+01 6.725864e+01 10
#> InspectChangepoint 1.550181e+02 1.885944e+02 2.103973e+02 2.030795e+02 2.262247e+02 3.025476e+02 10
#> jointseg 1.082510e+01 1.082948e+01 1.174498e+01 1.110779e+01 1.221183e+01 1.424236e+01 10
#> Rbeast 6.311987e+02 6.634019e+02 6.702594e+02 6.680217e+02 6.846772e+02 7.048629e+02 10
#> stepR 2.812109e+01 2.836331e+01 2.864408e+01 2.862293e+01 2.897236e+01 2.933441e+01 10
#> not 9.350182e+01 9.469137e+01 9.684097e+01 9.647117e+01 9.763828e+01 1.031233e+02 10
ggplot2::autoplot(microbenchmark_result)