Comparison with other R packages

All mosum code is commented out due to [#4].

All mcp code is commented out due to link 1 and link 2.

All bcp code is commented out due to [#5]

for (
  package in c(
    "ggplot2", "mvtnorm",
    # "bcp",
    "breakfast", "changepoint", "cpm", "ecp", "fpop", "mcp",
    "mosum", "not", "segmented", "stepR", "strucchange", "wbs"
  )
) {
  if (!requireNamespace(package, quietly = TRUE)) utils::install.packages(
    package, repos = "https://cloud.r-project.org", quiet = TRUE
  )
}

Data setup

set.seed(1)
p <- 1

# Univariate mean change
mean_data_1 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(400, mean = rep(50, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(2, p), sigma = diag(100, p))
)

plot.ts(mean_data_1)

# Univariate mean and/or variance change
mv_data_1 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(50, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(10, p), sigma = diag(50, p))
)

plot.ts(mv_data_1)

p <- 3

# Multivariate mean change
mean_data_3 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(400, mean = rep(50, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(2, p), sigma = diag(100, p))
)

plot.ts(mean_data_3)

# Multivariate mean and/or variance change
mv_data_3 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(50, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(10, p), sigma = diag(50, p))
)

plot.ts(mv_data_3)

# Linear regression
n <- 300
p <- 4
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- rbind(c(1, 3.2, -1, 0), c(-1, -0.5, 2.5, -2), c(0.8, 0, 1, 2))
y <- c(
  x[1:100, ] %*% theta_0[1, ] + rnorm(100, 0, 3),
  x[101:200, ] %*% theta_0[2, ] + rnorm(100, 0, 3),
  x[201:300, ] %*% theta_0[3, ] + rnorm(100, 0, 3)
)
lm_data <- data.frame(y = y, x = x)

plot.ts(lm_data)

# Logistic regression
x <- matrix(rnorm(1500, 0, 1), ncol = 5)
theta <- rbind(rnorm(5, 0, 1), rnorm(5, 2, 1))
y <- c(
  rbinom(125, 1, 1 / (1 + exp(-x[1:125, ] %*% theta[1, ]))),
  rbinom(175, 1, 1 / (1 + exp(-x[126:300, ] %*% theta[2, ])))
)
binomial_data <- data.frame(y = y, x = x)

# Poisson regression
n <- 1100
p <- 3
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
delta <- rnorm(p)
theta_0 <- c(1, 0.3, -1)
y <- c(
  rpois(500, exp(x[1:500, ] %*% theta_0)),
  rpois(300, exp(x[501:800, ] %*% (theta_0 + delta))),
  rpois(200, exp(x[801:1000, ] %*% theta_0)),
  rpois(100, exp(x[1001:1100, ] %*% (theta_0 - delta)))
)
poisson_data <- data.frame(y = y, x = x)

plot.ts(log(poisson_data$y))

plot.ts(poisson_data[, -1])

# Lasso
n <- 480
p_true <- 6
p <- 50
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- rbind(
  runif(p_true, -5, -2),
  runif(p_true, -3, 3),
  runif(p_true, 2, 5),
  runif(p_true, -5, 5)
)
theta_0 <- cbind(theta_0, matrix(0, ncol = p - p_true, nrow = 4))
y <- c(
  x[1:80, ] %*% theta_0[1, ] + rnorm(80, 0, 1),
  x[81:200, ] %*% theta_0[2, ] + rnorm(120, 0, 1),
  x[201:320, ] %*% theta_0[3, ] + rnorm(120, 0, 1),
  x[321:n, ] %*% theta_0[4, ] + rnorm(160, 0, 1)
)
lasso_data <- data.frame(y = y, x = x)

plot.ts(lasso_data[, seq_len(p_true + 1)])

# AR(3)
n <- 1000
x <- rep(0, n + 3)
for (i in 1:600) {
  x[i + 3] <- 0.6 * x[i + 2] - 0.2 * x[i + 1] + 0.1 * x[i] + rnorm(1, 0, 3)
}
for (i in 601:1000) {
  x[i + 3] <- 0.3 * x[i + 2] + 0.4 * x[i + 1] + 0.2 * x[i] + rnorm(1, 0, 3)
}
ar_data <- x[-seq_len(3)]

plot.ts(ar_data)

# GARCH(1, 1)
n <- 400
sigma_2 <- rep(1, n + 1)
x <- rep(0, n + 1)
for (i in seq_len(200)) {
  sigma_2[i + 1] <- 20 + 0.5 * x[i]^2 + 0.1 * sigma_2[i]
  x[i + 1] <- rnorm(1, 0, sqrt(sigma_2[i + 1]))
}
for (i in 201:400) {
  sigma_2[i + 1] <- 1 + 0.1 * x[i]^2 + 0.5 * sigma_2[i]
  x[i + 1] <- rnorm(1, 0, sqrt(sigma_2[i + 1]))
}
garch_data <- x[-1]

plot.ts(garch_data)

Univariate mean change

The true change points are 300 and 700. Some methods are plotted due to the un-retrievable change points.

fastcpd::fastcpd.mean(mean_data_1, r.progress = FALSE)@cp_set
#> [1] 300 700
# Slow
strucchange::breakpoints(y ~ 1, data = data.frame(y = mean_data_1))$breakpoints
#> [1] 300 700
# Slower
ecp::e.divisive(mean_data_1)$estimates
#> [1]    1  301  701 1001
# Data need to be processed
changepoint::cpt.mean(c(mean_data_1))@cpts
#> [1]  300 1000
breakfast::breakfast(mean_data_1)$cptmodel.list[[6]]$cpts
#> [1] 300 700
wbs::wbs(mean_data_1)$cpt$cpt.ic$mbic.penalty
#> [1] 300 700
# Data need to be processed. `G` is selected based on the example
if (interactive()) {
  mosum::mosum(c(mean_data_1), G = 40)$cpts.info$cpts
}
fpop::Fpop(mean_data_1, nrow(mean_data_1))$t.est
#> [1]  300  700 1000
stepR::stepFit(mean_data_1, alpha = 0.5)$rightEnd
#> [1]  300  700 1000
cpm::processStream(mean_data_1, cpmType = "Student")$changePoints
#> [1] 299 699
segmented::segmented(
  lm(y ~ 1 + x, data.frame(y = mean_data_1, x = seq_len(nrow(mean_data_1)))),
  seg.Z = ~ x
)$psi[, "Est."]
#> [1] 495
# Slowest
if (interactive()) {
  plot(
    mcp::mcp(
      list(y ~ 1, ~ 1, ~ 1),
      data = data.frame(y = mean_data_1, x = seq_len(nrow(mean_data_1))),
      par_x = "x"
    )
  )
}
plot(not::not(mean_data_1, contrast = "pcwsConstMean"))

# plot(bcp::bcp(mean_data_1))

Univariate mean and/or variance change

The true change points are 300, 700, 1000, 1300 and 1700. Some methods are plotted due to the un-retrievable change points.

fastcpd::fastcpd.mv(mv_data_1, r.progress = FALSE)@cp_set
#> [1]  300  701 1000 1300 1699
# Slow
ecp::e.divisive(mv_data_1)$estimates
#> [1]    1  301  702 1000 1301 1702 2001
# Data need to be processed
changepoint::cpt.meanvar(c(mv_data_1))@cpts
#> [1]  300 2000
cpm::processStream(mv_data_1, cpmType = "GLR")$changePoints
#>  [1]   21   24  293  300  417  693  701  981  985  999 1296 1300 1695
# Slower
if (interactive()) {
  plot(
    mcp::mcp(
      list(y ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1),
      data = data.frame(y = mv_data_1, x = seq_len(nrow(mv_data_1))),
      par_x = "x"
    )
  )
}
plot(not::not(mv_data_1, contrast = "pcwsConstMeanVar"))

#> Press [enter] to continue

Multivariate mean change

The true change points are 300 and 700. Some methods are plotted due to the un-retrievable change points.

fastcpd::fastcpd.mean(mean_data_3, r.progress = FALSE)@cp_set
#> [1] 300 700
# Slow
strucchange::breakpoints(
  cbind(y.1, y.2, y.3) ~ 1, data = data.frame(y = mean_data_3)
)$breakpoints
#> [1] 300 700
# Slower
ecp::e.divisive(mean_data_3)$estimates
#> [1]    1  301  701 1001
# plot(bcp::bcp(mean_data_3))

Multivariate mean and/or variance change

The true change points are 300, 700, 1000, 1300 and 1700. Some methods are plotted due to the un-retrievable change points.

fastcpd::fastcpd.mv(mv_data_3, r.progress = FALSE)@cp_set
#> [1]  300  700 1000 1300 1700
# Slow
ecp::e.divisive(mv_data_3)$estimates
#> [1]    1  301  701 1001 1301 1701 2001

Linear regression

The true change points are 100 and 200.

fastcpd::fastcpd.lm(lm_data, r.progress = FALSE)@cp_set
#> [1] 100 200
strucchange::breakpoints(y ~ . - 1, data = lm_data)$breakpoints
#> [1] 100 200
segmented::segmented(
  lm(
    y ~ . - 1,
    data.frame(y = lm_data$y, x = lm_data[, -1], index = seq_len(nrow(lm_data)))
  ),
  seg.Z = ~ index
)$psi[, "Est."]
#> [1] 105

Logistic regression

The true change point is 125.

fastcpd::fastcpd.binomial(binomial_data, r.progress = FALSE)@cp_set
#> [1] 125
strucchange::breakpoints(y ~ . - 1, data = binomial_data)$breakpoints
#> [1] 126

Poisson regression

The true change points are 500, 800 and 1000.

fastcpd::fastcpd.poisson(poisson_data, r.progress = FALSE)@cp_set
#> [1] 518 800 999
# Slow
strucchange::breakpoints(y ~ . - 1, data = poisson_data)$breakpoints
#> [1] 525 690

Lasso

The true change points are 80, 200 and 320.

fastcpd::fastcpd.lasso(lasso_data, r.progress = FALSE)@cp_set
#> [1]  81 201 320
# Slow
strucchange::breakpoints(y ~ . - 1, data = lasso_data)$breakpoints
#> [1]  80 200 320

AR(3)

The true change point is 600. Some methods are plotted due to the un-retrievable change points.

fastcpd::fastcpd.ar(ar_data, 3, r.progress = FALSE)@cp_set
#> [1] 597
segmented::segmented(
  lm(
    y ~ x + 1, data.frame(y = ar_data, x = seq_along(ar_data))
  ),
  seg.Z = ~ x
)$psi[, "Est."]
#> [1] 683.0417
# Slow
if (interactive()) {
  plot(
    mcp::mcp(
      list(y ~ 1 + ar(3), ~ 0 + ar(3)),
      data = data.frame(y = ar_data, x = seq_along(ar_data)),
      par_x = "x"
    )
  )
}

GARCH(1, 1)

The true change point is 200.

fastcpd::fastcpd.garch(garch_data, c(1, 1), r.progress = FALSE)@cp_set
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
#> Residual calculation failed.
#> [1] 199
strucchange::breakpoints(x ~ 1, data = data.frame(x = garch_data))$breakpoints
#> [1] 173