Preliminaries

library(ggplot2)
library(ggpmisc)
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(lubridate)

try_data_frame()

Time series

Several different formats for storing time series data are used in R. Here we use in the examples objects of class ts but several other classes are supported as try.xts() is used internally. The first example is a quarterly series.

class(austres)
## [1] "ts"
austres.df <- try_data_frame(austres)
class(austres.df)
## [1] "data.frame"
lapply(austres.df, "class")
## $time
## [1] "POSIXct" "POSIXt" 
## 
## $V.austres
## [1] "numeric"
head(austres.df, 4)
##         time V.austres
## 1 1971-04-01   13067.3
## 2 1971-07-01   13130.5
## 3 1971-10-01   13198.4
## 4 1972-01-01   13254.2

The next chunk demonstrates that numeric times are expressed as decimal years in the returned data frame.

austres.df <- try_data_frame(austres, as.numeric = TRUE)
lapply(austres.df, "class")
## $time
## [1] "numeric"
## 
## $V.austres
## [1] "numeric"
head(austres.df, 4)
##       time V.austres
## 1 1971.247   13067.3
## 2 1971.496   13130.5
## 3 1971.748   13198.4
## 4 1972.000   13254.2

This second example is for a series of yearly values.

class(lynx)
## [1] "ts"
lynx.df <- try_data_frame(lynx)
class(lynx.df)
## [1] "data.frame"
lapply(lynx.df, "class")
## $time
## [1] "POSIXct" "POSIXt" 
## 
## $V.lynx
## [1] "numeric"
head(lynx.df, 3)
##                  time V.lynx
## 1 1821-01-01 00:00:01    269
## 2 1822-01-01 00:00:01    321
## 3 1823-01-01 00:00:01    585

Above there is a small rounding error of 1 s for these old dates. We can correct this by rounding to year.

lynx.df <- try_data_frame(lynx, "year")
head(lynx.df, 3)
##         time V.lynx
## 1 1821-01-01    269
## 2 1822-01-01    321
## 3 1823-01-01    585

In addition we can convert the POSIXct values into numeric values in calendar years plus a decimal fraction.

lynx_n.df <- try_data_frame(lynx, "year", as.numeric = TRUE)
lapply(lynx_n.df, "class")
## $time
## [1] "numeric"
## 
## $V.lynx
## [1] "numeric"
head(lynx_n.df, 3)
##   time V.lynx
## 1 1821    269
## 2 1822    321
## 3 1823    585

Other classes

try_data_frame() attempts to handle gracefully objects that are not time series.

try_data_frame(1:5)
##   x
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
try_data_frame(letters[1:5])
##   x
## 1 a
## 2 b
## 3 c
## 4 d
## 5 e
try_data_frame(factor(letters[1:5]))
##   x
## 1 a
## 2 b
## 3 c
## 4 d
## 5 e
try_data_frame(list(x = rep(1,5), y = 1:5))
##   x y
## 1 1 1
## 2 1 2
## 3 1 3
## 4 1 4
## 5 1 5
try_data_frame(data.frame(x = rep(1,5), y = 1:5))
##   x y
## 1 1 1
## 2 1 2
## 3 1 3
## 4 1 4
## 5 1 5
try_data_frame(matrix(1:10, ncol = 2))
##   V1 V2
## 1  1  6
## 2  2  7
## 3  3  8
## 4  4  9
## 5  5 10

stat_peaks() and stat_valleys()

Using POSIXct for time and the default formatting of labels.

ggplot(lynx.df, aes(time, V.lynx)) + geom_line() + 
  stat_peaks(colour = "red") +
  stat_peaks(geom = "text", colour = "red", vjust = -0.5) +
  ylim(-100, 7300)

Using numeric values for time and the default formatting of labels.

ggplot(lynx_n.df, aes(time, V.lynx)) + geom_line() + 
  stat_peaks(colour = "red") +
  stat_peaks(geom = "text", colour = "red", vjust = -0.5) +
  ylim(-100, 7300)

Using POSIXct for time but supplying a format string. In addition marking both peaks and valleys.

ggplot(lynx.df, aes(time, V.lynx)) + geom_line() + 
  stat_peaks(colour = "red") +
  stat_peaks(geom = "text", colour = "red", vjust = -0.5, x.label.fmt = "%Y") +
  stat_valleys(colour = "blue") +
  stat_valleys(geom = "text", colour = "blue", vjust = 1.5, x.label.fmt = "%Y") +
  ylim(-100, 7300)

Using numeric for time but supplying a format string. In addition marking both peaks and valleys.

ggplot(lynx_n.df, aes(time, V.lynx)) + geom_line() + 
  stat_peaks(colour = "red") +
  stat_peaks(geom = "text", colour = "red", vjust = -0.5, x.label.fmt = "%4.0f") +
  stat_valleys(colour = "blue") +
  stat_valleys(geom = "text", colour = "blue", vjust = 1.5, x.label.fmt = "%4.0f") +
  ylim(-100, 7300)

Rotating the labels.

ggplot(lynx.df, aes(time, V.lynx)) + geom_line() + 
  stat_peaks(colour = "red") +
  stat_peaks(geom = "text", colour = "red", angle = 66,
             hjust = -0.1, x.label.fmt = "%Y") +
  ylim(NA, 7300)

Using geom_rug for the peaks and valleys.

ggplot(lynx.df, aes(time, V.lynx)) + geom_line() + 
  stat_peaks(colour = "red") +
  stat_peaks(geom = "rug", colour = "red") +
  stat_valleys(colour = "blue") +
  stat_valleys(geom = "rug", colour = "blue")

stat_poly_eq()

We generate some artificial data.

set.seed(4321)
# generate artificial data
x <- 1:100
y <- (x + x^2 + x^3) + rnorm(length(x), mean = 0, sd = mean(x^3) / 4)
my.data <- data.frame(x, y, group = c("A", "B"), y2 = y * c(0.5,2))

First one example using defaults.

formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
  geom_point() +
  geom_smooth(method = "lm", formula = formula) +
  stat_poly_eq(formula = formula, parse = TRUE)

stat_poly_eq() makes available three different labels in the returned data frame. One of these is used by default, but aes() can be used to select a different one.

formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
  geom_point() +
  geom_smooth(method = "lm", formula = formula) +
  stat_poly_eq(aes(label = ..adj.rr.label..), formula = formula, 
               parse = TRUE)

formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
  geom_point() +
  geom_smooth(method = "lm", formula = formula) +
  stat_poly_eq(aes(label = ..eq.label..), formula = formula, 
               parse = TRUE)

Within aes() it is possible to compute new labels based on those returned plus “arbitrary” text. The supplied labels are meant to be parsed into expressions, so any text added should be valid for a string that will be parsed.

formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
  geom_point() +
  geom_smooth(method = "lm", formula = formula) +
  stat_poly_eq(aes(label =  paste(..eq.label.., ..adj.rr.label.., sep = "~~~~")),
               formula = formula, parse = TRUE)

A couple of additional examples of polynomials of different orders, and specified in different ways.

Higher order polynomial.

formula <- y ~ poly(x, 5, raw = TRUE)
ggplot(my.data, aes(x, y)) +
  geom_point() +
  geom_smooth(method = "lm", formula = formula) +
  stat_poly_eq(aes(label = ..eq.label..), formula = formula, parse = TRUE)

Intercept forced to zero.

formula <- y ~ x + I(x^2) + I(x^3) - 1
ggplot(my.data, aes(x, y)) +
  geom_point() +
  geom_smooth(method = "lm", formula = formula) +
  stat_poly_eq(aes(label = ..eq.label..), formula = formula, 
               parse = TRUE)

We give below several examples to demonstrate how other components of the ggplot object affect the behaviour of this statistic.

Facets work as expected either with fixed or free scales. Although bellow we had to adjust the size of the font used for the equation.

formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y2)) +
  geom_point() +
  geom_smooth(method = "lm", formula = formula) +
  stat_poly_eq(aes(label = ..eq.label..), size = rel(2.8),
               formula = formula, parse = TRUE) +
  facet_wrap(~group)

formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y2)) +
  geom_point() +
  geom_smooth(method = "lm", formula = formula) +
  stat_poly_eq(aes(label = ..eq.label..), size = rel(2.8),
               formula = formula, parse = TRUE) +
  facet_wrap(~group, scales = "free_y")

Grouping, in this example using colour aesthetic also works as expected.

formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y2, colour = group)) +
  geom_point() +
  geom_smooth(method = "lm", formula = formula) +
  stat_poly_eq(aes(label = ..eq.label..), vjust = c(-8, 0),
               formula = formula, parse = TRUE) +
  theme_bw()

stat_debug()

These stats are very simple and simply echo to labels on the plot itself a summary of the data received by the compute functions.

In the absence of facets or groups we get just one label.

ggplot(my.data, aes(x, y)) + geom_point() + stat_debug_group()

ggplot(my.data, aes(x, y)) + geom_point() + stat_debug_panel()

In the case of grouping if the groups do not have very different centres in the x y space tweaking of positions can be needed to avoid overlaps.

ggplot(my.data, aes(x, y, colour = group)) + geom_point() + 
  stat_debug_group(vjust = c(-0.5,1.5))

Without facets, we still have only one panel.

ggplot(my.data, aes(x, y, colour = group)) + geom_point() + 
  stat_debug_panel()

The text would be similar, except for the column named after the aesthetic, for other aesthetics used for grouping, but the labels will visually differ only if the geom used for the debug stat is affected by them.

ggplot(my.data, aes(x, y, shape = group)) + geom_point() + 
  stat_debug_group(vjust = c(-0.5,1.5))