Introduction

Overview

grates makes it easy to perform common date grouping operations and provides 6 new classes for users to work with: yrwk (year-week), yrmon (year-month), yrqtr (year-quarter), yr (year), period and int_period. These classes aim to formalise the idea of a grouped date whilst also being intuitive in their use. They build upon ideas of Davis Vaughan and the unreleased datea package.

library(grates)
library(ggplot2)
library(outbreaks)
library(dplyr)

yrwk (year-week)

as_yrwk() allows you to create yrwk objects. As arguments it takes x, the date vector you wish to group, and firstday, the day of the week you wish your weeks to start on (this defaults to 1 (Monday) and can go up to 7 (Sunday)). The first week of the year is then defined as the first week containing 4 days in the new calendar year. This means that the calendar year of a date can sometimes differ from the year of its yrwk object.

# build a lookup from weekday name to its position in the week, starting
# from the first day of a Monday-based week
monday_week <- as_yrwk(as.Date("2020-01-01"), firstday = 1L)
day_names <- weekdays(as.Date(monday_week) + 0:6)
wdays <- setNames(seq_along(day_names), day_names)

# show how the week assigned to each date over December and January
# changes with the choice of firstday
dates <- as.Date("2020-12-29") + 0:5
dat <- lapply(wdays, function(day) as_yrwk(dates, firstday = day))
bind_cols(dates = dates, dat)
#> # A tibble: 6 x 8
#>   dates      Monday   Tuesday  Wednesday Thursday Friday   Saturday Sunday  
#>   <date>     <yrwk>   <yrwk>   <yrwk>    <yrwk>   <yrwk>   <yrwk>   <yrwk>  
#> 1 2020-12-29 2020-W53 2021-W01 2020-W52  2020-W52 2020-W52 2020-W52 2020-W53
#> 2 2020-12-30 2020-W53 2021-W01 2021-W01  2020-W52 2020-W52 2020-W52 2020-W53
#> 3 2020-12-31 2020-W53 2021-W01 2021-W01  2021-W01 2020-W52 2020-W52 2020-W53
#> 4 2021-01-01 2020-W53 2021-W01 2021-W01  2021-W01 2021-W01 2020-W52 2020-W53
#> 5 2021-01-02 2020-W53 2021-W01 2021-W01  2021-W01 2021-W01 2021-W01 2020-W53
#> 6 2021-01-03 2020-W53 2021-W01 2021-W01  2021-W01 2021-W01 2021-W01 2021-W01

We make working with yrwk and other grouped date objects easier by adopting logical conventions:

# 31 consecutive days starting on 2021-01-01
dates <- as.Date("2021-01-01") + 0:30
weeks <- as_yrwk(dates, firstday = 5) # firstday = 5 (Friday) matches the weekday of 2021-01-01, so week 1 starts on the first day of the year
head(weeks, 8)
#> [1] "2021-W01" "2021-W01" "2021-W01" "2021-W01" "2021-W01" "2021-W01" "2021-W01"
#> [8] "2021-W02"
# the firstday used to build the object is stored as an attribute
str(weeks)
#>  'yrwk' num [1:31] 2021-W01 2021-W01 2021-W01 2021-W01 ...
#>  - attr(*, "firstday")= int 5
# pair each date with its week for use in the examples that follow
dat <- tibble(dates, weeks)

# Addition of whole numbers adds the corresponding number of weeks to the object
dat %>% 
  mutate(plus4 = weeks + 4)
#> # A tibble: 31 x 3
#>    dates      weeks    plus4   
#>    <date>     <yrwk>   <yrwk>  
#>  1 2021-01-01 2021-W01 2021-W05
#>  2 2021-01-02 2021-W01 2021-W05
#>  3 2021-01-03 2021-W01 2021-W05
#>  4 2021-01-04 2021-W01 2021-W05
#>  5 2021-01-05 2021-W01 2021-W05
#>  6 2021-01-06 2021-W01 2021-W05
#>  7 2021-01-07 2021-W01 2021-W05
#>  8 2021-01-08 2021-W02 2021-W06
#>  9 2021-01-09 2021-W02 2021-W06
#> 10 2021-01-10 2021-W02 2021-W06
#> # … with 21 more rows

# Addition of two yrwk objects will error as it is unclear what the intention is
dat %>% 
  mutate(plus4 = weeks + weeks)
#> Error: Problem with `mutate()` input `plus4`.
#> x Cannot add <yrwk> objects to each other
#> ℹ Input `plus4` is `weeks + weeks`.

# Subtraction of whole numbers works similarly to addition, moving back by
# that many weeks (crossing into the previous year where necessary)
dat %>% 
  mutate(minus4 = weeks - 4)
#> # A tibble: 31 x 3
#>    dates      weeks    minus4  
#>    <date>     <yrwk>   <yrwk>  
#>  1 2021-01-01 2021-W01 2020-W49
#>  2 2021-01-02 2021-W01 2020-W49
#>  3 2021-01-03 2021-W01 2020-W49
#>  4 2021-01-04 2021-W01 2020-W49
#>  5 2021-01-05 2021-W01 2020-W49
#>  6 2021-01-06 2021-W01 2020-W49
#>  7 2021-01-07 2021-W01 2020-W49
#>  8 2021-01-08 2021-W02 2020-W50
#>  9 2021-01-09 2021-W02 2020-W50
#> 10 2021-01-10 2021-W02 2020-W50
#> # … with 21 more rows

# Subtraction of two yrwk objects gives the difference in weeks between them,
# returned as an integer column rather than a yrwk
dat %>% 
  mutate(plus4 = weeks + 4, difference = plus4 - weeks)
#> # A tibble: 31 x 4
#>    dates      weeks    plus4    difference
#>    <date>     <yrwk>   <yrwk>        <int>
#>  1 2021-01-01 2021-W01 2021-W05          4
#>  2 2021-01-02 2021-W01 2021-W05          4
#>  3 2021-01-03 2021-W01 2021-W05          4
#>  4 2021-01-04 2021-W01 2021-W05          4
#>  5 2021-01-05 2021-W01 2021-W05          4
#>  6 2021-01-06 2021-W01 2021-W05          4
#>  7 2021-01-07 2021-W01 2021-W05          4
#>  8 2021-01-08 2021-W02 2021-W06          4
#>  9 2021-01-09 2021-W02 2021-W06          4
#> 10 2021-01-10 2021-W02 2021-W06          4
#> # … with 21 more rows

# Weeks can be combined if they have the same firstday but not otherwise.
# wk1 and wk2 are both created without a firstday argument, so they share
# the default and can be combined.
wk1 <- as_yrwk("2020-01-01")
wk2 <- as_yrwk("2021-01-01")
c(wk1, wk2)
#> [1] "2020-W01" "2020-W53"
# wk3 is created with firstday = 2, so combining it with wk1 is an error
wk3 <- as_yrwk("2020-01-01", firstday = 2)
c(wk1, wk3)
#> Error: Unable to combine <yrwk> objects with different `firstday` attributes

For each date group we also provide associated ggplot2 scales for the x-axis:

# simulated Ebola linelist from the outbreaks package
dat <- ebola_sim_clean$linelist

# count cases per week of infection, dropping rows with missing values
weekly_cases <- dat %>%
  mutate(date = as_yrwk(date_of_infection)) %>%
  count(date, name = "cases") %>%
  na.omit()

# bar chart of weekly cases with the yrwk dates on the x-axis
ggplot(weekly_cases, aes(date, cases)) +
  geom_col() +
  theme_bw() +
  xlab("")

yrmon, yrqtr and yr

as_yrmon(), as_yrqtr() and as_yr() behave similarly to as_yrwk() with the difference being that they have no need for a firstday argument:

# create a sequence of dates, one per month, covering 2020 and 2021
dates <- seq(from = as.Date("2020-01-01"), to = as.Date("2021-12-01"), by = "1 month")

# group the same dates by month, quarter and year
as_yrmon(dates)
#>  [1] "2020-Jan" "2020-Feb" "2020-Mar" "2020-Apr" "2020-May" "2020-Jun"
#>  [7] "2020-Jul" "2020-Aug" "2020-Sep" "2020-Oct" "2020-Nov" "2020-Dec"
#> [13] "2021-Jan" "2021-Feb" "2021-Mar" "2021-Apr" "2021-May" "2021-Jun"
#> [19] "2021-Jul" "2021-Aug" "2021-Sep" "2021-Oct" "2021-Nov" "2021-Dec"
as_yrqtr(dates)
#>  [1] "2020-Q1" "2020-Q1" "2020-Q1" "2020-Q2" "2020-Q2" "2020-Q2" "2020-Q3"
#>  [8] "2020-Q3" "2020-Q3" "2020-Q4" "2020-Q4" "2020-Q4" "2021-Q1" "2021-Q1"
#> [15] "2021-Q1" "2021-Q2" "2021-Q2" "2021-Q2" "2021-Q3" "2021-Q3" "2021-Q3"
#> [22] "2021-Q4" "2021-Q4" "2021-Q4"
as_yr(dates)
#>  [1] "2020" "2020" "2020" "2020" "2020" "2020" "2020" "2020" "2020" "2020"
#> [11] "2020" "2020" "2021" "2021" "2021" "2021" "2021" "2021" "2021" "2021"
#> [21] "2021" "2021" "2021" "2021"
# adding whole numbers advances by that many months, quarters or years
as_yrmon(dates[1]) + 0:1
#> [1] "2020-Jan" "2020-Feb"
as_yrqtr(dates[1]) + 0:1
#> [1] "2020-Q1" "2020-Q2"
as_yr(dates[1]) + 0:1
#> [1] "2020" "2021"

Again we provide ggplot2 scales for the x-axis:

# cases per month of infection
monthly_cases <- dat %>%
  mutate(date = as_yrmon(date_of_infection)) %>%
  count(date, name = "cases") %>%
  na.omit()

ggplot(monthly_cases, aes(date, cases)) +
  geom_col() +
  theme_bw() +
  xlab("")


# cases per quarter of infection
quarterly_cases <- dat %>%
  mutate(date = as_yrqtr(date_of_infection)) %>%
  count(date, name = "cases") %>%
  na.omit()

ggplot(quarterly_cases, aes(date, cases)) +
  geom_col() +
  theme_bw() +
  xlab("")


# cases per year of infection, using the yr x-axis scale explicitly
yearly_cases <- dat %>%
  mutate(date = as_yr(date_of_infection)) %>%
  count(date, name = "cases") %>%
  na.omit()

ggplot(yearly_cases, aes(date, cases)) +
  geom_col() +
  theme_bw() +
  xlab("") +
  scale_x_yr(n = 2)

period

as_period is more flexible, allowing users to group by periods of any length. As arguments it takes, x, the date vector you wish to group, interval, the length of period to group by and, finally, firstdate, an optional value indicating where you would like to start your periods from.

interval can be specified as either an integer value or a more standard specification such as “day”, “week”, “month”, “quarter” or “year”. The format for the latter is similar to that used by seq.Date(), where these values can optionally be preceded by a (positive or negative) integer and a space, or followed by “s”.

For the values “month”, “quarter” and “year”, intervals are always made to start at the beginning of the calendar equivalent. If the input is an integer value, it is treated as if it was specified in days (i.e. 2 and “2 days” produce the same output).

# cases grouped into two-week periods
fortnightly_cases <- dat %>%
  mutate(date = as_period(date_of_infection, interval = "2 weeks")) %>%
  count(date, name = "cases") %>%
  na.omit()

ggplot(fortnightly_cases, aes(date, cases)) +
  geom_col() +
  theme_bw() +
  xlab("")


# cases grouped with an integer interval of 28
cases_28 <- dat %>%
  mutate(date = as_period(date_of_infection, interval = 28)) %>%
  count(date, name = "cases") %>%
  na.omit()

ggplot(cases_28, aes(date, cases)) +
  geom_col() +
  theme_bw() +
  xlab("")