Panel data

Elapsed dates

The classes “monthly” and “quarterly” print as dates and are compatible with the usual time extraction functions (i.e. month, year, etc.). Yet they are stored as integers representing the number of elapsed periods since 1970/01/01 (in months and quarters, respectively). This is particularly handy for simple algebra:

 # Elapsed dates: turn calendar dates into monthly periods
 library(lubridate)
 date <- mdy(c(
   "04/03/1992",
   "01/04/1992",
   "03/15/1992"
 ))
 datem <- as.monthly(date)
 # printing shows the period label
 datem
 #> [1] "1992m04" "1992m01" "1992m03"
 # arithmetic works as on integers: adding 1 moves one month ahead
 datem + 1
 #> [1] "1992m05" "1992m02" "1992m04"
 # date accessors such as year() still apply
 year(datem)
 #> [1] 1992 1992 1992

Vector functions

tlag/tlead lag or lead a vector along a time variable (and NOT with respect to row position)

library(lubridate)

# Lag by one period along a numeric year variable
# (the lag follows the time variable, not the row order)
year <- c(1989, 1991, 1992)
value <- c(4.1, 4.5, 3.3)
tlag(value, 1, along_with = year)

# Same idea along a monthly elapsed date
date <- mdy(c(
  "01/04/1992",
  "03/15/1992",
  "04/03/1992"
))
datem <- as.monthly(date)
value <- c(4.1, 4.5, 3.3)
tlag(value, along_with = datem)

roll_lag/roll_lead apply a function on a vector over a window defined by a time variable

year <- c(1989, 1991, 1992)
value <- c(1, 1, 1)

# order_by: rolling sum based on the two previous rows
roll_lag(value, sum, n = 2, order_by = year)

# along_with: rolling sum based on dates in [year-2, year]
roll_lag(value, sum, n = 2, along_with = year)

# closed = c(TRUE, FALSE): rolling sum based on dates in [year-2, year),
# i.e. the right end of the window is open
roll_lag(
  value, sum,
  n = 2,
  along_with = year,
  closed = c(TRUE, FALSE)
)

Since these functions can be applied to any vector (in contrast to zoo and xts), they can be used within groups. For instance, using data.table:

# Two-id panel; the lag and the rolling mean are computed within each id
DT <- data.table(
  id = c(1, 1, 1, 2, 2),
  year = c(1989, 1991, 1992, 1991, 1992),
  value = c(4.1, 4.5, 3.3, 3.2, 5.2)
)
# Both columns are added by reference in a single grouped assignment
DT[
  ,
  `:=`(
    value_l = tlag(value, along_with = year),
    value_ma = roll_lag(value, mean, n = 3, along_with = year)
  ),
  by = id
]

data.table functions

setpanel re-sorts the data.table in place along the id and time variables. It returns an error if (i) the time variable has missing values or (ii) there are duplicates for (id, along_with).

# Panel with a missing year (row 3) and a duplicated (id, year) pair
DT <- data.table(
    id    = c(1, 1, 1, 2, 2),
    year  = c(1991, 1993, NA, 1992, 1992),
    value = c(4.1, 4.5, 3.3, 3.2, 5.2)
)
# setpanel rejects a time variable that contains missing values
setpanel(DT, id, year)
#> Error: Error: Variable year has missing values in 1 row(s): 3
# Drop the rows where `year` is NA. The data.table method of na.omit
# selects the columns to check through `cols`; a `by` argument would be
# swallowed by `...` and every column would be checked instead.
DT <- na.omit(DT, cols = "year")
# setpanel also rejects duplicated (id, year) pairs
setpanel(DT, id, year)
#> Error: Variables (id, year) have duplicates for rows (3, 4)
# Keep a single row per (id, year) pair, after which setpanel succeeds
DT <- unique(DT, by = c("id", "year"))
setpanel(DT, id, year)

fill_gap fills in gaps in a data.table along a time variable (corresponds to Stata tsfill)

library(lubridate)

# Unsorted two-id panel with gaps in `year`
DT <- data.table(
  id = c(1, 1, 1, 2, 2),
  year = c(1992, 1989, 1991, 1992, 1991),
  value = c(4.1, 4.5, 3.3, 3.2, 5.2)
)
# Fill the gaps along the numeric year, within each id
fill_gap(DT, value, along_with = year, by = id)

# Same along a monthly elapsed date built from calendar dates
DT[, date := mdy(c(
  "03/01/1992", "04/03/1992", "07/15/1992", "08/21/1992", "10/03/1992"
))]
DT[, datem := as.monthly(date)]
fill_gap(DT, value, along_with = datem, by = id)

setna fills in missing values along a time variable. setna inherits from the data.table options roll and rollends

# Two-id panel with missing values in `value`
DT <- data.table(
  id = c(1, 1, 1, 1, 2, 2),
  date = c(1992, 1989, 1991, 1993, 1992, 1991),
  value = c(NA, NA, 3, NA, 3.2, 5.2)
)

# Copies keyed on (id, date), taken before any setna call
DT1 <- copy(DT)
setkey(DT1, id, date)
DT2 <- copy(DT1)
DT3 <- copy(DT1)

# Unkeyed table: time and grouping variables are passed explicitly
setna(DT, value, along_with = date, by = id)
# Keyed table: no variables passed (presumably the key is used instead)
setna(DT1)
# Variants using the data.table options rollends and roll
setna(DT2, value, rollends = TRUE)
setna(DT3, value, roll = "nearest")