Example 3: Reshape

When I design longer_dt and wider_dt, I could find the pivot_longer and pivot_wider in tidyr and melt and dcast in data.table. Still, designing this API is not easy, as my goal is to let users use it with least pain. Here we would try to reproduce the results in the vignette of tidyr(https://tidyr.tidyverse.org/articles/pivot.html). First load the packages:

library(tidyfst)
library(tidyr)

Longer

First inspect the data:

relig_income
#> # A tibble: 18 x 11
#>    religion `<$10k` `$10-20k` `$20-30k` `$30-40k` `$40-50k` `$50-75k` `$75-100k`
#>    <chr>      <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>      <dbl>
#>  1 Agnostic      27        34        60        81        76       137        122
#>  2 Atheist       12        27        37        52        35        70         73
#>  3 Buddhist      27        21        30        34        33        58         62
#>  4 Catholic     418       617       732       670       638      1116        949
#>  5 Don’t k~      15        14        15        11        10        35         21
#>  6 Evangel~     575       869      1064       982       881      1486        949
#>  7 Hindu          1         9         7         9        11        34         47
#>  8 Histori~     228       244       236       238       197       223        131
#>  9 Jehovah~      20        27        24        24        21        30         15
#> 10 Jewish        19        19        25        25        30        95         69
#> 11 Mainlin~     289       495       619       655       651      1107        939
#> 12 Mormon        29        40        48        51        56       112         85
#> 13 Muslim         6         7         9        10         9        23         16
#> 14 Orthodox      13        17        23        32        32        47         38
#> 15 Other C~       9         7        11        13        13        14         18
#> 16 Other F~      20        33        40        46        49        63         46
#> 17 Other W~       5         2         3         4         2         7          3
#> 18 Unaffil~     217       299       374       365       341       528        407
#> # ... with 3 more variables: `$100-150k` <dbl>, `>150k` <dbl>, `Don't
#> #   know/refused` <dbl>

In tidyr, to get the longer format you need:

relig_income %>% 
  pivot_longer(-religion, names_to = "income", values_to = "count")

In tidyfst, we have:

relig_income %>% 
  longer_dt(group_to_keep = "religion",gathered_name = "income",gathered_value = "count")
#>                   religion             income count
#>                     <char>             <fctr> <num>
#>   1:              Agnostic              <$10k    27
#>   2:               Atheist              <$10k    12
#>   3:              Buddhist              <$10k    27
#>   4:              Catholic              <$10k   418
#>   5:    Don’t know/refused              <$10k    15
#>  ---                                               
#> 176:              Orthodox Don't know/refused    73
#> 177:       Other Christian Don't know/refused    18
#> 178:          Other Faiths Don't know/refused    71
#> 179: Other World Religions Don't know/refused     8
#> 180:          Unaffiliated Don't know/refused   597

Another example from tidyr:

billboard

# tidyr way:
# billboard %>% 
#   pivot_longer(
#     cols = starts_with("wk"), 
#     names_to = "week", 
#     values_to = "rank",
#     values_drop_na = TRUE
#   )

# tidyfst way:
billboard %>% 
  longer_dt("wk",negate = TRUE,  
            gathered_name = "week",
            gathered_value = "rank",
            na.rm = TRUE
            )
# regex should select group_to_keep, negate could reverse that
A warning would could come out because the merging column has different data types and do the coercion automatically.

Wider

## data
fish_encounters
#> # A tibble: 114 x 3
#>    fish  station  seen
#>    <fct> <fct>   <int>
#>  1 4842  Release     1
#>  2 4842  I80_1       1
#>  3 4842  Lisbon      1
#>  4 4842  Rstr        1
#>  5 4842  Base_TD     1
#>  6 4842  BCE         1
#>  7 4842  BCW         1
#>  8 4842  BCE2        1
#>  9 4842  BCW2        1
#> 10 4842  MAE         1
#> # ... with 104 more rows

## tidyr way:
fish_encounters %>% pivot_wider(names_from = station, values_from = seen)
#> # A tibble: 19 x 12
#>    fish  Release I80_1 Lisbon  Rstr Base_TD   BCE   BCW  BCE2  BCW2   MAE   MAW
#>    <fct>   <int> <int>  <int> <int>   <int> <int> <int> <int> <int> <int> <int>
#>  1 4842        1     1      1     1       1     1     1     1     1     1     1
#>  2 4843        1     1      1     1       1     1     1     1     1     1     1
#>  3 4844        1     1      1     1       1     1     1     1     1     1     1
#>  4 4845        1     1      1     1       1    NA    NA    NA    NA    NA    NA
#>  5 4847        1     1      1    NA      NA    NA    NA    NA    NA    NA    NA
#>  6 4848        1     1      1     1      NA    NA    NA    NA    NA    NA    NA
#>  7 4849        1     1     NA    NA      NA    NA    NA    NA    NA    NA    NA
#>  8 4850        1     1     NA     1       1     1     1    NA    NA    NA    NA
#>  9 4851        1     1     NA    NA      NA    NA    NA    NA    NA    NA    NA
#> 10 4854        1     1     NA    NA      NA    NA    NA    NA    NA    NA    NA
#> 11 4855        1     1      1     1       1    NA    NA    NA    NA    NA    NA
#> 12 4857        1     1      1     1       1     1     1     1     1    NA    NA
#> 13 4858        1     1      1     1       1     1     1     1     1     1     1
#> 14 4859        1     1      1     1       1    NA    NA    NA    NA    NA    NA
#> 15 4861        1     1      1     1       1     1     1     1     1     1     1
#> 16 4862        1     1      1     1       1     1     1     1     1    NA    NA
#> 17 4863        1     1     NA    NA      NA    NA    NA    NA    NA    NA    NA
#> 18 4864        1     1     NA    NA      NA    NA    NA    NA    NA    NA    NA
#> 19 4865        1     1      1    NA      NA    NA    NA    NA    NA    NA    NA

## tidyfst way:
fish_encounters %>% 
  wider_dt(name_to_spread = "station",value_to_spread = "seen")
#>       fish Release I80_1 Lisbon  Rstr Base_TD   BCE   BCW  BCE2  BCW2   MAE
#>     <fctr>   <int> <int>  <int> <int>   <int> <int> <int> <int> <int> <int>
#>  1:   4842       1     1      1     1       1     1     1     1     1     1
#>  2:   4843       1     1      1     1       1     1     1     1     1     1
#>  3:   4844       1     1      1     1       1     1     1     1     1     1
#>  4:   4845       1     1      1     1       1    NA    NA    NA    NA    NA
#>  5:   4847       1     1      1    NA      NA    NA    NA    NA    NA    NA
#>  6:   4848       1     1      1     1      NA    NA    NA    NA    NA    NA
#>  7:   4849       1     1     NA    NA      NA    NA    NA    NA    NA    NA
#>  8:   4850       1     1     NA     1       1     1     1    NA    NA    NA
#>  9:   4851       1     1     NA    NA      NA    NA    NA    NA    NA    NA
#> 10:   4854       1     1     NA    NA      NA    NA    NA    NA    NA    NA
#> 11:   4855       1     1      1     1       1    NA    NA    NA    NA    NA
#> 12:   4857       1     1      1     1       1     1     1     1     1    NA
#> 13:   4858       1     1      1     1       1     1     1     1     1     1
#> 14:   4859       1     1      1     1       1    NA    NA    NA    NA    NA
#> 15:   4861       1     1      1     1       1     1     1     1     1     1
#> 16:   4862       1     1      1     1       1     1     1     1     1    NA
#> 17:   4863       1     1     NA    NA      NA    NA    NA    NA    NA    NA
#> 18:   4864       1     1     NA    NA      NA    NA    NA    NA    NA    NA
#> 19:   4865       1     1      1    NA      NA    NA    NA    NA    NA    NA
#>       MAW
#>     <int>
#>  1:     1
#>  2:     1
#>  3:     1
#>  4:    NA
#>  5:    NA
#>  6:    NA
#>  7:    NA
#>  8:    NA
#>  9:    NA
#> 10:    NA
#> 11:    NA
#> 12:    NA
#> 13:     1
#> 14:    NA
#> 15:     1
#> 16:    NA
#> 17:    NA
#> 18:    NA
#> 19:    NA

If you want to fill with 0s, use:

fish_encounters %>% 
  wider_dt(name_to_spread = "station",value_to_spread = "seen",fill = 0)
#>       fish Release I80_1 Lisbon  Rstr Base_TD   BCE   BCW  BCE2  BCW2   MAE
#>     <fctr>   <int> <int>  <int> <int>   <int> <int> <int> <int> <int> <int>
#>  1:   4842       1     1      1     1       1     1     1     1     1     1
#>  2:   4843       1     1      1     1       1     1     1     1     1     1
#>  3:   4844       1     1      1     1       1     1     1     1     1     1
#>  4:   4845       1     1      1     1       1     0     0     0     0     0
#>  5:   4847       1     1      1     0       0     0     0     0     0     0
#>  6:   4848       1     1      1     1       0     0     0     0     0     0
#>  7:   4849       1     1      0     0       0     0     0     0     0     0
#>  8:   4850       1     1      0     1       1     1     1     0     0     0
#>  9:   4851       1     1      0     0       0     0     0     0     0     0
#> 10:   4854       1     1      0     0       0     0     0     0     0     0
#> 11:   4855       1     1      1     1       1     0     0     0     0     0
#> 12:   4857       1     1      1     1       1     1     1     1     1     0
#> 13:   4858       1     1      1     1       1     1     1     1     1     1
#> 14:   4859       1     1      1     1       1     0     0     0     0     0
#> 15:   4861       1     1      1     1       1     1     1     1     1     1
#> 16:   4862       1     1      1     1       1     1     1     1     1     0
#> 17:   4863       1     1      0     0       0     0     0     0     0     0
#> 18:   4864       1     1      0     0       0     0     0     0     0     0
#> 19:   4865       1     1      1     0       0     0     0     0     0     0
#>       MAW
#>     <int>
#>  1:     1
#>  2:     1
#>  3:     1
#>  4:     0
#>  5:     0
#>  6:     0
#>  7:     0
#>  8:     0
#>  9:     0
#> 10:     0
#> 11:     0
#> 12:     0
#> 13:     1
#> 14:     0
#> 15:     1
#> 16:     0
#> 17:     0
#> 18:     0
#> 19:     0

tidyfst currently does not support spreading multiple columns. I believe this way could keep this function simple and easy to understand. When you need this function, try spread them one by one (using a loop), or try data.table and tidyr.