voice vignette

version 0.4.15

Filipe J. Zabala

2022-09-07

0. Installation

https://github.com/filipezabala/voice

Load packages and audio files

# packs
library(voice)

# Get the full paths of the example wav files shipped with the wrassp package.
# `pattern` must be passed as a named argument (`=`): using `<-` here would
# create a stray `pattern` variable in the calling environment and only work
# because the value happens to land in the second positional slot.
wavDir <- list.files(system.file('extdata', package = 'wrassp'),
                     pattern = glob2rx('*.wav'), full.names = TRUE)

1. Extract features

Examples

# Minimal usage: extract features from the wav files, passing no other
# arguments; the enclosing parentheses make the assigned tibble print.
(M <- voice::extract_features(wavDir))
#> # A tibble: 2,389 × 61
#>    slice…¹ slice…² wav_p…³    f0    f1    f2    f3    f4    f5    f6    f7    f8
#>      <int>   <int> <chr>   <dbl> <int> <int> <int> <int> <int> <int> <int> <int>
#>  1       1       1 /usr/l…    NA    NA  1863    NA  3087  4218  5233  6144  6643
#>  2       2       2 /usr/l…    NA    NA  1863    NA  3179  4172  5259  6712    NA
#>  3       3       3 /usr/l…    NA    NA  1933  3055  3731  4663  5658  6775    NA
#>  4       4       4 /usr/l…    NA    NA  1777  2791  3712  4690  5657  6602  7771
#>  5       5       5 /usr/l…    NA    NA  1710  2690  3536  4677  5653  6526    NA
#>  6       6       6 /usr/l…    NA    NA  1794  2673  3560  4402  5119  6499    NA
#>  7       7       7 /usr/l…    NA    NA  1873  2665  3640  4346  5164  6516    NA
#>  8       8       8 /usr/l…    NA    NA  1932  2684  3550  4304  5190  6518    NA
#>  9       9       9 /usr/l…    NA   148  1960  2648  3248  3640  5214  6498  7668
#> 10      10      10 /usr/l…    NA   202  1965  2650  3284  3664  5210  6497  7716
#> # … with 2,379 more rows, 49 more variables: mfcc1 <dbl>, mfcc2 <dbl>,
#> #   mfcc3 <dbl>, mfcc4 <dbl>, mfcc5 <dbl>, mfcc6 <dbl>, mfcc7 <dbl>,
#> #   mfcc8 <dbl>, mfcc9 <dbl>, mfcc10 <dbl>, mfcc11 <dbl>, mfcc12 <dbl>,
#> #   df2 <dbl>, df3 <dbl>, df4 <dbl>, df5 <dbl>, df6 <dbl>, df7 <dbl>,
#> #   df8 <dbl>, pf1 <dbl>, pf2 <dbl>, pf3 <dbl>, pf4 <dbl>, pf5 <dbl>,
#> #   pf6 <dbl>, pf7 <dbl>, pf8 <dbl>, rf1 <dbl>, rf2 <dbl>, rf3 <dbl>,
#> #   rf4 <dbl>, rf5 <dbl>, rf6 <dbl>, rf7 <dbl>, rf8 <dbl>, rcf2 <dbl>, …

2. Tag

# Extended synthetic data: attach a subject id to every wav file,
# three consecutive files per subject (subjects 1, 2 and 3)
E <- dplyr::tibble(
  subject_id = rep(c(1, 2, 3), each = 3),
  wav_path = wavDir
)
E
#> # A tibble: 9 × 2
#>   subject_id wav_path                                               
#>        <dbl> <chr>                                                  
#> 1          1 /usr/local/lib/R/site-library/wrassp/extdata/lbo001.wav
#> 2          1 /usr/local/lib/R/site-library/wrassp/extdata/lbo002.wav
#> 3          1 /usr/local/lib/R/site-library/wrassp/extdata/lbo003.wav
#> 4          2 /usr/local/lib/R/site-library/wrassp/extdata/lbo004.wav
#> 5          2 /usr/local/lib/R/site-library/wrassp/extdata/lbo005.wav
#> 6          2 /usr/local/lib/R/site-library/wrassp/extdata/lbo006.wav
#> 7          3 /usr/local/lib/R/site-library/wrassp/extdata/lbo007.wav
#> 8          3 /usr/local/lib/R/site-library/wrassp/extdata/lbo008.wav
#> 9          3 /usr/local/lib/R/site-library/wrassp/extdata/lbo009.wav

# minimal usage: with no grouping argument the result has one row per
# wav_path, carrying the f0_tag_* summary columns
# (mean, sd, vc, median, iqr, mad)
voice::tag(E)
#> # A tibble: 9 × 7
#>   wav_path                       f0_ta…¹ f0_ta…² f0_ta…³ f0_ta…⁴ f0_ta…⁵ f0_ta…⁶
#>   <chr>                            <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
#> 1 /usr/local/lib/R/site-library…    85.4    17.6   0.206    76.1    29.4    7.53
#> 2 /usr/local/lib/R/site-library…    85.4    15.6   0.183    80.1    27.8   14.4 
#> 3 /usr/local/lib/R/site-library…    84.6    13.0   0.154    78.8    23.9   14.0 
#> 4 /usr/local/lib/R/site-library…    84.8    14.5   0.171    79.1    28.1   11.9 
#> 5 /usr/local/lib/R/site-library…    86.0    14.7   0.170    78.7    30.0   11.0 
#> 6 /usr/local/lib/R/site-library…    82.9    15.6   0.188    74.8    23.8    4.78
#> 7 /usr/local/lib/R/site-library…    78.2    16.2   0.207    73.5    13.4    6.82
#> 8 /usr/local/lib/R/site-library…    84.5    14.5   0.172    78.1    17.8    8.95
#> 9 /usr/local/lib/R/site-library…    81.0    12.2   0.151    75.9    23.1    9.14
#> # … with abbreviated variable names ¹​f0_tag_mean, ²​f0_tag_sd, ³​f0_tag_vc,
#> #   ⁴​f0_tag_median, ⁵​f0_tag_iqr, ⁶​f0_tag_mad

# canonical data: groupBy = 'subject_id' yields the f0_tag_* summaries
# with one row per subject_id rather than one row per wav file
voice::tag(E, groupBy = 'subject_id')
#> # A tibble: 3 × 7
#>   subject_id f0_tag_mean f0_tag_sd f0_tag_vc f0_tag_median f0_tag_iqr f0_tag_mad
#>        <dbl>       <dbl>     <dbl>     <dbl>         <dbl>      <dbl>      <dbl>
#> 1          1        85.1      15.3     0.180          78.3       26.8      11.9 
#> 2          2        84.6      14.9     0.176          76.4       28.3       7.97
#> 3          3        81.0      14.6     0.180          75.6       21.6       8.68

3. Voice2Sheet (experimental)

3.1. Extract features

Get audio

# Remote mp3 used throughout this section
url0 <- 'https://github.com/filipezabala/voiceAudios/blob/main/mp3/doremi.mp3?raw=true'

# Save a binary copy in the session temp dir, then embed the remote audio
download.file(
  url = url0,
  destfile = paste0(tempdir(), '/doremi.mp3'),
  mode = 'wb'
)
embedr::embed_audio(url0)

Convert mp3 to wav mono

# Convert every mp3 in the session temp dir to a mono (-ac 1) wav with ffmpeg
# (ffmpeg must be available on the PATH).
# The directory is shell-quoted so temp paths containing spaces or shell
# metacharacters survive, and `&&` prevents the conversion loop from running
# in the wrong directory when `cd` fails.
cmd <- paste0('cd ', shQuote(tempdir()), ' && ',
'for i in *.[Mm][Pp]3; do ffmpeg -i "$i" -ac 1 "./${i%.*}.wav"; done')
system(cmd)

Extract features

# Extract only the 'f0' and 'gain' features from the session temp dir,
# with round.to = 6 (presumably decimal places -- confirm in the voice docs),
# then print a per-column summary
M <- voice::extract_features(
  tempdir(),
  features = c('f0', 'gain'),
  round.to = 6
)
summary(M)
#>    slice_seq      slice_seq_file     wav_path               f0       
#>  Min.   :   1.0   Min.   :   1.0   Length:1179        Min.   :120.2  
#>  1st Qu.: 295.5   1st Qu.: 295.5   Class :character   1st Qu.:149.4  
#>  Median : 590.0   Median : 590.0   Mode  :character   Median :206.0  
#>  Mean   : 590.0   Mean   : 590.0                      Mean   :208.9  
#>  3rd Qu.: 884.5   3rd Qu.: 884.5                      3rd Qu.:259.5  
#>  Max.   :1179.0   Max.   :1179.0                      Max.   :352.3  
#>                                                       NA's   :223    
#>       gain      
#>  Min.   :10.76  
#>  1st Qu.:19.34  
#>  Median :21.37  
#>  Mean   :21.48  
#>  3rd Qu.:23.23  
#>  Max.   :34.33  
#> 

3.2. Music sheet

Compress to 1%

# Compress M to 1% (second argument 0.01); id = 3 is forwarded unchanged
# (presumably the position of the id column -- TODO confirm)
M1 <- voice::conv_df(M, 0.01, id = 3)

# Map each compressed f0 value to a note and store it as f0_spn
M1$f0_spn <- voice::notes(M1$f0)

# Derive the durations from the note sequence
d1 <- voice::duration(M1$f0_spn)

# Build the music object with gm (by Renfei Mao) in a single chain:
# 4/4 meter, one line of notes with their durations, tempo 170
library(gm)
m <- gm::Music() +
  gm::Meter(4, 4) +
  gm::Line(
    pitches = as.list(as.character(d1$note)),
    durations = as.list(d1$dur_line)
  ) +
  gm::Tempo(170)

# Render both the score and the audio
gm::show(m, to = c('score', 'audio'))