voice
vignette
# packs
library(voice)
library(tidyverse)
library(music)
library(gm)
The /mp3 and /split directories will be used in Example 2.
# Working directories used by the examples, all rooted at vaDir.
vaDir <- '~/Downloads/voiceAudios'
wavDir <- paste0(vaDir, '/wav')
mp3Dir <- paste0(vaDir, '/mp3')
rttmDir <- paste0(vaDir, '/rttm')
splitDir <- paste0(vaDir, '/split')
mxmlDir <- paste0(vaDir, '/musicxml')
# Create each directory when it is missing. ifelse() only evaluates
# dir.create() when the directory does not exist; otherwise it returns the
# 'Directory exists!' string shown in the recorded output.
# The root (vaDir) is handled first so the sub-directories can be created.
ifelse(!dir.exists(vaDir), dir.create(vaDir), 'Directory exists!')
#> [1] "Directory exists!"
ifelse(!dir.exists(wavDir), dir.create(wavDir), 'Directory exists!')
#> [1] "Directory exists!"
ifelse(!dir.exists(mp3Dir), dir.create(mp3Dir), 'Directory exists!')
#> [1] "Directory exists!"
ifelse(!dir.exists(rttmDir), dir.create(rttmDir), 'Directory exists!')
#> [1] "Directory exists!"
ifelse(!dir.exists(splitDir), dir.create(splitDir), 'Directory exists!')
#> [1] "Directory exists!"
ifelse(!dir.exists(mxmlDir), dir.create(mxmlDir), 'Directory exists!')
#> [1] "Directory exists!"
# Build and run a shell `rm` that deletes every file matching *.* in wavDir.
cmd <- paste0('rm ', wavDir, '/*.*')
system(cmd)
Automatize and test the autoDir argument @ splitw.R.
Parameters fromWav and fromRttm admit either a file or a directory @ splitw.R.
# Download the example recording into wavDir.
# mode = 'wb' makes download.file() write the file in binary mode.
url0 <- 'http://www.filipezabala.com/audio/bebezinho_2005.wav'
download.file(url0, paste0(wavDir, '/bebezinho_2005.wav'), mode = 'wb')
# music::playWave(paste0(wavDir, '/bebezinho_2005.wav'))
At url0 <- 'https://github.com/filipezabala/voiceAudios/raw/main/bebezinho_2.005.wav', find out why the downloaded file is called wav_bebezinho_2.005.wav.
Insert a button to play audio files.
# Extract F0, formants and gain from every audio file found in wavDir.
ef <- voice::extract_features(wavDir, features = c('f0','formants','gain'),
                              round.to = 6, windowShift = 5)
#> PROGRESS 100%
#> FILE 1 OF 1 | 0.001 SECONDS
#>
#> TOTAL TIME 0.513 SECONDS
# Print the extracted features.
ef
#> # A tibble: 400 × 11
#> file_name F0 F1 F2 F3 F4 F5 F6 F7 F8 GAIN
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 bebezinho_2005.w… 156. 323 1658 2940 NA 4302 5306 6257 7221 51.8
#> 2 bebezinho_2005.w… 158. 299 1843 NA 3085 4196 5262 6209 7219 48.9
#> 3 bebezinho_2005.w… 160. 297 1838 1878 3137 4111 5074 6129 7238 41.3
#> 4 bebezinho_2005.w… 159. 302 1814 2080 3135 4096 4539 6222 7268 29.7
#> 5 bebezinho_2005.w… 158. 304 1902 1964 3100 4125 4431 6255 7281 25.0
#> 6 bebezinho_2005.w… 153. 303 1660 2027 3068 4180 4267 6304 7301 25.1
#> 7 bebezinho_2005.w… 151. 300 1672 1988 3087 4051 4190 6287 7267 26.0
#> 8 bebezinho_2005.w… 148. 295 1644 1922 3100 4155 NA 6245 7187 27.0
#> 9 bebezinho_2005.w… 145. 294 1927 3101 3904 4178 NA 6221 7163 27.3
#> 10 bebezinho_2005.w… 145. 298 1937 NA 3102 4189 NA 6080 7181 26.5
#> # … with 390 more rows
Automatize and test stereo2mono.
Test different windowShift values.
Write the function smooth_df(x,k).
# Smooth the extracted features once per entry of `odd`; the results are
# stored in a list whose names are 'k=1', 'k=3', ..., 'k=11'.
odd <- seq(1,11,2)
ef_sm <- vector('list', length = length(odd))
names(ef_sm) <- paste0('k=', odd)
for(i in seq_along(ef_sm)){
  # NOTE(review): k receives the loop index i (1, 2, ..., 6), whereas the list
  # names and the plot titles use odd (1, 3, ..., 11). The row counts recorded
  # from sapply(ef_sm, dim) (400, 399, ..., 395) are consistent with k = 1..6.
  # If odd window sizes were intended this should be k = odd[i]; confirm
  # before changing, since every recorded output below would differ.
  ef_sm[[i]] <- voice::smooth_df(ef, k = i)
}
#> New names:
#> * `` -> ...1
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * ...
#> New names:
#> * `` -> ...1
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * ...
#> New names:
#> * `` -> ...1
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * ...
#> New names:
#> * `` -> ...1
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * ...
#> New names:
#> * `` -> ...1
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * ...
#> New names:
#> * `` -> ...1
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * ...
# Dimensions of each smoothed data frame, one column per list element
# (first row: number of rows, second row: number of columns).
sapply(ef_sm, dim)
#> k=1 k=3 k=5 k=7 k=9 k=11
#> [1,] 400 399 398 397 396 395
#> [2,] 11 11 11 11 11 11
# Split the plotting device into a 2 x 3 grid so the six plots share a figure.
par(mfrow = c(2,3))
# Plot the F0 column of every smoothed data frame, titled with the
# corresponding value of `odd`.
for(i in 1:length(ef_sm)){
plot(ef_sm[[i]]$F0, main = paste0('k = ', odd[i]))
}
# For every smoothed data frame, add four columns derived from F0 through
# voice::notes(), one per `measure`: 'spn', 'midi', 'octave' and 'black'.
for(i in seq_along(ef_sm)){
  ef_sm[[i]]$F0_spn <- voice::notes(ef_sm[[i]]$F0, measure = 'spn')
  ef_sm[[i]]$F0_midi <- voice::notes(ef_sm[[i]]$F0, measure = 'midi')
  ef_sm[[i]]$F0_octave <- voice::notes(ef_sm[[i]]$F0, measure = 'octave')
  ef_sm[[i]]$F0_black <- voice::notes(ef_sm[[i]]$F0, measure = 'black')
}
# Show the first rows of each augmented data frame.
lapply(ef_sm, head)
#> $`k=1`
#> # A tibble: 6 × 15
#> file_name F0 F1 F2 F3 F4 F5 F6 F7 F8 GAIN F0_spn
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <ord>
#> 1 bebezinho_… 156. 323 1658 2940 NA 4302 5306 6257 7221 51.8 D#3
#> 2 bebezinho_… 158. 299 1843 NA 3085 4196 5262 6209 7219 48.9 D#3
#> 3 bebezinho_… 160. 297 1838 1878 3137 4111 5074 6129 7238 41.3 D#3
#> 4 bebezinho_… 159. 302 1814 2080 3135 4096 4539 6222 7268 29.7 D#3
#> 5 bebezinho_… 158. 304 1902 1964 3100 4125 4431 6255 7281 25.0 D#3
#> 6 bebezinho_… 153. 303 1660 2027 3068 4180 4267 6304 7301 25.1 D#3
#> # … with 3 more variables: F0_midi <int>, F0_octave <fct>, F0_black <dbl>
#>
#> $`k=3`
#> # A tibble: 6 × 15
#> file_name F0 F1 F2 F3 F4 F5 F6 F7 F8 GAIN F0_spn
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <ord>
#> 1 bebezinho_… 157. 311 1750. NA NA 4249 5284 6233 7220 50.3 D#3
#> 2 bebezinho_… 159. 298 1840. NA 3111 4154. 5168 6169 7228. 45.1 D#3
#> 3 bebezinho_… 159. 300. 1826 1979 3136 4104. 4806. 6176. 7253 35.5 D#3
#> 4 bebezinho_… 158. 303 1858 2022 3118. 4110. 4485 6238. 7274. 27.4 D#3
#> 5 bebezinho_… 156. 304. 1781 1996. 3084 4152. 4349 6280. 7291 25.1 D#3
#> 6 bebezinho_… 152. 302. 1666 2008. 3078. 4116. 4228. 6296. 7284 25.6 D#3
#> # … with 3 more variables: F0_midi <int>, F0_octave <fct>, F0_black <dbl>
#>
#> $`k=5`
#> # A tibble: 6 × 15
#> file_name F0 F1 F2 F3 F4 F5 F6 F7 F8 GAIN F0_spn
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <ord>
#> 1 bebezinho_… 158. 306. 1780. NA NA 4203 5214 6198. 7226 47.3 D#3
#> 2 bebezinho_… 159. 299. 1832. NA 3119 4134. 4958. 6187. 7242. 40.0 D#3
#> 3 bebezinho_… 159. 301 1851. 1974 3124 4111. 4681. 6202 7262. 32.0 D#3
#> 4 bebezinho_… 157. 303 1792 2024. 3101 4134. 4412. 6260. 7283. 26.6 D#3
#> 5 bebezinho_… 154. 302. 1745. 1993 3085 4119. 4296 6282 7283 25.4 D#3
#> 6 bebezinho_… 151. 299. 1659. 1979 3085 4129. NA 6279. 7252. 26.0 D3
#> # … with 3 more variables: F0_midi <int>, F0_octave <fct>, F0_black <dbl>
#>
#> $`k=7`
#> # A tibble: 6 × 15
#> file_name F0 F1 F2 F3 F4 F5 F6 F7 F8 GAIN F0_spn
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <ord>
#> 1 bebezinho_… 158. 305. 1788. NA NA 4176. 5045. 6204. 7236. 42.9 D#3
#> 2 bebezinho_… 159. 300. 1849. NA 3114. 4132 4826. 6204. 7252. 36.2 D#3
#> 3 bebezinho_… 157. 302. 1804. 1987. 3110 4128 4578. 6228. 7272 30.3 D#3
#> 4 bebezinho_… 155. 302. 1762 2015. 3098. 4113 4357. 6267 7279. 26.5 D#3
#> 5 bebezinho_… 153. 300. 1720. 1975. 3089. 4128. NA 6273. 7259 25.8 D#3
#> 6 bebezinho_… 149. 298 1726. 2260. 3290. 4141 NA 6264. 7230. 26.4 D3
#> # … with 3 more variables: F0_midi <int>, F0_octave <fct>, F0_black <dbl>
#>
#> $`k=9`
#> # A tibble: 6 × 15
#> file_name F0 F1 F2 F3 F4 F5 F6 F7 F8 GAIN F0_spn
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <ord>
#> 1 bebezinho_… 158. 305 1811 NA NA 4166 4922. 6214. 7245. 39.3 D#3
#> 2 bebezinho_… 158. 301 1811. NA 3105 4142. 4715. 6224. 7261. 34.0 D#3
#> 3 bebezinho_… 156. 301. 1777. 1987. 3105. 4113. 4500. 6239. 7271 29.4 D#3
#> 4 bebezinho_… 154. 301. 1738. 1996. 3098 4121. NA 6263. 7261. 26.6 D#3
#> 5 bebezinho_… 151. 299. 1761 2200. 3252. 4138. NA 6262. 7240. 26.1 D3
#> 6 bebezinho_… 148. 298 1768 NA 3252. 4151. NA 6227. 7220. 26.4 D3
#> # … with 3 more variables: F0_midi <int>, F0_octave <fct>, F0_black <dbl>
#>
#> $`k=11`
#> # A tibble: 6 × 15
#> file_name F0 F1 F2 F3 F4 F5 F6 F7 F8 GAIN F0_spn
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <ord>
#> 1 bebezinho_… 157. 305. 1786. NA NA 4168. 4813. 6229. 7255. 37.0 D#3
#> 2 bebezinho_… 157. 301. 1788. NA 3102 4126. 4627. 6234. 7262. 32.7 D#3
#> 3 bebezinho_… 155. 300. 1755 1976. 3104. 4120. NA 6240. 7257 29.0 D#3
#> 4 bebezinho_… 152. 300. 1770. 2180. 3232. 4131. NA 6256. 7244. 26.7 D#3
#> 5 bebezinho_… 150. 299 1790. NA 3227. 4146. NA 6232 7230 26.2 D3
#> 6 bebezinho_… 148. 298 1791. NA 3227 4160. NA 6199. 7221. 26.4 D3
#> # … with 3 more variables: F0_midi <int>, F0_octave <fct>, F0_black <dbl>
Write plot_note function to show both frequency and notes in spn, midi and black formats.
Write a function to, given a frequency in Hz, convert in any SPN.
Implement and study a major/minor chord identifier (consider tabr functions).
Study sequences (scales and arpeggios).
# Pre-allocate one result list per note representation, named like ef_sm.
dur_spn <- vector('list', length(ef_sm)); names(dur_spn) <- paste0('k=', odd)
dur_midi <- vector('list', length(ef_sm)); names(dur_midi) <- paste0('k=', odd)
dur_octave <- vector('list', length(ef_sm)); names(dur_octave) <- paste0('k=', odd)
dur_black <- vector('list', length(ef_sm)); names(dur_black) <- paste0('k=', odd)
# Apply voice::duration() to each note column of each smoothed data frame.
for(i in seq_along(ef_sm)){
  dur_spn[[i]] <- voice::duration(ef_sm[[i]]$F0_spn)
  dur_midi[[i]] <- voice::duration(ef_sm[[i]]$F0_midi)
  dur_octave[[i]] <- voice::duration(ef_sm[[i]]$F0_octave)
  dur_black[[i]] <- voice::duration(ef_sm[[i]]$F0_black)
}
# Durations of the SPN notes for the first list element.
dur_spn[[1]]
#> note dur_line dur_ms dur_prop
#> 1 D#3 7 35 0.0175
#> 2 D3 18 90 0.0450
#> 3 C#3 1 5 0.0025
#> 4 D3 6 30 0.0150
#> 5 C#3 2 10 0.0050
#> 6 D3 3 15 0.0075
#> 7 C#3 2 10 0.0050
#> 8 D3 2 10 0.0050
#> 9 C#3 3 15 0.0075
#> 10 C3 11 55 0.0275
#> 11 B2 1 5 0.0025
#> 12 A#2 3 15 0.0075
#> 13 <NA> 8 40 0.0200
#> 14 C#3 3 15 0.0075
#> 15 D3 4 20 0.0100
#> 16 C#3 4 20 0.0100
#> 17 C3 1 5 0.0025
#> 18 C#3 12 60 0.0300
#> 19 C3 1 5 0.0025
#> 20 C#3 3 15 0.0075
#> 21 C3 1 5 0.0025
#> 22 C#3 7 35 0.0175
#> 23 C3 1 5 0.0025
#> 24 C#3 9 45 0.0225
#> 25 C3 2 10 0.0050
#> 26 C#3 1 5 0.0025
#> 27 C3 2 10 0.0050
#> 28 C#3 2 10 0.0050
#> 29 <NA> 26 130 0.0650
#> 30 G3 2 10 0.0050
#> 31 A3 1 5 0.0025
#> 32 A#3 4 20 0.0100
#> 33 C4 10 50 0.0250
#> 34 C#4 3 15 0.0075
#> 35 C4 1 5 0.0025
#> 36 C#4 14 70 0.0350
#> 37 C4 1 5 0.0025
#> 38 C#4 12 60 0.0300
#> 39 C4 1 5 0.0025
#> 40 C#4 10 50 0.0250
#> 41 C4 1 5 0.0025
#> 42 C#4 3 15 0.0075
#> 43 C4 1 5 0.0025
#> 44 C#4 3 15 0.0075
#> 45 C4 1 5 0.0025
#> 46 C#4 17 85 0.0425
#> 47 C4 1 5 0.0025
#> 48 C#4 30 150 0.0750
#> 49 C4 1 5 0.0025
#> 50 C#4 1 5 0.0025
#> 51 D4 1 5 0.0025
#> 52 C#4 2 10 0.0050
#> 53 C4 1 5 0.0025
#> 54 C#4 1 5 0.0025
#> 55 C4 1 5 0.0025
#> 56 B3 6 30 0.0150
#> 57 A#3 4 20 0.0100
#> 58 A3 9 45 0.0225
#> 59 A#3 3 15 0.0075
#> 60 A3 1 5 0.0025
#> 61 A#3 5 25 0.0125
#> 62 A3 46 230 0.1150
#> 63 A#3 1 5 0.0025
#> 64 A3 1 5 0.0025
#> 65 A#3 53 265 0.1325
#> 66 <NA> 1 5 0.0025
# Distances between the SPN notes of each duration table, computed with
# music::noteDistance(); a summary is printed for every list element.
nd <- vector('list', length(ef_sm))
for(i in seq_along(ef_sm)){
  nd[[i]] <- music::noteDistance(as.character(dur_spn[[i]]$note))
  print(summary(nd[[i]]))
}
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> -1.00000 -1.00000 -1.00000 -0.03279 1.00000 2.00000 5
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> -2.00000 -1.00000 -0.50000 -0.04762 1.00000 1.00000 5
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> -1.0000 -1.0000 -1.0000 -0.1071 1.0000 1.0000 5
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> -1.0000 -1.0000 -1.0000 -0.1071 1.0000 1.0000 5
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> -1.0000 -1.0000 -1.0000 -0.1739 1.0000 1.0000 5
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> -1.0000 -1.0000 -1.0000 -0.2105 1.0000 1.0000 5
# Note distances for the first list element.
nd[[1]]
#> Eb3 D3 Db3 D3 Db3 D3 Db3 D3 Db3 C3 B2 Bb2 <NA> Db3 D3 Db3
#> 0 -1 -1 1 -1 1 -1 1 -1 -1 -1 -1 NA NA 1 -1
#> C3 Db3 C3 Db3 C3 Db3 C3 Db3 C3 Db3 C3 Db3 <NA> G3 A3 Bb3
#> -1 1 -1 1 -1 1 -1 1 -1 1 -1 1 NA NA 2 1
#> C4 Db4 C4 Db4 C4 Db4 C4 Db4 C4 Db4 C4 Db4 C4 Db4 C4 Db4
#> 2 1 -1 1 -1 1 -1 1 -1 1 -1 1 -1 1 -1 1
#> C4 Db4 D4 Db4 C4 Db4 C4 B3 Bb3 A3 Bb3 A3 Bb3 A3 Bb3 A3
#> -1 1 1 -1 -1 1 -1 -1 -1 -1 1 -1 1 -1 1 -1
#> Bb3 <NA>
#> 1 NA
Extract and test microtones. (Giuliano’s idea.)
# Example: compress the features with conv_df(), assign notes, then build
# and render a short gm score from a manually entered pitch list.
# compress
ef_01 <- conv_df(ef, 0.01)
# assign notes
ef_01$F0_spn <- voice::notes(ef_01$F0, measure = 'spn')
# duration
nd_01 <- voice::duration(ef_01$F0_spn)
# still manual!!
li <- list('D#3','F3','C#4','A#3')
length(li)
#> [1] 4
# gm tool
m01 <- gm::Music()
m01 <- m01 +
  # add a 4/4 time signature
  gm::Meter(4, 4) +
  # MANUALLY adding notes; one duration of 1 per pitch, so both lists always
  # have the same length
  gm::Line(pitches = li,
           durations = as.list(rep(1, length(li)))) +
  # tempo
  Tempo(170)
m01
#> Music
#>
#> Line 1
#>
#> * as part 1 staff 1 voice 1
#> * of length 4
#> * of pitches D#3, F3, C#4, A#3
#> * of durations 1, 1, 1, 1
#>
#> Meter 4/4
#>
#> Tempo quarter = 170
show(m01, to = c('score', 'audio'))
k=21.
# smooth
# Smooth the features with k = 21, assign notes, then build and render a gm
# score from a manually entered pitch list (NA entries are shown as `_` in
# the printed score).
ef_sm_21 <- voice::smooth_df(ef, k = 21)
#> New names:
#> * `` -> ...1
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * ...
# assign notes
ef_sm_21$F0_spn <- voice::notes(ef_sm_21$F0, measure = 'spn')
# duration
nd_21 <- voice::duration(ef_sm_21$F0_spn)
# still manual!!
li <- list('D3','C#3','C3',NA,'C#3',NA,'B3','C4','C#4','C4','B3','A#3','A3','A#3',NA)
length(li)
#> [1] 15
# gm tool
m21 <- gm::Music()
m21 <- m21 +
  # add a 4/4 time signature
  gm::Meter(4, 4) +
  # MANUALLY adding notes; one duration of 1 per pitch, so both lists always
  # have the same length
  gm::Line(pitches = li,
           durations = as.list(rep(1, length(li)))) +
  # tempo
  Tempo(999)
m21
#> Music
#>
#> Line 1
#>
#> * as part 1 staff 1 voice 1
#> * of length 15
#> * of pitches D3, C#3, C3, _, C#3, _, B3, C4, C#4, C4, B3, A#3, A3, A#3, _
#> * of durations 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
#>
#> Meter 4/4
#>
#> Tempo quarter = 999
show(m21, to = c('score', 'audio'))
Automate pitches and durations at gm::Line to capture spn/midi from the duration data frame.
# Export both scores to mxmlDir in MusicXML format.
gm::export(m01, mxmlDir, 'bebezinho_01', 'musicxml')
gm::export(m21, mxmlDir, 'bebezinho_21', 'musicxml')
# font url
url0 <- 'https://github.com/filipezabala/voiceAudios/raw/main/mp3/'

# mp3 files
mp3Files <- c('anthem0.mp3', 'anthem1.mp3', 'anthem2.mp3',
              'game0.mp3', 'game1.mp3', 'game2.mp3',
              'phantom0.mp3', 'phantom1.mp3', 'phantom2.mp3',
              'romeo0.mp3', 'romeo1.mp3', 'romeo2.mp3',
              'sherlock0.mp3', 'sherlock1.mp3', 'sherlock2.mp3',
              'war0.mp3', 'war1.mp3', 'war2.mp3')
# downloading just the first mp3 file (remove [1] to download all)
for(i in mp3Files[1]){
  system(paste0('wget -r -np -k ', url0, i, ' -P ~/Downloads/voiceAudios/mp3'))
}
# tidying up files and directories: wget -r recreates the remote path under
# the target directory, so copy the files up and remove the mirrored tree
system('cp ~/Downloads/voiceAudios/mp3/github.com/filipezabala/voiceAudios/raw/main/mp3/*.* ~/Downloads/voiceAudios/mp3')
system('rm -rf ~/Downloads/voiceAudios/mp3/github.com/')
# Empty the wav directory, then convert every mp3 file to wav with ffmpeg.
cmd <- paste0('rm ', wavDir, '/*.*')
system(cmd)
# The shell loop matches the .mp3 extension case-insensitively and writes
# each result to ../wav/ under the same base name.
cmd <- 'cd ~/Downloads/voiceAudios/mp3;
 for i in *.[Mm][Pp]3; do ffmpeg -i "$i" "../wav/${i%.*}.wav"; done'
system(cmd)
Automatize and test mp32wav.
Test the hypotheses
H1: multiple mp32wav2mp32wav… conversions reduce the audio quality.
H2: What is the impact in the decision using
i. original wav
ii. original mp3 (that must be converted to wav)
iii. H1, if H1 is consistent
The best words in their best order. Takes about as long as the audio itself on 8 CPUs, and about twice as long on 4 CPUs.
# Run voice::poetry() on the wav directory; pycall is the path of the Python
# executable to use.
voice::poetry(wavDir, pycall = '/home/linuxbrew/.linuxbrew/bin/python3.9') # Linux
#> Time difference of 24.90145 secs
# voice::poetry(wavDir, to = rttmDir, pycall = '~/miniconda3/envs/pyvoice38/bin/python3.8') # Mac
Automate speaker recognition (who speaks when?) @ poetry.R.
# split wave: split the wav files using the RTTM files in rttmDir, writing
# the pieces to splitDir, and report the elapsed time
ini <- Sys.time()
voice::splitw(wavDir, fromRttm = rttmDir, to = splitDir)
Sys.time()-ini
#> Time difference of 0.1140707 secs
Automate argument compact.to at voice::conv_df.
No gain testing weighting F0 considering GAIN and other features at voice::conv_df. [TEST1] No gain testing weighting F0 considering GAIN.
Test ZCR to infer BPM.
Embed the gm::show function on macOS. Must solve: - Error in magick_image_write(image, format, quality, depth, density, comment, : rsession: NegativeOrZeroImageSize `’ @ error/image.c/CloneImage/794
Find the ‘best’ set of variables in different contexts.
Standardize the silence.gap argument at voice::splitw considering NIST (National Institute of Standards and Technology).
. [WORK IN PROGRESS] As it says, support is welcome.
. [SOLVED @ ‘%Y-%m-%d’ VERSION major.minor.subminor].
. [TESTED] During the tests no gain was obtained.
# Elapsed time since ini0.
# NOTE(review): ini0 is not assigned anywhere in the visible text; presumably
# a setup chunk that is not shown runs ini0 <- Sys.time() - confirm.
Sys.time()-ini0
#> Time difference of 38.55474 secs