Download sequences

GenBank

## Download accession AB115403 twice -- once in "genbank" format and once in
## "fasta" format -- each into its own temporary file.
tmpgb <- tempfile(fileext = ".gb")
tmpfa <- tempfile(fileext = ".fa")
download_genbank(
  acc = "AB115403",
  format = "genbank",
  outfile = tmpgb
)
download_genbank(
  acc = "AB115403",
  format = "fasta",
  outfile = tmpfa
)
## Uncomment to preview the downloaded files:
## readLines(tmpgb)[1:10]
## readLines(tmpfa)

File conversion

fasta and phylip conversion

## Example FASTA file shipped in the extdata directory of seqmagick.
fa_file <- system.file("extdata", "HA.fas", package = "seqmagick")
## Only a small subset is used so that the vignette compiles quickly:
## read the file, filter on the pattern (by = "sequence"), and write the
## result to a temporary file in interleaved layout.
fa2 <- tempfile(fileext = ".fa")
fa_read(fa_file) %>%
  bs_filter("ATGAAAGTAAAA", by = "sequence") %>%
  fa_write(fa2, type = "interleaved")


## Align the subset and store the result in a temporary FASTA file.
alnfas <- tempfile(fileext = ".fas")
fa_read(fa2) %>%
  bs_aln(quiet = TRUE) %>%
  fa_write(alnfas)

## The phylip format is only for aligned sequences, so the aligned file is
## the one converted here (sequential layout).
tmpphy <- tempfile(fileext = ".phy")
fas2phy(alnfas, tmpphy, type = "sequential")

seqmagick supports both sequential and interleaved formats; users can choose between them with the type parameter.

## Convert the phylip file back to FASTA, this time requesting the
## interleaved layout; the output path is alnfas.
phy2fas(tmpphy, alnfas, type = "interleaved")

interleaved and sequential format conversion

## FASTA: read fa2 and write it out again in sequential layout.
tmpfas <- tempfile(fileext = ".fa")
fa_read(fa2) %>%
  fa_write(tmpfas, type = "sequential")

## phylip: read tmpphy and write it out again in interleaved layout.
tmpphy2 <- tempfile(fileext = ".phy")
phy_read(tmpphy) %>%
  phy_write(tmpphy2, type = "interleaved")

Sequence manipulation

## Read the example FASTA file.
bs <- fa_read(fa_file)
## Display the result of filtering on the pattern (by = "sequence").
bs %>% bs_filter("ATGAAAGTAAAA", by = "sequence")

## Apply the same filter again and align the result.
aln <- bs %>%
  bs_filter("ATGAAAGTAAAA", by = "sequence") %>%
  bs_aln(quiet = TRUE)

## Display the consensus computed from the alignment.
bs_consensus(aln)

Bugs/Feature requests

If you find a bug or have a feature request, please let me know. Thanks!

Session info

Here is the output of sessionInfo() on the system on which this document was compiled:

## R version 4.5.2 (2025-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=C                               
## [2] LC_CTYPE=Chinese (Simplified)_China.utf8   
## [3] LC_MONETARY=Chinese (Simplified)_China.utf8
## [4] LC_NUMERIC=C                               
## [5] LC_TIME=Chinese (Simplified)_China.utf8    
## 
## time zone: Asia/Shanghai
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] seqmagick_0.1.8     Biostrings_2.78.0   Seqinfo_1.0.0      
## [4] XVector_0.50.0      IRanges_2.44.0      S4Vectors_0.48.0   
## [7] BiocGenerics_0.56.0 generics_0.1.4      magrittr_2.0.4     
## 
## loaded via a namespace (and not attached):
##  [1] crayon_1.5.3      cli_3.6.5         knitr_1.51        rlang_1.1.7      
##  [5] xfun_0.56         otel_0.2.0        jsonlite_2.0.0    htmltools_0.5.9  
##  [9] sass_0.4.10       rappdirs_0.3.4    rmarkdown_2.30    evaluate_1.0.5   
## [13] jquerylib_0.1.4   prettydoc_0.4.1   fastmap_1.2.0     yaml_2.3.12      
## [17] lifecycle_1.0.5   compiler_4.5.2    fs_1.6.6          digest_0.6.39    
## [21] R6_2.6.1          bslib_0.9.0       tools_4.5.2       yulab.utils_0.2.3
## [25] cachem_1.1.0

References