seqmagick: sequence manipulation

## Warning: multiple methods tables found for 'setequal'
## Warning: replacing previous import 'BiocGenerics::setequal' by
## 'S4Vectors::setequal' when loading 'IRanges'
## Warning: replacing previous import 'BiocGenerics::setequal' by
## 'S4Vectors::setequal' when loading 'XVector'
## Warning: replacing previous import 'BiocGenerics::setequal' by
## 'S4Vectors::setequal' when loading 'GenomeInfoDb'
## Warning: replacing previous import 'BiocGenerics::setequal' by
## 'S4Vectors::setequal' when loading 'Biostrings'
## Warning: multiple methods tables found for 'setequal'

Download sequences

Genbank

# Temporary destinations for the GenBank record and its FASTA counterpart.
tmpgb <- tempfile(fileext = ".gb")
tmpfa <- tempfile(fileext = ".fa")

# Fetch accession AB115403 twice: once per requested output format.
download_genbank(acc = "AB115403", format = "genbank", outfile = tmpgb)
download_genbank(acc = "AB115403", format = "fasta", outfile = tmpfa)

# To inspect the downloaded files, run for example:
#   readLines(tmpgb)[1:10]
#   readLines(tmpfa)

File conversion

fasta and phylip conversion

# Locate the example FASTA file in the seqmagick package's extdata directory.
fa_file <- system.file("extdata/HA.fas", package = "seqmagick")

# Work on a small subset to keep the vignette's compilation time short.
fa2 <- tempfile(fileext = ".fa")
fa_read(fa_file) %>%
  bs_filter("ATGAAAGTAAAA", by = "sequence") %>%
  fa_write(fa2, type = "interleaved")


# Read the subset, pass it through bs_aln(), and write the result as FASTA.
alnfas <- tempfile(fileext = ".fas")
fa_read(fa2) %>%
  bs_aln(quiet = TRUE) %>%
  fa_write(alnfas)

# The phylip format only holds aligned sequences, so convert the aligned file.
tmpphy <- tempfile(fileext = ".phy")
fas2phy(alnfas, tmpphy, type = "sequential")

seqmagick supports both sequential and interleaved formats; users can choose between them with the type parameter.

# Convert the phylip file back to FASTA with type = "interleaved"
# (note that this writes to the existing alnfas path).
phy2fas(tmpphy, alnfas, type = "interleaved")

interleaved and sequential format conversion

# FASTA: read the subset and write it back out with type = "sequential".
tmpfas <- tempfile(fileext = ".fa")
fa_read(fa2) %>%
  fa_write(tmpfas, type = "sequential")

# phylip: read the phylip file and write it back out with type = "interleaved".
tmpphy2 <- tempfile(fileext = ".phy")
phy_read(tmpphy) %>%
  phy_write(tmpphy2, type = "interleaved")

Sequence manipulation

# Read the example FASTA file into memory.
bs <- fa_read(fa_file)

# Filter on the sequence pattern; the result is printed at top level.
bs %>% bs_filter("ATGAAAGTAAAA", by = "sequence")

# Apply the same filter, then pass the result through bs_aln().
aln <- bs %>%
  bs_filter("ATGAAAGTAAAA", by = "sequence") %>%
  bs_aln(quiet = TRUE)

# Call bs_consensus() on the result; its value is printed at top level.
bs_consensus(aln)

Bugs/Feature requests

If you find a bug or have a feature request, please let me know. Thanks!

Session info

Here is the output of sessionInfo() on the system on which this document was compiled:

## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.1 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Etc/UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] seqmagick_0.1.7     Biostrings_2.75.0   GenomeInfoDb_1.43.0
##  [4] XVector_0.47.0      IRanges_2.41.0      S4Vectors_0.45.0   
##  [7] BiocGenerics_0.53.1 generics_0.1.3      magrittr_2.0.3     
## [10] prettydoc_0.4.1    
## 
## loaded via a namespace (and not attached):
##  [1] crayon_1.5.3            httr_1.4.7              cli_3.6.3              
##  [4] knitr_1.48              rlang_1.1.4             xfun_0.49              
##  [7] UCSC.utils_1.3.0        jsonlite_1.8.9          buildtools_1.0.0       
## [10] htmltools_0.5.8.1       maketools_1.3.1         sys_3.4.3              
## [13] sass_0.4.9              rmarkdown_2.28          evaluate_1.0.1         
## [16] jquerylib_0.1.4         fastmap_1.2.0           yaml_2.3.10            
## [19] lifecycle_1.0.4         compiler_4.4.1          fs_1.6.5               
## [22] digest_0.6.37           R6_2.5.1                GenomeInfoDbData_1.2.13
## [25] bslib_0.8.0             tools_4.4.1             zlibbioc_1.52.0        
## [28] yulab.utils_0.1.7.001   cachem_1.1.0

References