Title: | Sequence Manipulation Utilities |
---|---|
Description: | Supports reading and writing sequences for different formats (currently interleaved and sequential formats for 'FASTA' and 'PHYLIP'), file conversion, and manipulation (e.g. filter sequences that contain specify pattern, export consensus sequence from an alignment). |
Authors: | Guangchuang Yu [aut, cre] |
Maintainer: | Guangchuang Yu <[email protected]> |
License: | Artistic-2.0 |
Version: | 0.1.7 |
Built: | 2024-11-03 04:36:14 UTC |
Source: | https://github.com/yulab-smu/seqmagick |
convert bam file to aligned fasta file
bam2DNAStringSet(bamfile, refseq)
bam2DNAStringSet(bamfile, refseq)
bamfile |
bam file |
refseq |
refseq, DNAStringSet object |
DNAStringSet object
Guangchuang Yu
convert bam file to aligned fasta file
bam2DNAStringSet2(bamfile, refseq)
bam2DNAStringSet2(bamfile, refseq)
bamfile |
bam file |
refseq |
refseq, DNAStringSet object |
DNAStringSet object
Guangchuang Yu
sequence alignment
bs_aln(x, method = "muscle", ...)
bs_aln(x, method = "muscle", ...)
x |
XStringSet object |
method |
alignment method |
... |
additional parameter |
aligned sequences, XStringSet object
Guangchuang Yu
## Not run: fa_file <- system.file("extdata/HA.fas", package="seqmagick") x <- fa_read(fa_file) bs_aln(x) ## End(Not run)
## Not run: fa_file <- system.file("extdata/HA.fas", package="seqmagick") x <- fa_read(fa_file) bs_aln(x) ## End(Not run)
biological sequence filter by searching pattern
bs_filter(x, pattern, by = "description", ignore.case = FALSE)
bs_filter(x, pattern, by = "description", ignore.case = FALSE)
x |
BStringSet object |
pattern |
keyword for filter |
by |
one of 'description' and 'sequence' |
ignore.case |
logical |
BStringSet object
Guangchuang Yu
fa_file <- system.file("extdata/HA.fas", package="seqmagick") x <- fa_read(fa_file) bs_filter(x, 'ATGAAAGTAAAA', by='sequence')
fa_file <- system.file("extdata/HA.fas", package="seqmagick") x <- fa_read(fa_file) bs_filter(x, 'ATGAAAGTAAAA', by='sequence')
hamming distances of sequences
bs_hamming(x, count_indel = FALSE, ...)
bs_hamming(x, count_indel = FALSE, ...)
x |
BStringSet object |
count_indel |
whether count indel or not |
... |
additional parameters |
hamming distance
Guangchuang Yu
## Not run: fa_file <- system.file("extdata/HA.fas", package="seqmagick") x <- fa_read(fa_file) ## align first 5 sequences, use `bs_aln(x)` to align all sequences aln <- bs_aln(x[1:5]) bs_hamming(aln) ## End(Not run)
## Not run: fa_file <- system.file("extdata/HA.fas", package="seqmagick") x <- fa_read(fa_file) ## align first 5 sequences, use `bs_aln(x)` to align all sequences aln <- bs_aln(x[1:5]) bs_hamming(aln) ## End(Not run)
rename sequence
bs_rename(x, mapping, position, sep, mode)
bs_rename(x, mapping, position, sep, mode)
x |
BStringSet object |
mapping |
two column data.frame |
position |
rename token at specific position |
sep |
sepator to divide token |
mode |
one of 'replace', 'prefix' or 'suffix' |
BStringSet
Guangchuang Yu
consensus of aligned sequences
consensus of aligned sequences
consensus(x, type = "DNA") bs_consensus(x, type = "DNA", r = 1)
consensus(x, type = "DNA") bs_consensus(x, type = "DNA", r = 1)
x |
BStringSet object |
type |
currently, only DNA supported |
r |
if any NT > r, it will be selected as representative base |
consensus sequence string
consensus sequence string
Guangchuang Yu
## Not run: fa_file <- system.file("extdata/HA.fas", package="seqmagick") x <- fa_read(fa_file) ## align first 5 sequences, use `bs_aln(x)` to align all sequences aln <- bs_aln(x[1:5]) ## or bs_consensus(aln) consensus(aln) ## End(Not run)
## Not run: fa_file <- system.file("extdata/HA.fas", package="seqmagick") x <- fa_read(fa_file) ## align first 5 sequences, use `bs_aln(x)` to align all sequences aln <- bs_aln(x[1:5]) ## or bs_consensus(aln) consensus(aln) ## End(Not run)
download genbank or fasta file by accession number
download_genbank(acc, db = "nuccore", format = "genbank", outfile = NULL, ...)
download_genbank(acc, db = "nuccore", format = "genbank", outfile = NULL, ...)
acc |
accession number(s) |
db |
supported db, currently 'nuccore' |
format |
one of 'genbank' or 'fasta' |
outfile |
output file, by default, acc.gb or acc.fa |
... |
additional parameters for download.file |
output file vector
Guangchuang Yu
## Not run: tmpgb <- tempfile(fileext = '.gb') tmpfa <- tempfile(fileext = '.fa') download_genbank(acc='AB115403', format='genbank', outfile=tmpgb) download_genbank(acc='AB115403', format='fasta', outfile=tmpfa) ## End(Not run)
## Not run: tmpgb <- tempfile(fileext = '.gb') tmpfa <- tempfile(fileext = '.fa') download_genbank(acc='AB115403', format='genbank', outfile=tmpgb) download_genbank(acc='AB115403', format='fasta', outfile=tmpfa) ## End(Not run)
combine 2 fasta files into 1
fa_combine(file1, file2, outfile = NULL, type = "interleaved")
fa_combine(file1, file2, outfile = NULL, type = "interleaved")
file1 |
fasta file 1 |
file2 |
fasta file 2 |
outfile |
output file |
type |
one of interleaved and sequential |
BStringSet
Guangchuang Yu
fa_file <- system.file("extdata/HA.fas", package="seqmagick") x <- fa_read(fa_file) fa1 <- tempfile(fileext=".fa") fa2 <- tempfile(fileext=".fa") fa_write(x[1:5], fa1) fa_write(x[6:10], fa2) fa_combine(fa1, fa2)
fa_file <- system.file("extdata/HA.fas", package="seqmagick") x <- fa_read(fa_file) fa1 <- tempfile(fileext=".fa") fa2 <- tempfile(fileext=".fa") fa_write(x[1:5], fa1) fa_write(x[6:10], fa2) fa_combine(fa1, fa2)
fasta filter by searching pattern
fa_filter( fasfile, pattern, by = "description", ignore.case = FALSE, outfile = NULL, type = "interleaved" )
fa_filter( fasfile, pattern, by = "description", ignore.case = FALSE, outfile = NULL, type = "interleaved" )
fasfile |
input fasta file |
pattern |
keyword for filter |
by |
one of 'description' and 'sequence' |
ignore.case |
logical |
outfile |
output file |
type |
one of 'interleaved' and 'sequential' |
BStringSet object
Guangchuang Yu
rename fasta sequence name
fa_rename(fasfile, mapping_file, position, sep, mode, outfile)
fa_rename(fasfile, mapping_file, position, sep, mode, outfile)
fasfile |
fasta file |
mapping_file |
mapping file |
position |
rename token at specific position |
sep |
sepator to divide token |
mode |
one of 'replace', 'prefix' or 'suffix' |
outfile |
output file |
BStringSet object
Guangchuang Yu
convert fasta file to interleaved format
convert fasta file to sequential format
fa_to_interleaved(file, outfile) fa_to_sequential(file, outfile)
fa_to_interleaved(file, outfile) fa_to_sequential(file, outfile)
file |
fasta file |
outfile |
output file |
None
None
Guangchuang Yu
fa_file <- system.file("extdata/HA.fas", package="seqmagick") fa1 <- tempfile(fileext = '.fa') fa2 <- tempfile(fileext = '.fa') fa_to_interleaved(fa_file, fa1) fa_to_sequential(fa_file, fa2)
fa_file <- system.file("extdata/HA.fas", package="seqmagick") fa1 <- tempfile(fileext = '.fa') fa2 <- tempfile(fileext = '.fa') fa_to_interleaved(fa_file, fa1) fa_to_sequential(fa_file, fa2)
write fasta file
fa_write(x, outfile, type = "interleaved")
fa_write(x, outfile, type = "interleaved")
x |
XStringSet object |
outfile |
output file |
type |
one of interleaved and sequential |
None
Guangchuang Yu
https://phylipweb.github.io/phylip/
phy_file <- system.file("extdata/HA.phy", package="seqmagick") x <- phy_read(phy_file) fa_file <- tempfile(fileext = '.fas') fa_write(x, fa_file)
phy_file <- system.file("extdata/HA.phy", package="seqmagick") x <- phy_read(phy_file) fa_file <- tempfile(fileext = '.fas') fa_write(x, fa_file)
convert fasta (aligned sequences) to phylip format
fas2phy(fasfile, outfile = "out.phy", type = "sequential")
fas2phy(fasfile, outfile = "out.phy", type = "sequential")
fasfile |
aligned sequences in fasta format |
outfile |
output file |
type |
one of interleaved and sequential |
None
Guangchuang Yu fa_file <- system.file("extdata/HA.fas", package="seqmagick") phy_file <- tempfile(fileext = ".phy") fas2phy(fa_file, phy_file)
extract accession number and sequence from genbank file
gb_read(file)
gb_read(file)
file |
input genbank file |
sequence object
Guangchuang Yu
get id at specific position
get_id(x, sep = " ", position)
get_id(x, sep = " ", position)
x |
sequence description line |
sep |
separator to split x |
position |
id position |
id
Guangchuang Yu
fa_file <- system.file("extdata/HA.fas", package="seqmagick") x <- fa_read(fa_file) get_id(names(x)[1:5], sep = " ", position=1)
fa_file <- system.file("extdata/HA.fas", package="seqmagick") x <- fa_read(fa_file) get_id(names(x)[1:5], sep = " ", position=1)
read sequence alignment file
mega_read(file) fa_read(file, type = "auto") clw_read(file, type = "auto") sth_read(file, type = "auto")
mega_read(file) fa_read(file, type = "auto") clw_read(file, type = "auto") sth_read(file, type = "auto")
file |
multiple sequence file |
type |
one of 'DNA', 'RNA', 'AA', 'Protein', 'unknown' or 'auto' |
BStringSet object
Guangchuang Yu
fa_file <- system.file("extdata/HA.fas", package="seqmagick") fa_read(fa_file) mega_file <- system.file("extdata/mega/Crab_rRNA.meg", package="seqmagick") mega_read(mega_file)
fa_file <- system.file("extdata/HA.fas", package="seqmagick") fa_read(fa_file) mega_file <- system.file("extdata/mega/Crab_rRNA.meg", package="seqmagick") mega_read(mega_file)
read aligned sequences in phylip format
phy_read(file)
phy_read(file)
file |
phylip file |
BStringSet object
Guangchuang Yu
phy_file <- system.file("extdata/HA.phy", package="seqmagick") phy_read(phy_file)
phy_file <- system.file("extdata/HA.phy", package="seqmagick") phy_read(phy_file)
write phylip file
phy_write(x, outfile, type = "sequential")
phy_write(x, outfile, type = "sequential")
x |
XStringSet object |
outfile |
output file |
type |
one of interleaved and sequential |
None
Guangchuang Yu
## Not run: fa_file <- system.file("extdata/HA.fas", package="seqmagick") x <- fa_read(fa_file) aln <- bs_aln(x[1:5]) phy_file <- tempfile(fileext = '.phy') phy_write(aln, phy_file) ## End(Not run)
## Not run: fa_file <- system.file("extdata/HA.fas", package="seqmagick") x <- fa_read(fa_file) aln <- bs_aln(x[1:5]) phy_file <- tempfile(fileext = '.phy') phy_write(aln, phy_file) ## End(Not run)
convert phylip file to fasta file
phy2fas(phyfile, outfile = "out.fas", type = "interleaved")
phy2fas(phyfile, outfile = "out.fas", type = "interleaved")
phyfile |
phylip file |
outfile |
output file |
type |
one of interleaved and sequential |
None
Guangchuang Yu
phy_file <- system.file("extdata/HA.phy", package="seqmagick") fa_file <- tempfile(fileext = '.fas') phy2fas(phy_file, fa_file)
phy_file <- system.file("extdata/HA.phy", package="seqmagick") fa_file <- tempfile(fileext = '.fas') phy2fas(phy_file, fa_file)
rename txt file (eg Description line of fasta file) according to first token (eg accession number)
renameTXT(txt_file, name_file, sep = "_", split = TRUE)
renameTXT(txt_file, name_file, sep = "_", split = TRUE)
txt_file |
txt file |
name_file |
name file |
sep |
separator |
split |
logical, split result or not |
None
Guangchuang Yu
replace character for example from '-' to 'N' of fasta sequence that only applied inside sequence any '-' character at start/end of the sequence (aligned seqs may contains '-' at prefix/suffix) will not be replaced
replaceInside(fasfile, from = "-", to = "N", outfile = NULL)
replaceInside(fasfile, from = "-", to = "N", outfile = NULL)
fasfile |
fasta file |
from |
character to be replaced, '-' by default |
to |
replace 'from' to 'to', 'N' by default |
outfile |
output file |
DNAStringSet
Guangchuang Yu
sequence length
seqlen(fasfile)
seqlen(fasfile)
fasfile |
fasta file |
numeric vector
Guangchuang Yu