Package 'seqmagick' reference manual

Title:	Sequence Manipulation Utilities
Description:	Supports reading and writing sequences for different formats (currently interleaved and sequential formats for 'FASTA' and 'PHYLIP'), file conversion, and manipulation (e.g. filter sequences that contain specify pattern, export consensus sequence from an alignment).
Authors:	Guangchuang Yu [aut, cre]
Maintainer:	Guangchuang Yu <[email protected]>
License:	Artistic-2.0
Version:	0.1.7
Built:	2025-03-03 06:05:07 UTC
Source:	https://github.com/yulab-smu/seqmagick

bs_aln

Description

sequence alignment

Usage

bs_aln(x, method = "muscle", ...)
bs_aln(x, method = "muscle", ...)

Arguments

`x`	XStringSet object
`method`	alignment method
`...`	additional parameter

Value

aligned sequences, XStringSet object

Author(s)

Guangchuang Yu

Examples

## Not run: 
fa_file <- system.file("extdata/HA.fas", package="seqmagick")
x <- fa_read(fa_file)
bs_aln(x)

## End(Not run)
## Not run: 
fa_file <- system.file("extdata/HA.fas", package="seqmagick")
x <- fa_read(fa_file)
bs_aln(x)

## End(Not run)

bs_filter

Description

biological sequence filter by searching pattern

Usage

bs_filter(x, pattern, by = "description", ignore.case = FALSE)
bs_filter(x, pattern, by = "description", ignore.case = FALSE)

Arguments

`x`	BStringSet object
`pattern`	keyword for filter
`by`	one of 'description' and 'sequence'
`ignore.case`	logical

Value

BStringSet object

Author(s)

Guangchuang Yu

Examples

fa_file <- system.file("extdata/HA.fas", package="seqmagick")
x <- fa_read(fa_file)
bs_filter(x, 'ATGAAAGTAAAA', by='sequence')
fa_file <- system.file("extdata/HA.fas", package="seqmagick")
x <- fa_read(fa_file)
bs_filter(x, 'ATGAAAGTAAAA', by='sequence')

bs_hamming

Description

hamming distances of sequences

Usage

bs_hamming(x, count_indel = FALSE, ...)
bs_hamming(x, count_indel = FALSE, ...)

Arguments

`x`	BStringSet object
`count_indel`	whether count indel or not
`...`	additional parameters

Value

hamming distance

Author(s)

Guangchuang Yu

Examples

## Not run: 
fa_file <- system.file("extdata/HA.fas", package="seqmagick")
x <- fa_read(fa_file)
## align first 5 sequences, use `bs_aln(x)` to align all sequences
aln <- bs_aln(x[1:5])
bs_hamming(aln)

## End(Not run)
## Not run: 
fa_file <- system.file("extdata/HA.fas", package="seqmagick")
x <- fa_read(fa_file)
## align first 5 sequences, use `bs_aln(x)` to align all sequences
aln <- bs_aln(x[1:5])
bs_hamming(aln)

## End(Not run)

consensus

Description

consensus of aligned sequences

Usage

consensus(x, type = "DNA")

bs_consensus(x, type = "DNA", r = 1)
consensus(x, type = "DNA")

bs_consensus(x, type = "DNA", r = 1)

Arguments

`x`	BStringSet object
`type`	currently, only DNA supported
`r`	if any NT > r, it will be selected as representative base

Value

consensus sequence string

Author(s)

Guangchuang Yu

Examples

## Not run: 
fa_file <- system.file("extdata/HA.fas", package="seqmagick")
x <- fa_read(fa_file)
## align first 5 sequences, use `bs_aln(x)` to align all sequences
aln <- bs_aln(x[1:5])
## or bs_consensus(aln)
consensus(aln)

## End(Not run)
## Not run: 
fa_file <- system.file("extdata/HA.fas", package="seqmagick")
x <- fa_read(fa_file)
## align first 5 sequences, use `bs_aln(x)` to align all sequences
aln <- bs_aln(x[1:5])
## or bs_consensus(aln)
consensus(aln)

## End(Not run)

download_genbank

Description

download genbank or fasta file by accession number

Usage

download_genbank(acc, db = "nuccore", format = "genbank", outfile = NULL, ...)
download_genbank(acc, db = "nuccore", format = "genbank", outfile = NULL, ...)

Arguments

`acc`	accession number(s)
`db`	supported db, currently 'nuccore'
`format`	one of 'genbank' or 'fasta'
`outfile`	output file, by default, acc.gb or acc.fa
`...`	additional parameters for download.file

Value

output file vector

Author(s)

Guangchuang Yu

Examples

## Not run: 
tmpgb <- tempfile(fileext = '.gb')
tmpfa <- tempfile(fileext = '.fa')
download_genbank(acc='AB115403', format='genbank', outfile=tmpgb)
download_genbank(acc='AB115403', format='fasta', outfile=tmpfa)

## End(Not run)
## Not run: 
tmpgb <- tempfile(fileext = '.gb')
tmpfa <- tempfile(fileext = '.fa')
download_genbank(acc='AB115403', format='genbank', outfile=tmpgb)
download_genbank(acc='AB115403', format='fasta', outfile=tmpfa)

## End(Not run)

fa_combine

Description

combine 2 fasta files into 1

Usage

fa_combine(file1, file2, outfile = NULL, type = "interleaved")
fa_combine(file1, file2, outfile = NULL, type = "interleaved")

Arguments

`file1`	fasta file 1
`file2`	fasta file 2
`outfile`	output file
`type`	one of interleaved and sequential

Value

BStringSet

Author(s)

Guangchuang Yu

Examples

fa_file <- system.file("extdata/HA.fas", package="seqmagick")
x <- fa_read(fa_file)
fa1 <- tempfile(fileext=".fa")
fa2 <- tempfile(fileext=".fa")
fa_write(x[1:5], fa1)
fa_write(x[6:10], fa2)
fa_combine(fa1, fa2)
fa_file <- system.file("extdata/HA.fas", package="seqmagick")
x <- fa_read(fa_file)
fa1 <- tempfile(fileext=".fa")
fa2 <- tempfile(fileext=".fa")
fa_write(x[1:5], fa1)
fa_write(x[6:10], fa2)
fa_combine(fa1, fa2)

fa_to_interleaved

Description

convert fasta file to interleaved format

convert fasta file to sequential format

Usage

fa_to_interleaved(file, outfile)

fa_to_sequential(file, outfile)
fa_to_interleaved(file, outfile)

fa_to_sequential(file, outfile)

Arguments

`file`	fasta file
`outfile`	output file

Value

None

Author(s)

Guangchuang Yu

Examples

fa_file <- system.file("extdata/HA.fas", package="seqmagick")
fa1 <- tempfile(fileext = '.fa')
fa2 <- tempfile(fileext = '.fa')
fa_to_interleaved(fa_file, fa1)
fa_to_sequential(fa_file, fa2)
fa_file <- system.file("extdata/HA.fas", package="seqmagick")
fa1 <- tempfile(fileext = '.fa')
fa2 <- tempfile(fileext = '.fa')
fa_to_interleaved(fa_file, fa1)
fa_to_sequential(fa_file, fa2)

fa_write

Description

write fasta file

Usage

fa_write(x, outfile, type = "interleaved")
fa_write(x, outfile, type = "interleaved")

Arguments

`x`	XStringSet object
`outfile`	output file
`type`	one of interleaved and sequential

Value

None

Author(s)

Guangchuang Yu

References

https://phylipweb.github.io/phylip/

Examples

phy_file <- system.file("extdata/HA.phy", package="seqmagick")
x <- phy_read(phy_file)
fa_file <- tempfile(fileext = '.fas')
fa_write(x, fa_file)
phy_file <- system.file("extdata/HA.phy", package="seqmagick")
x <- phy_read(phy_file)
fa_file <- tempfile(fileext = '.fas')
fa_write(x, fa_file)

fas2phy

Description

convert fasta (aligned sequences) to phylip format

Usage

fas2phy(fasfile, outfile = "out.phy", type = "sequential")
fas2phy(fasfile, outfile = "out.phy", type = "sequential")

Arguments

`fasfile`	aligned sequences in fasta format
`outfile`	output file
`type`	one of interleaved and sequential

Value

None

Author(s)

Guangchuang Yu fa_file <- system.file("extdata/HA.fas", package="seqmagick") phy_file <- tempfile(fileext = ".phy") fas2phy(fa_file, phy_file)

gb_read

Description

extract accession number and sequence from genbank file

Usage

gb_read(file)
gb_read(file)

Arguments

file

input genbank file

Value

sequence object

Author(s)

Guangchuang Yu

get_id

Description

get id at specific position

Usage

get_id(x, sep = " ", position)
get_id(x, sep = " ", position)

Arguments

`x`	sequence description line
`sep`	separator to split x
`position`	id position

Value

Author(s)

Guangchuang Yu

Examples

fa_file <- system.file("extdata/HA.fas", package="seqmagick")
x <- fa_read(fa_file)
get_id(names(x)[1:5], sep = " ", position=1)
fa_file <- system.file("extdata/HA.fas", package="seqmagick")
x <- fa_read(fa_file)
get_id(names(x)[1:5], sep = " ", position=1)

read sequence alignment file

Description

read sequence alignment file

Usage

mega_read(file)

fa_read(file, type = "auto")

clw_read(file, type = "auto")

sth_read(file, type = "auto")
mega_read(file)

fa_read(file, type = "auto")

clw_read(file, type = "auto")

sth_read(file, type = "auto")

Arguments

`file`	multiple sequence file
`type`	one of 'DNA', 'RNA', 'AA', 'Protein', 'unknown' or 'auto'

Value

BStringSet object

Author(s)

Guangchuang Yu

Examples

fa_file <- system.file("extdata/HA.fas", package="seqmagick")
fa_read(fa_file)

mega_file <- system.file("extdata/mega/Crab_rRNA.meg", package="seqmagick")
mega_read(mega_file)
fa_file <- system.file("extdata/HA.fas", package="seqmagick")
fa_read(fa_file)

mega_file <- system.file("extdata/mega/Crab_rRNA.meg", package="seqmagick")
mega_read(mega_file)

phy_read

Description

read aligned sequences in phylip format

Usage

phy_read(file)
phy_read(file)

Arguments

file

phylip file

Value

BStringSet object

Author(s)

Guangchuang Yu

Examples

phy_file <- system.file("extdata/HA.phy", package="seqmagick")
phy_read(phy_file)
phy_file <- system.file("extdata/HA.phy", package="seqmagick")
phy_read(phy_file)

phy_write

Description

write phylip file

Usage

phy_write(x, outfile, type = "sequential")
phy_write(x, outfile, type = "sequential")

Arguments

`x`	XStringSet object
`outfile`	output file
`type`	one of interleaved and sequential

Value

None

Author(s)

Guangchuang Yu

Examples

## Not run: 
fa_file <- system.file("extdata/HA.fas", package="seqmagick")
x <- fa_read(fa_file)
aln <- bs_aln(x[1:5])
phy_file <- tempfile(fileext = '.phy')
phy_write(aln, phy_file)

## End(Not run)
## Not run: 
fa_file <- system.file("extdata/HA.fas", package="seqmagick")
x <- fa_read(fa_file)
aln <- bs_aln(x[1:5])
phy_file <- tempfile(fileext = '.phy')
phy_write(aln, phy_file)

## End(Not run)

phy2fas

Description

convert phylip file to fasta file

Usage

phy2fas(phyfile, outfile = "out.fas", type = "interleaved")
phy2fas(phyfile, outfile = "out.fas", type = "interleaved")

Arguments

`phyfile`	phylip file
`outfile`	output file
`type`	one of interleaved and sequential

Value

None

Author(s)

Guangchuang Yu

Examples

phy_file <- system.file("extdata/HA.phy", package="seqmagick")
fa_file <- tempfile(fileext = '.fas')
phy2fas(phy_file, fa_file)
phy_file <- system.file("extdata/HA.phy", package="seqmagick")
fa_file <- tempfile(fileext = '.fas')
phy2fas(phy_file, fa_file)

replace character for example from '-' to 'N' of fasta sequence that only applied inside sequence any '-' character at start/end of the sequence (aligned seqs may contains '-' at prefix/suffix) will not be replaced

Usage

replaceInside(fasfile, from = "-", to = "N", outfile = NULL)
replaceInside(fasfile, from = "-", to = "N", outfile = NULL)

Arguments

`fasfile`	fasta file
`from`	character to be replaced, '-' by default
`to`	replace 'from' to 'to', 'N' by default
`outfile`	output file

Value

DNAStringSet

Author(s)

Guangchuang Yu

seqlen

Description

sequence length

Usage

seqlen(fasfile)
seqlen(fasfile)

Arguments

fasfile

fasta file

Value

numeric vector

Author(s)

Guangchuang Yu

`bamfile`	bam file
`refseq`	refseq, DNAStringSet object

`bamfile`	bam file
`refseq`	refseq, DNAStringSet object

`x`	BStringSet object
`mapping`	two column data.frame
`position`	rename token at specific position
`sep`	sepator to divide token
`mode`	one of 'replace', 'prefix' or 'suffix'

`txt_file`	txt file
`name_file`	name file
`sep`	separator
`split`	logical, split result or not

`fasfile`	input fasta file
`pattern`	keyword for filter
`by`	one of 'description' and 'sequence'
`ignore.case`	logical
`outfile`	output file
`type`	one of 'interleaved' and 'sequential'

`fasfile`	fasta file
`mapping_file`	mapping file
`position`	rename token at specific position
`sep`	sepator to divide token
`mode`	one of 'replace', 'prefix' or 'suffix'
`outfile`	output file

Package 'seqmagick'

Help Index

bam2DNAStringSet

Description

Usage

Arguments

Value

Author(s)

bam2DNAStringSet2

Description

Usage

Arguments

Value

Author(s)

bs_aln

Description

Usage

Arguments

Value

Author(s)

Examples

bs_filter

Description

Usage

Arguments

Value

Author(s)

Examples

bs_hamming

Description

Usage

Arguments

Value

Author(s)

Examples

bs_rename

Description

Usage

Arguments

Value

Author(s)

consensus

Description

Usage

Arguments

Value

Author(s)

Examples

download_genbank

Description

Usage

Arguments

Value

Author(s)

Examples

fa_combine

Description

Usage

Arguments

Value

Author(s)

Examples

fa_filter

Description

Usage

Arguments

Value

Author(s)

fa_rename

Description

Usage

Arguments

Value

Author(s)

fa_to_interleaved

Description

Usage

Arguments

Value

Author(s)