Incorporating Illumina RNAseq into AUGUSTUS with Tophat

Incorporating Illumina RNAseq into AUGUSTUS with Tophat

#!/bin/bash

###### Step 1 ---------------------

# Aligning reads with Bowtie/Tophat
bowtie2-build genome.fasta.masked genome
tophat -r 20 -mate-std-dev 40 --coverage-search --microexon-search -p 24 --library-type fr-unstranded --phred64-quals genome reads_1.fq reads_2.fq 

# Filtering raw alignments
samtools sort -n tophat_out/accepted_hits.bam tophat_out/accepted_hits.s
augustus.2.7/auxprogs/filterBam/bin/filterBam --uniq --paired --in tophat_out/accepted_hits.s.bam --out tophat_out/accepted_hits.sf.bam

samtools view -H tophat_out/accepted_hits.sf.bam > header.txt

# Creating intron hints
samtools sort tophat_out/accepted_hits.sf.bam both.ssf
augustus.2.7/auxprogs/bam2hints/bam2hints --intronsonly --in=both.ssf.bam --out=hints.gff

# Run Augustus
augustus.2.7/bin/augustus --species=SPECIES --extrinsicCfgFile=extrinsic.cfg --alternatives-from-evidence=true --hintsfile=hints.gff --allow_hinted_splicesites=atac --introns=on --genemodel=complete genome.fasta > aug1.out
######-----------------------------

###### Step 2 ---------------------

# Create an exon-exon junction database
cat aug1.out | tee aug.prelim.gff | grep -P "\tintron\t" > aug1.introns.gff
cat hints.gff aug1.introns.gff | perl -ne 'split; print "@_[0]:@_[3]-@_[4]\n";' | sort -u > introns.lst
augustus.2.7/scripts/intron2exex.pl --flank=90 --introns=introns.lst --seq=genome.fasta.masked --exex=exex.fa --map=map.psl
bowtie2-build exex.fa genome_exex1

# Aligning reads with Bowtie
bowtie2 -q --phred64 --fr -p 24 --reorder -x genome_exex1 -1 reads_1.fq -2 reads_2.fq -S bowtie.sam
samtools view -S -F 4 bowtie.sam > bowtie.F.sam
augustus.2.7/scripts/samMap.pl bowtie.F.sam map.psl > bowtie.global.sam 2> bowtie.samMap.err
# 需要将samMap.pl中的75修正为需要的reads长度。注意使用 2> 来重定向STDERR数据,以提高运行速度。

# Join data from step 1 and step 2
bamtools filter -in tophat_out/accepted_hits.bam -out tophat_out/accepted_hits.noN.bam -script augustus.2.7/auxprogs/auxBamFilters/operation_N_filter.txt
cat header.txt bowtie.global.sam > bowtie.global.h.sam
samtools view -bS -o bowtie.global.h.bam bowtie.global.h.sam
bamtools merge -in bowtie.global.h.bam -in tophat_out/accepted_hits.noN.bam -out both.bam
samtools sort -n both.bam both.s

# Filtering raw alignments
augustus.2.7/auxprogs/filterBam/bin/filterBam --uniq --paired --in both.s.bam --out both.sf.bam

# Creating intron hints
samtools sort both.sf.bam both.ssf
augustus.2.7/auxprogs/bam2hints/bam2hints --intronsonly --in=both.ssf.bam --out=hints.2.gff

#Run Augustus
augustus --species=yourSpecies --extrinsicCfgFile=extrinsic.cfg --alternatives-from-evidence=true --hintsfile=hints.2.gff --allow_hinted_splicesites=atac genome.fasta > aug2.out

发表评论

您的电子邮箱地址不会被公开。 必填项已用*标注

此站点使用Akismet来减少垃圾评论。了解我们如何处理您的评论数据