Authors: Uthaipaisanwong P.
Last update: August 8, 2018
1 Prerequisites
2 Annotation of coding regions
3 Annotation of non-coding regions
4 Genome component
5 Convert result to GFF3 file format
6 CAB-Inhouse annotation pipeline (CABAnnot)
7 Genome visualization by JBrowse
For participants, this software is already installed on our CAB server (158.108.144.7). You can access the programs on the CAB server (AgCipher) from the command line (shell).
Access CAB server (AgCipher)
user: annotest password: anno@cab18
ssh annotest@158.108.144.7
password: anno@cab18
input = "/home/annotest"
cd {input}
On the CAB server, please check the current working directory
pwd
Make a directory named test
mkdir test
cd test
Copy the assembled sequences for annotation from
/home/annotest/AnnotationData
cp /home/annotest/AnnotationData/contigs_nc.fasta .
ls -al
%%bash
/share/apps/augustus-3.3.1/bin/augustus
%%bash
/share/apps/augustus-3.3.1/bin/augustus --genemodel=partial --protein=on --introns=on --start=on --stop=on --codingseq=on --gff3=on --UTR=on --uniqueGeneId=true --species=tomato /home/annotest/test/contigs_nc.fasta > /home/annotest/test/contigs_nc.gff
List files
ls -al
more contigs_nc.gff
%%bash
perl /share/apps/augustus-3.3.1/scripts/getAnnoFasta.pl /home/annotest/test/contigs_nc.gff --seqfile=/home/annotest/test/contigs_nc.fasta
ls -al
more contigs_nc.codingseq
BLAST coding sequences against viridiplantae_prot database
%%bash
/share/apps/blast/bin/blastx -db /home/pichapuk/DATABASE/PLANT/viridiplantae_prot -outfmt "6 qseqid sseqid pident evalue bitscore length mismatch gapopen frames qstart qend qlen sstart send slen sstrand stitle" -evalue 1e-5 -num_threads 32 -max_target_seqs 10 -out /home/annotest/test/contigs_nc.codingseq.viridiplantae_prot.6.blast -query /home/annotest/test/contigs_nc.codingseq
ls -al
more contigs_nc.codingseq.viridiplantae_prot.6.blast
BLAST assembled sequences against viridiplantae_prot database (outfmt=0)
%%bash
#/share/apps/blast/bin/blastx -db /home/pichapuk/DATABASE/PLANT/viridiplantae_prot -outfmt 0 -evalue 1e-60 -max_hsps 10 -num_threads 32 -out /home/annotest/test/contigs_nc.fasta.viridiplantae_prot.0.blast -query /home/annotest/test/contigs_nc.fasta
ls -lt
more contigs_nc.fasta.viridiplantae_prot.0.blast
%%bash
tRNAscan-SE -h
%%bash
tRNAscan-SE -o /home/annotest/test/contigs_nc.tRNA -f /home/annotest/test/contigs_nc.tRNACove -m /home/annotest/test/contigs_nc.tRNAStat /home/annotest/test/contigs_nc.fasta
ls -lt
more contigs_nc.tRNA
more contigs_nc.tRNAStat
BLAST assembled sequences against plant ribosomal RNA database (NCBI; August 2018)
%%bash
/share/apps/blast/bin/blastn -help
%%bash
/share/apps/blast/bin/blastn -db /home/pichapuk/DATABASE/RNA/rRNA_plant_ncbi_edit -outfmt "6 qseqid sseqid pident evalue bitscore length mismatch gapopen frames qstart qend qlen sstart send slen sstrand stitle" -evalue 1e-5 -word_size 11 -show_gis -max_target_seqs 10 -num_threads 32 -out /home/annotest/test/contigs_nc.ribosomeRNA.6.blast -query /home/annotest/test/contigs_nc.fasta
more contigs_nc.ribosomeRNA.6.blast
%%bash
/share/apps/miRanda-3.3a/src/miranda /home/pichapuk/mirbase/21/mature.fa /home/annotest/test/contigs_nc.fasta > /home/annotest/test/contigs_nc.mirbase.out
more contigs_nc.mirbase.out
%%bash
perl /share/apps/RepeatMasker/RepeatMasker -h
%%bash
perl /share/apps/RepeatMasker/RepeatMasker /home/annotest/test/contigs_nc.fasta -species arabidopsis -gff
ls -lt
more contigs_nc.fasta.masked
more contigs_nc.fasta.tbl
more contigs_nc.fasta.out
%%bash
perl /share/apps/CABAnnot1.0/scripts/misa.pl
%%bash
more /share/apps/CABAnnot1.0/scripts/misa.ini
%%bash
perl /share/apps/CABAnnot1.0/scripts/misa.pl /home/annotest/test/contigs_nc.fasta /share/apps/CABAnnot1.0/scripts/misa.ini
ls -lt
more contigs_nc.fasta.misa
more contigs_nc.fasta.statistics
%%bash
perl /share/apps/CABAnnot1.0/scripts/blast2gff3.pl /home/annotest/test/contigs_nc.codingseq.viridiplantae_prot.6.blast /home/annotest/test/contigs_nc.gff
more contigs_nc.codingseq.viridiplantae_prot.6.annotate.gff
%%bash
perl /share/apps/CABAnnot1.0/scripts/trna2gff3.pl /home/annotest/test/contigs_nc.tRNA
more contigs_nc.tRNA.gff
%%bash
perl /share/apps/CABAnnot1.0/scripts/rrna2gff3.pl /home/annotest/test/contigs_nc.ribosomeRNA.6.blast
more contigs_nc.ribosomeRNA.6.blast.gff
%%bash
perl /share/apps/CABAnnot1.0/scripts/repeatMasker2gff3.pl /home/annotest/test/contigs_nc.fasta.out
more contigs_nc.fasta.out.gff
%%bash
perl /share/apps/CABAnnot1.0/scripts/misa_gff3_converter.pl --input /home/annotest/test/contigs_nc.fasta.misa --output /home/annotest/test/contigs_nc.misa.gff
more contigs_nc.misa.gff
Make a directory named annotation in your home directory /home/annotest
cd /home/annotest/
mkdir annotation
cd /home/annotest/annotation
pwd
cp /home/annotest/AnnotationData/contigs.fa /home/annotest/annotation
%%bash
/share/apps/bbmap/reformat.sh in=contigs.fa out=contigs.cut.fa minlength=11900
screen -S annotation
%%bash
/share/apps/CABAnnot1.0/bin/CABAnnot.pl -h
#/share/apps/CABAnnot1.0/bin/CABAnnot.pl --input contigs.cut.fa --sequenceType "nuclear" --organismName "Solanum lycopersicum" --version 1
/share/apps/CABAnnot1.0/bin/CABAnnot.pl --input contigs.cut.fa --sequenceType "nuclear" --organismName "Solanum lycopersicum" --version 1 --runSequencePreprocess 1 --runSequenceAnalysis 0 --chooseNoncodingProgram "1,2,3,4"
Press “Ctrl-A” and then “d” to detach from the screen session
top
Format reference sequences for JBrowse
cd /home/annotest/public_html
Make directory "Tomato" at /home/user/public_html
mkdir Tomato
cp /home/annotest/annotation/* /home/annotest/public_html/Tomato/
Change directory to /home/user/public_html/Tomato
cd /home/annotest/public_html/Tomato
ls -al
Change directory to /home/user/public_html/JBrowse
cd /home/annotest/public_html/JBrowse
Configure JBrowse *** change annotest to your username
/home/annotest/public_html/JBrowse/bin/prepare-refseqs.pl --fasta /home/annotest/public_html/Tomato/contigs_nc.fasta --out /home/annotest/public_html/Tomato/TomatoV1
/home/annotest/public_html/JBrowse/bin/flatfile-to-json.pl --gff /home/annotest/public_html/Tomato/contigs_nc.codingseq.nr.6.annotate.gff --trackLabel "Gene" --trackType "JBrowse/View/Track/CanvasFeatures" --className "feature5" --key "gene" --subfeatureClasses '{"CDS": "transcript-CDS", "exon": "transcript-exon"}' --out /home/annotest/public_html/Tomato/TomatoV1
/home/annotest/public_html/JBrowse/bin/flatfile-to-json.pl --trackLabel "tRNA" --trackType "JBrowse/View/Track/CanvasFeatures" --key "tRNA" --className generic_parent --subfeatureClasses '{"match_part" : "feature"}' --gff /home/annotest/public_html/Tomato/contigs_nc.tRNA.gff --out /home/annotest/public_html/Tomato/TomatoV1
/home/annotest/public_html/JBrowse/bin/flatfile-to-json.pl --trackLabel "repeat masker" --trackType CanvasFeatures --key RepeatMasker --className generic_parent --subfeatureClasses '{"match_part" : "feature"}' -gff /home/annotest/public_html/Tomato/contigs_nc.fasta.out.gff --out /home/annotest/public_html/Tomato/TomatoV1
Display annotation result
http://www.breedserve.cab.kps.ku.ac.th/~annotest/JBrowse/?data=../Tomato/TomatoV1