/Project_pbfb_Edisa_Xiu

Project for the course "Practical Bioinformatics for Biologists", by Edisa and Xiu

Primary Language: R

# Project_pbfb_Edisa_Xiu
# Pipeline to analyze 16S rDNA sequences combining USEARCH and QIIME and community diversity analysis using R. 

####################################################################################################################
##1
##Obtain sequences from Dini-Andreote et al. 2014 The ISME Journal (30 samples in total: 2 months * 5 stages of succession * 3 replicates) 
##The sequences are already merged and quality filtered in QIIME. (Sequences files : seqs.fna) (Information of the samples in MappingMJ1.txt)
##IMPORTANT: To check the sequence files, just look at the first lines with `head` on the cluster or in a terminal. The files are very large and your text editor may crash.

####################################################################################################################
##2 
##The original fna file has the following header: >1FMay_1 H23S4DR01B9UER orig_bc=CGTGTCTCTA new_bc=CGTGTCTCTA bc_diffs=0
##and we need to change it into:  >1FMay_1

# Truncate every FASTA header line (lines starting with ">") at its first
# whitespace character, so ">1FMay_1 H23S4DR01B9UER orig_bc=..." becomes
# ">1FMay_1". Sequence lines are left untouched. [[:space:]] is used instead
# of \s because \s is a GNU extension and is not portable to every sed.
sed -e '/^>/ s/[[:space:]].*$//' seqs.fna > seqs.fa

####################################################################################################################
##3 
##Use USEARCH to pick OTU and make OTU table
# Requires the USEARCH software to be installed.
# Save the script shown below as script1.sh, then submit it to the Peregrine
# cluster's SLURM scheduler with:
sbatch script1.sh

#!/bin/bash
#SBATCH --job-name=project_EX
#SBATCH --time=00:10:00
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --cpus-per-task=1
#SBATCH --mem=2G
#SBATCH --partition=short

# script1.sh: dereplicate the reads, cluster them into OTUs and build an OTU
# table with USEARCH.
# Inputs  (current directory): seqs.fa, seqs.fna
# Outputs (current directory): uniques.fa, otus.fa, otu_table.txt

# Stop at the first failing step: every step consumes the previous step's
# output, so continuing after an error would only produce misleading files.
set -euo pipefail

# Single place to change the USEARCH binary location.
readonly USEARCH=/data/miceco/software/usearch9.2.64_i86linux32

# Record the working directory in the job log.
pwd

# Dereplicate the reads and annotate each unique sequence with its abundance
# (-sizeout). Singletons are not removed here; they are dropped by -minsize 2
# in the clustering step below.
"$USEARCH" -fastx_uniques seqs.fa -fastaout uniques.fa -sizeout -relabel Uniq
head uniques.fa

# Pick OTUs, ignoring unique sequences seen fewer than 2 times, then report
# how many OTUs were found.
"$USEARCH" -cluster_otus uniques.fa -otus otus.fa -relabel OTU -minsize 2
head otus.fa
grep -c "^>" otus.fa

# Make OTU table by mapping the reads to the OTUs at 97% identity.
# NOTE(review): this maps the original seqs.fna, not the relabelled seqs.fa
# used above -- confirm which read labels the downstream steps expect.
"$USEARCH" -usearch_global seqs.fna -db otus.fa -strand plus -id 0.97 -otutabout otu_table.txt

# Preview the table in the job log. An interactive pager such as less is of
# no use in a batch job, so print only the first rows, cut to a readable width.
head -n 5 otu_table.txt | cut -c 1-200

####################################################################################################################
##4
# Using reorder_file.py to change the usearch format in "otu_table.txt" to QIIME format in order to use it in the pick representative OTU sequences step.

## Run the Python script from the directory that contains otu_table.txt;
## it writes otu_table_reformatted.txt to the same directory.
python reorder_file.py

##the reorder_file content:
#!/usr/bin/env python
"""Reformat a USEARCH OTU table into one line per OTU listing its columns.

Reads the tab-separated file "otu_table.txt" and writes
"otu_table_reformatted.txt". Each output line holds the row identifier
followed by the header name of every column whose cell is exactly "1".
"""


def reorder_otu_table(filename, outfile):
    """Write, for each row of `filename`, its ID and the columns equal to "1".

    Args:
        filename: path of the tab-separated input table. The first line is
            the header; the first field of every line is the row identifier.
        outfile: path of the tab-separated output file (overwritten).

    Line endings are stripped before splitting. Without this the last cell
    of every row is "1\\n", never equals "1", and the last column would be
    silently dropped (and its header name would carry a newline).
    """
    with open(filename, "r") as src, open(outfile, "w") as out:
        header = src.readline().rstrip("\r\n").split("\t")
        for line in src:
            fields = line.rstrip("\r\n").split("\t")
            if fields == [""]:
                # Blank line: nothing to report for it.
                continue
            # Pair each data cell with its column name, skipping the ID column.
            members = [
                name
                for name, cell in zip(header[1:], fields[1:])
                if cell == "1"
            ]
            out.write("\t".join([fields[0]] + members) + "\n")


if __name__ == "__main__":
    reorder_otu_table("otu_table.txt", "otu_table_reformatted.txt")

# Rename the OTU identifiers in "otu_table_reformatted.txt" from "OTU..." to
# "denovo..." and save the result as seqs_otus.txt.
# The pattern is anchored to the start of the line, where the identifier is,
# so read labels that happen to contain "OTU" are not altered. "&&" prevents
# sed from editing a stale seqs_otus.txt when the copy fails.
cp otu_table_reformatted.txt seqs_otus.txt && sed -i -e 's/^OTU/denovo/' seqs_otus.txt

##5
# Rename the FASTA headers of the OTU sequences from ">OTU..." to
# ">denovo..." and save the result as otus_edited.fa.
# "&&" prevents sed from editing a stale otus_edited.fa when the copy fails.
cp otus.fa otus_edited.fa && sed -i -e 's/^>OTU/>denovo/' otus_edited.fa

####################################################################################################################
##6
# Use QIIME to assign taxonomy and make an OTU table with taxonomy.
# Save the script shown below as script2.sh, then submit it to the Peregrine
# cluster's SLURM scheduler with:

sbatch script2.sh

#!/bin/bash
#SBATCH --job-name=project_EX
#SBATCH --time=00:30:00
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --cpus-per-task=1
#SBATCH --mem=2G
#SBATCH --partition=short

# script2.sh: pick representative sequences, assign taxonomy and build the
# final OTU table with QIIME.
# Inputs  (current directory): seqs_otus.txt, seqs.fa, 94_otus.fasta,
#                              94_otu_taxonomy.txt
# Outputs (current directory): rep.fna, uclust_assigned_taxonomy/,
#                              otu_table1.biom, otu_table2.biom,
#                              otu_table.biom,
#                              OTUtable.from_biom_w_taxonomy.txt

module load QIIME/1.9.1-foss-2016a-Python-2.7.11-tmp
module list

# Stop at the first failing step: every step consumes the previous step's
# output. Enabled only after the module commands so that strict mode does not
# interfere with the environment-module shell functions.
set -euo pipefail

# Pick representative OTU sequences
pick_rep_set.py -i seqs_otus.txt -f seqs.fa -o rep.fna

# Assign taxonomy to the representative set written by the previous step.
# The input must be rep.fna: no step creates "rep_edited.fa", and the
# make_otu_table.py call below reads rep_tax_assignments.txt, the name that
# corresponds to an input file called rep.fna.
assign_taxonomy.py -i rep.fna -m uclust -o uclust_assigned_taxonomy -r 94_otus.fasta -t 94_otu_taxonomy.txt

####### Make_otu_table with taxonomy######
make_otu_table.py -i seqs_otus.txt -t uclust_assigned_taxonomy/rep_tax_assignments.txt -o otu_table1.biom

# Remove singletons (alternate step): keep OTUs with a total count of at least 2
filter_otus_from_otu_table.py -i otu_table1.biom -o otu_table2.biom -n 2

# Remove OTUs whose taxonomy is Unassigned
filter_taxa_from_otu_table.py -i otu_table2.biom -n Unassigned -o otu_table.biom

# Convert to a tab-separated text file with a taxonomy column:
biom convert -i otu_table.biom -o OTUtable.from_biom_w_taxonomy.txt --header-key taxonomy --to-tsv

####################################################################################################################
##7
# Analyze microbial community diversity 
# The following steps run on R version 3.3.2 (2016-10-31) using RStudio interface
# Install the libraries using Rpackages.R
# Load the OTU table (OTUtable.from_biom_w_taxonomy.txt) to R and change the OTU table format using OTUtableformat.R
# Change the header of OTUtable_nontaxon.txt using Header_mapping.csv in Microsoft Excel 2010

####################################################################################################################
##8
# Calculate alpha diversity (Richness, shannon and simpson) and 
# draw the bar plot (Richness.png, Shannon.png and Simpson.png) using alpha_diversity.R 

####################################################################################################################
##9
# Calculate beta diversity (bray-curtis) and make a plot (NMDS.png) using NMDS.R