
#######################################################################################
############ Qiime2 Code Run for PortaLyzer Paper Analysis ############################

#working directory: /mounts/lovelace/16S/Iceland-2021/qiime2-run1 
#files are stored in: /cluster/fieldscience/artifacts/Iceland-16S/2021-run-comparison 

# This run is intended to check the quality of the two sequencing runs we got from IU. We had two different DNA extraction technical replicates - one was completed in Iceland, and the other was shipped back to Earlham after Step 1 of the protocol becasue we ran out of buffer C2. The files are named 1-1 or 1-2, and 1-1 means the DNA analysis was completed in Iceland and they were sequenced in Run 1, and 1-2 means the DNA analysis was completed at EC and it was sequenced in Run 2 on a separate flow cell. 

# Here are the 16S PCR primers and adapter primer sequences, they need to be trimmed:
# PCR1 Primers:
# Custom 16SV4 For CGB Primer: 5’-CACTCTTTCCCTACACGACGCTCTTCCGATCTTATGGTAATTGTGTGCCAGCMGCCGCGGTA*A-3’
# PCR1-CGB/PCR2 Overlap: 5’-CACTCTTTCCCTACACGACGCTCTTCCGATCT-3’ * Indicates phosphothioate bond
# Custom 16SV4 Rev CGB Primer: 5’- GTTCAGACGTGTGCTCTTCCGATCTAGTCAGTCAGCCGGACTACHVGGGTWTCTAA*T-3’
# PCR1-CGB/PCR2 Overlap: 5’- GTTCAGACGTGTGCTCTTCCGATCT-3’ * Indicates phosphothioate bond

# Adapter primers (PCR2):
# 5’-Adapter:  AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
# 3’-Adapter:  CAAGCAGAAGACGGCATACGAGATXXXXXXXXXXXXGTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT.        XXXXXXXXXXXX denotes the index region of adapter. The index sequences (barcodes) contained in each adapter are listed in the summary sheet
# This manual has a picture showing the primer binding sites: https://perkinelmer-appliedgenomics.com/wp-content/uploads/2020/4203-02-NEXTflex-16S-V4-Amplicon-Seq-Kit-2.0.pdf


# on Lovelace

# original data from the runs is in /cluster/metagenomes/Iceland-2021/run-1 and /cluster/metagenonmes/Iceland-2021/run-2 
# working directory is: /mounts/lovelace/16S/Iceland-2021/qiime2-run1 
# files are stored in: /cluster/fieldscience/artifacts/Iceland-16S/2021-run-comparison

mkdir Iceland-2021-raw_data
cd Iceland-2021-raw_data
mkdir fastq-run1
mkdir fastq-run2

# move needed fastq files into the new directories. We want Casava1.8 format; these are the files without the L001 in the filename. (Those are Casava1.8a versions.)

cd /cluster/metagenomes/Iceland-2021/run-1
cp *.fastq /mounts/lovelace/16S/Iceland-2021/Iceland-2021-raw_data/fastq-run1
cd /mounts/lovelace/16S/Iceland-2021/Iceland-2021-raw_data/fastq-run1
rm *L001_R1_001.fastq
rm *L001_R2_001.fastq
gzip *.fastq

# do the same thing for run2

cd /cluster/metagenomes/Iceland-2021/run-2
cp *.fastq /mounts/lovelace/16S/Iceland-2021/Iceland-2021-raw_data/fastq-run2
cd /mounts/lovelace/16S/Iceland-2021/Iceland-2021-raw_data/fastq-run2
rm *L001_R1_001.fastq
rm *L001_R2_001.fastq
gzip *.fastq

cd qiime2-run1

# load qiime on lovelace
module load qiime2/qiime2-2021.4
source tab-qiime

# fetch manifest and metadata files
fetch-gdrive-object.py -d 1vdgr94EiEXY9kIpuyd_KNoojaVy53-5VMwBp3OBVunY -t sheet -f tsv -o Iceland-2021-Glacier-Manifest-run1.tsv
fetch-gdrive-object.py -d 1vF96mkR2rc7WBFZ4j2oXvpU0dEfLhyGLI9wt6o9Btx0 -t sheet -f tsv -o Iceland-2021-Glacier-Manifest-run2.tsv
fetch-gdrive-object.py -d 1WiQvFv1sM1IRrKFGMPeNERxcEUmd3DPaay3Ftu7hslE -t sheet -f tsv -o Iceland-2021-Glacier-Metadata-run1.tsv
fetch-gdrive-object.py -d 11CNeLoUp3RbOC3L8Amvks0nw-C_4Wf0SlEW0oM7KCSQ -t sheet -f tsv -o Iceland-2021-Glacier-Metadata-run2.tsv

# Pair the forward and reverse reads
nohup qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path Iceland-2021-Glacier-Manifest-run1.tsv \
--output-path Iceland-2021-run1-raw.qza \
--input-format PairedEndFastqManifestPhred33V2 &

rm nohup.out

nohup qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path Iceland-2021-Glacier-Manifest-run2.tsv \
--output-path Iceland-2021-run2-raw.qza \
--input-format PairedEndFastqManifestPhred33V2 &

qiime demux summarize \
	--i-data Iceland-2021-run1-raw.qza \
	--o-visualization Iceland-2021-run1-raw.qzv

qiime demux summarize \
	--i-data Iceland-2021-run2-raw.qza \
	--o-visualization Iceland-2021-run2-raw.qzv

# move files to /cluster/fieldscience/artifacts/Iceland-16S/2021-run-comparison

# now need to trim off primers and trunc based on quality score. We truncated at the first instance of the lower quaretile of the box-and-whisker plot landing below 20. The overlap should be ~52 bp (300+300-450-15-83) for run1, and ~30 bp for run2 (300+300-450-37-83). 

nohup qiime dada2 denoise-paired \
	--p-n-threads 8 \
	--i-demultiplexed-seqs Iceland-2021-run1-raw.qza \
	--p-trunc-len-f 285 --p-trim-left-f 89 \
	--p-trunc-len-r 217 --p-trim-left-r 102 \
	--o-representative-sequences Iceland-2021-run1-rep-seqs-dada2.qza \
	--o-table Iceland-2021-run1-table-dada2.qza \
	--o-denoising-stats Iceland-run1-stats-dada2.qza &

nohup qiime dada2 denoise-paired \
	--p-n-threads 8 \
	--i-demultiplexed-seqs Iceland-2021-run2-raw.qza \
	--p-trunc-len-f 263 --p-trim-left-f 89 \
	--p-trunc-len-r 217 --p-trim-left-r 102 \
	--o-representative-sequences Iceland-2021-run2-rep-seqs-dada2.qza \
	--o-table Iceland-2021-run2-table-dada2.qza \
	--o-denoising-stats Iceland-run2-stats-dada2.qza &

qiime metadata tabulate \
	--m-input-file Iceland-run1-stats-dada2.qza \
	--o-visualization Iceland-2021-run1-stats-dada2.qzv 

qiime metadata tabulate \
	--m-input-file Iceland-run2-stats-dada2.qza \
	--o-visualization Iceland-2021-run2-stats-dada2.qzv 

# move files to /cluster/fieldscience/artifacts/Iceland-16S/2021-run-comparison

# kept an average of 57% of reads in run1 after DADA2 using the most stringent parameters
# kept an average of 61% of reads in run2 after DADA2 using the most stringent parameters

# generate feature summaries

qiime feature-table summarize \
	--i-table Iceland-2021-run1-table-dada2.qza \
	--o-visualization Iceland-2021-run1-table-dada2.qzv \
	--m-sample-metadata-file Iceland-2021-Glacier-Metadata-run1.tsv

qiime feature-table summarize \
	--i-table Iceland-2021-run2-table-dada2.qza \
	--o-visualization Iceland-2021-run2-table-dada2.qzv \
	--m-sample-metadata-file Iceland-2021-Glacier-Metadata-run2.tsv

# move files to /cluster/fieldscience/artifacts/Iceland-16S/2021-run-comparison

# for Run1: at sampling depth 209479: Retained 3,770,622 (62.17%) features in 18 (85.71%) samples at the specifed sampling depth.
# for Run2: at sampling depth 253244: Retained 4,558,392 (61.99%) features in 18 (94.74%) samples at the specifed sampling depth.

qiime feature-table summarize \
  --i-table Iceland-2019-STP-FWDs-table-dada2-run1.qza \
  --o-visualization Iceland-2019-STP-FWDs-table-dada2-run1.qzv \
  --m-sample-metadata-file Skalanes-Test-Pit-Metadata.tsv

# Create a phylogenetic tree from the data: 

nohup qiime phylogeny align-to-tree-mafft-fasttree \
	--p-n-threads 8 \
	--i-sequences Iceland-2021-run1-CN-rep-seqs-dada2.qza \
	--o-alignment Iceland-2021-run1-CN-aligned-rep-seqs-dada2.qza \
	--o-masked-alignment Iceland-2021-run1-CN-masked-aligned-rep-seqs-dada2.qza \
	--o-tree Iceland-2021-run1-CN-unrooted-tree.qza \
	--o-rooted-tree Iceland-2021-run1-CN-rooted-tree.qza &

nohup qiime phylogeny align-to-tree-mafft-fasttree \
	--p-n-threads 8 \
	--i-sequences Iceland-2021-run2-CN-rep-seqs-dada2.qza \
	--o-alignment Iceland-2021-run2-CN-aligned-rep-seqs-dada2.qza \
	--o-masked-alignment Iceland-2021-run2-CN-masked-aligned-rep-seqs-dada2.qza \
	--o-tree Iceland-2021-run2-CN-unrooted-tree.qza \
	--o-rooted-tree Iceland-2021-run2-CN-rooted-tree.qza &

fetch-gdrive-object.py -d 1b5xVyWlpadsuOhyqXq9q3qSbtBcwPOLxC7kTk8pT2k8 -t sheet -f tsv -o Iceland-2021-Glacier-Metadata-Updated-SSIDs-run1.tsv
fetch-gdrive-object.py -d 12TBYI7O03xEqw-xGS579BB1VqpuW4Bj4o3z42AUzu-4 -t sheet -f tsv -o Iceland-2021-Glacier-Metadata-Updated-SSIDs-run2.tsv

nohup qiime diversity core-metrics-phylogenetic \
	--i-phylogeny Iceland-2021-run1-CN-rooted-tree.qza \
	--i-table Iceland-2021-run1-CN-table-dada2.qza  \
	--p-sampling-depth 209479 \
	--m-metadata-file Iceland-2021-Glacier-Metadata-Updated-SSIDs-run1.tsv \
	--output-dir Iceland-2021-CN-run1-core-metrics &

nohup qiime diversity core-metrics-phylogenetic \
	--i-phylogeny Iceland-2021-run2-CN-rooted-tree.qza \
	--i-table Iceland-2021-run2-CN-table-dada2.qza  \
	--p-sampling-depth 253244 \
	--m-metadata-file Iceland-2021-Glacier-Metadata-Updated-SSIDs-run2.tsv \
	--output-dir Iceland-2021-run2-CN-core-metrics &

# making .qzv files from the core metrics #

qiime diversity alpha-group-significance \
	--i-alpha-diversity Iceland-2021-CN-run1-core-metrics/faith_pd_vector.qza \
	--m-metadata-file Iceland-2021-Glacier-Metadata-Updated-SSIDs-run1.tsv \
	--o-visualization Iceland-2021-CN-run1-core-metrics/faith-pd-significance-run1.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity Iceland-2021-CN-run1-core-metrics/evenness_vector.qza \
	--m-metadata-file Iceland-2021-Glacier-Metadata-Updated-SSIDs-run1.tsv \
	--o-visualization Iceland-2021-CN-run1-core-metrics/evenness-significance-run1.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity Iceland-2021-CN-run1-core-metrics/shannon_vector.qza \
	--m-metadata-file Iceland-2021-Glacier-Metadata-Updated-SSIDs-run1.tsv \
	--o-visualization Iceland-2021-CN-run1-core-metrics/shannon-significance-run1.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity Iceland-2021-run2-CN-core-metrics/faith_pd_vector.qza \
	--m-metadata-file Iceland-2021-Glacier-Metadata-Updated-SSIDs-run2.tsv \
	--o-visualization Iceland-2021-run2-CN-core-metrics/faith-pd-significance-run2.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity Iceland-2021-run2-CN-core-metrics/evenness_vector.qza \
	--m-metadata-file Iceland-2021-Glacier-Metadata-Updated-SSIDs-run2.tsv \
	--o-visualization Iceland-2021-run2-CN-core-metrics/evenness-significance-run2.qzv

qiime diversity alpha-group-significance \
	--i-alpha-diversity Iceland-2021-run2-CN-core-metrics/shannon_vector.qza \
	--m-metadata-file Iceland-2021-Glacier-Metadata-Updated-SSIDs-run2.tsv \
	--o-visualization Iceland-2021-run2-CN-core-metrics/shannon-significance-run2.qzv

# move .qza files to /cluster/fieldscience/artifacts/Iceland-16S/2021-run-comparison

# taxonomic analysis 

cd /mounts/lovelace/16S/Iceland-2019/Skalanes-Test-Pit/run1
cp silva-138-99-nb-classifier.qza /mounts/lovelace/16S/Iceland-2021/qiime2-run1

rm nohup.out
nohup qiime feature-classifier classify-sklearn \
	--p-n-jobs 8 \
	--i-classifier silva-138-99-nb-classifier.qza \
	--i-reads Iceland-2021-run1-CN-rep-seqs-dada2.qza \
	--o-classification Iceland-2021-run1-CN-taxonomy-silva.qza &

nohup qiime feature-classifier classify-sklearn \
	--p-n-jobs 8 \
	--i-classifier silva-138-99-nb-classifier.qza \
	--i-reads Iceland-2021-run2-CN-rep-seqs-dada2.qza \
	--o-classification Iceland-2021-run2-CN-taxonomy-silva.qza &

qiime metadata tabulate \
	--m-input-file Iceland-2021-run1-CN-taxonomy-silva.qza \
	--m-input-file Iceland-2021-run1-CN-rep-seqs-dada2.qza  \
	--o-visualization Iceland-2021-run1-CN-taxonomy-silva-table.qzv

qiime taxa barplot \
	--i-table Iceland-2021-run1-CN-table-dada2.qza \
	--i-taxonomy Iceland-2021-run1-CN-taxonomy-silva.qza \
	--m-metadata-file Iceland-2021-Glacier-Metadata-Updated-SSIDs-run1.tsv  \
	--o-visualization Iceland-2021-run1-CN-taxonomy-silva-bar-plots.qzv

qiime metadata tabulate \
	--m-input-file Iceland-2021-run2-CN-taxonomy-silva.qza \
	--m-input-file Iceland-2021-run2-CN-rep-seqs-dada2.qza \
	--o-visualization Iceland-2021-run2-CN-taxonomy-silva-table.qzv

qiime taxa barplot \
	--i-table Iceland-2021-run2-CN-table-dada2.qza \
	--i-taxonomy Iceland-2021-run2-CN-taxonomy-silva.qza \
	--m-metadata-file Iceland-2021-Glacier-Metadata-Updated-SSIDs-run2.tsv \
	--o-visualization Iceland-2021-run2-CN-taxonomy-silva-bar-plots.qzv

#################################################################################
###### R code for alpha diversity analysis and plots ###########################

# Run 1

# load the packages
library(tidyverse) 
library(qiime2R)
library(ggplot2)

# set working directory
setwd("~/Desktop/Iceland/IFS2022/R_portalyzer")

# load the files needed 
shannon_1 <- read_qza("shannon_vector_run1.qza")
evenness_1 <- read_qza("evenness_vector_run1.qza")
shannon_2 <- read_qza("shannon_vector_run2.qza")
evenness_2 <- read_qza("evenness_vector_run2.qza")
metadata <- read_q2metadata("Iceland-2021-portalyzer-metadata.tsv")

# grab the data out of the .qza files
shannon_1 <- shannon_1$data %>%
  rownames_to_column("SampleID")
evenness_1 <- evenness_1$data %>%
  rownames_to_column("SampleID")

shannon_2 <- shannon_2$data %>%
  rownames_to_column("SampleID")
evenness_2 <- evenness_2$data %>%
  rownames_to_column("SampleID")

# combine diversity data

shannon_1$extraction <- 1
evenness_1$extraction <- 1

shannon_2$extraction <- 2
evenness_2$extraction <- 2

shannon <- rbind(shannon_1, shannon_2)
evenness <- rbind(evenness_1, evenness_2)

# make sure the metadata table SampleIDs match the data files ... modify if necessary

# add the data to the metadata table - it should automatically
# join by SampleID and extraction
metadata <- metadata %>%
  left_join(shannon) %>%
  left_join(evenness)

metadata$extraction <- as.character(metadata$extraction) 

# plot the shannon diversity 

  filter(!is.na(shannon_entropy)) %>%
  ggplot(aes(x=SampleID, y=shannon_entropy, fill=extraction)) +
  stat_summary(geom="bar", fun.data=mean_se, color="black") + # sets color of bar outline to black
  geom_jitter(shape=21, width=0.2, height=0) +
  coord_cartesian(ylim=c(0,11)) +
  facet_grid(cols=vars(location), scales = "free") +
  xlab("Sample ID") +
  ylab("Shannon Diversity") +
  theme_q2r() +
  theme(legend.position="none")

metadata %>%
  filter(!is.na(shannon_entropy)) %>%
  ggplot(aes(x=extraction, y=shannon_entropy, fill=extraction)) +
  stat_summary(geom="bar", fun.data=mean_se, color="black") + # sets color of bar outline to black
  geom_jitter(shape=21, width=0.2, height=0) +
  coord_cartesian(ylim=c(0,11)) +
  facet_grid(cols=vars(location), scales = "free") +
  xlab("Extraction") +
  ylab("Shannon Diversity") +
  theme_q2r() +
  theme(legend.position="none")

# plot the evenness diversity
metadata %>%
  filter(!is.na(pielou_evenness)) %>%
  ggplot(aes(x=SampleID, y=pielou_evenness, fill=extraction)) +
  stat_summary(geom="bar", fun.data=mean_se, color="black") + # sets color of bar outline to black
  geom_jitter(shape=21, width=0.2, height=0) +
  facet_grid(cols=vars(location),scales = "free") +
  xlab("Sample ID") +
  ylab("Evenness") +
  theme_q2r() +
  theme(legend.position="none")

metadata %>%
  filter(!is.na(pielou_evenness)) %>%
  ggplot(aes(x=extraction, y=pielou_evenness, fill=extraction)) +
  stat_summary(geom="bar", fun.data=mean_se, color="black") + # sets color of bar outline to black
  geom_jitter(shape=21, width=0.2, height=0) +
  facet_grid(cols=vars(location),scales = "free") +
  xlab("Extraction") +
  ylab("Evenness") +
  theme_q2r() +
  theme(legend.position="none")

