ls *_R1_001.fastq | cut -f1-2 -d "_" > samples
!/bin/sh
for sample in $(cat samples)_
do
echo "On sample: $sample" | cutadapt -a ^GTGCCAGCMGCCGCGGTAA...ATTAGAWACCCBDGTAGTCC -A ^GGACTACHVGGGTWTCTAAT...TTACCGCGGCKGCTGGCAC -m 215 -M 285 --discard-untrimmed -o ${sample}_R1_001_trimmed.fq.gz -p ${sample}_R2_001_trimmed.fq.gz ${sample}_R1_001.fastq ${sample}_R2_001.fastq >> cutadapt_primer_trimming_stats.txt 2>&1
done
paste samples <(grep "passing" cutadapt_primer_trimming_stats.txt | cut -f3 -d "(" | tr -d ")") <(grep "filtered" cutadapt_primer_trimming_stats.txt | cut -f3 -d "(" | tr -d ")")
> library("dada2")
> packageVersion("dada2")
‘1.16.0’
> setwd("/project/geobiology/scratch/flood/Simons_Project_18_itag/")
> list.files() #just to see if it is reading the files in the directory
> samples <- scan("samples", what="character")
> forward_reads <- paste0(samples, "_R1_001_trimmed.fq.gz")
> reverse_reads <- paste0(samples, "_R2_001_trimmed.fq.gz")
> filtered_forward_reads <- paste0(samples, "_R1_filtered.fq.gz")
> filtered_reverse_reads <- paste0(samples, "_R2_filtered.fq.gz")
> plotQualityProfile(forward_reads)
> filtered_out <- filterAndTrim(forward_reads, filtered_forward_reads, reverse_reads, filtered_reverse_reads, maxEE=c(2,2), rm.phix=TRUE, minLen=175, truncLen=c(250,200))
> class(filtered_out)
[1] «matrix»
> dim(filtered_out)
[1] 62 2
> filtered_out
> err_forward_reads <- learnErrors(filtered_forward_reads, multithread = TRUE)
> err_reverse_reads <- learnErrors(filtered_reverse_reads, multithread = TRUE)
> plotErrors(err_forward_reads, nominalQ=TRUE)
> plotErrors(err_reverse_reads, nominalQ=TRUE)
> derep_forward <- derepFastq(filtered_forward_reads, verbose=TRUE)
> names(derep_forward) <- samples
> derep_reverse <- derepFastq(filtered_reverse_reads, verbose=TRUE)
> names(derep_reverse) <- samples
> dada_forward <- dada(derep_forward, err=err_forward_reads, pool="pseudo")
> dada_forward[[1]]
dada-class: object describing DADA2 denoising results 31 sequence variants were inferred from 248 input unique sequences. Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
> dada_reverse <- dada(derep_reverse, err=err_reverse_reads, pool="pseudo")
> dada_reverse[[1]]
33 sequence variants were inferred from 178 input unique sequences. Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
> merged_amplicons <- mergePairs(dada_forward, derep_forward, dada_reverse, derep_reverse, trimOverhang=TRUE, minOverlap=170, verbose=TRUE)
> seqtab <- makeSequenceTable(merged_amplicons)
> seqtab.nochim <- removeBimeraDenovo(seqtab, verbose=T)
> sum(seqtab.nochim)/sum(seqtab)
[1] 0.9870203 lastest package [1] 0.987158
> getN <- function(x) sum(getUniques(x))
> summary_tab <- data.frame(row.names=samples, dada2_input=filtered_out[,1], filtered=filtered_out[,2], dada_f=sapply(dada_forward, getN), dada_r=sapply(dada_reverse, getN), merged=sapply(merged_amplicons, getN), nonchim=rowSums(seqtab.nochim), final_perc_reads_retained=round(rowSums(seqtab.nochim)/filtered_out[,1]*100, 1))
> taxa <- assignTaxonomy(seqtab.nochim, "/project/geobiology/DADA2_databases/silva_nr_v138_train_set.fa.gz", tryRC=T)
> taxa <- addSpecies(taxa, "/project/geobiology/DADA2_databases/silva_species_assignment_v138.fa.gz")
> asv_seqs <- colnames(seqtab.nochim)
> asv_headers <- vector(dim(seqtab.nochim)[2], mode="character")
for (i in 1:dim(seqtab.nochim)[2]) {
asv_headers[i] <- paste(">ASV", i, sep="_")
}
> asv_fasta <- c(rbind(asv_headers, asv_seqs))
> write(asv_fasta, "ASVs2.fa")
> asv_tab <- t(seqtab.nochim)
> row.names(asv_tab) <- sub(">", "", asv_headers)
> write.table(asv_tab, "ASVs_counts2.tsv", sep="\t", quote=F, col.names=NA)
> asv_tax <- taxa
> row.names(asv_tax) <- sub(">", "", asv_headers)
> write.table(asv_tax, "ASVs_taxonomy2.tsv", sep="\t", quote=F, col.names=NA)
> library("phyloseq")
> library("vegan")
> library("DESeq2")
> library("ggplot2")
> library("dendextend")
> library("tidyr")
> library("viridis")
> library("reshape")
> library("decontam")
> packageVersion("phyloseq") ‘1.32.0’
> packageVersion("vegan") ‘2.5.6’
> packageVersion("DESeq2") ‘1.28.0’
> packageVersion("ggplot2") ‘3.3.0’
> packageVersion("dendextend") ‘1.13.4’
> packageVersion("tidyr") ‘1.0.2’
> packageVersion("viridis") ‘0.5.1’
> packageVersion("reshape") ‘0.8.8’
> packageVersion("decontam") [1] ‘1.8.0’
> load("P18_spring2020.RData")
> objects() #lists all objects
> rm()
> load("P18_spring2020_red.RData")
> p18dataframe2 <- read.csv("p18dataframe2.csv", header = TRUE, sep=",")
> ps2 <- phyloseq(otu_table(seqtab.nochim, taxa_are_rows=FALSE), tax_table(taxa), sample_data(p18dataframe2))
> rownames(p18dataframe2) <- samples.out
> p18dataframe3 <- p18dataframe2[ -c(1) ]
> ps2 <- phyloseq(otu_table(seqtab.nochim, taxa_are_rows=FALSE), tax_table(taxa), sample_data(p18dataframe3))
> table(tax_table(ps2)[, "Kingdom"], exclude = NULL)
Archaea Bacteria Eukaryota
> asv_seqs <- colnames(seqtab.nochim)
> asv_headers <- vector(dim(seqtab.nochim)[2], mode="character")asv_headers <- vector(dim(seqtab.nochim)[2], mode="character")
for (i in 1:dim(seqtab.nochim)[2]) { asv_headers[i] <- paste(">ASV", i, sep="_") }
> asv_fasta <- c(rbind(asv_headers, asv_seqs))
> write(asv_fasta, "ASVs.fa")
> asv_tab <- t(seqtab.nochim)
> row.names(asv_tab) <- sub(">", "", asv_headers)
> write.table(asv_tab, "ASVs_counts.tsv", sep="\t", quote=F, col.names=NA)
> asv_tax <- taxa
> row.names(asv_tax) <- sub(">", "", asv_headers)
> write.table(asv_tax, "ASVs_taxonomy.tsv", sep="\t", quote=F, col.names=NA)
> colnames(asv_tab)
> vector_for_decontam <- c(rep(TRUE, 2), rep(FALSE, 51), rep(TRUE, 1), rep(FALSE, 4), rep(TRUE, 1), rep(FALSE, 3))
> contam_df <- isContaminant(t(asv_tab), neg=vector_for_decontam)
> table(contam_df$contaminant)
> contam_asvs <- row.names(contam_df[contam_df$contaminant == TRUE, ])
> asv_tax[row.names(asv_tax) %in% contam_asvs, ]
Kingdom Phylum Class
ASV_1497 «Bacteria» «Bacteroidota» «Bacteroidia»
ASV_1660 «Bacteria» «Firmicutes» «Bacilli»
ASV_2331 «Archaea» «Nanoarchaeota» «Nanoarchaeia»
ASV_3776 «Bacteria» «Proteobacteria» «Alphaproteobacteria»
ASV_5306 «Bacteria» «Actinobacteriota» «Actinobacteria»
ASV_9182 «Bacteria» «Proteobacteria» «Gammaproteobacteria»
ASV_23524 «Bacteria» «Desulfobacterota» «Desulfobulbia»
Order Family
ASV_1497 «Flavobacteriales» «Weeksellaceae»
ASV_1660 «Bacillales» «Planococcaceae»
ASV_2331 «Woesearchaeales» NA
ASV_3776 «Rhizobiales» «Xanthobacteraceae»
ASV_5306 «Propionibacteriales» «Propionibacteriaceae»
ASV_9182 «Xanthomonadales» «Xanthomonadaceae»
ASV_23524 «Desulfobulbales» NA
Genus Species
ASV_1497 «Chryseobacterium» NA
ASV_1660 «Lysinibacillus» NA
ASV_2331 NA NA
ASV_3776 «Bradyrhizobium» NA
ASV_5306 «Cutibacterium» NA
ASV_9182 «Stenotrophomonas» «maltophilia»
ASV_23524 NA NA
> contam_indices <- which(asv_fasta %in% paste0(">", contam_asvs))
> dont_want <- sort(c(contam_indices, contam_indices + 1))
> asv_fasta_no_contam <- asv_fasta[- dont_want]
> asv_tab_no_contam <- asv_tab[!row.names(asv_tab) %in% contam_asvs, ]
> asv_tax_no_contam <- asv_tax[!row.names(asv_tax) %in% contam_asvs, ]
> write(asv_fasta_no_contam, "ASVs-no-contam.fa")
> write.table(asv_tab_no_contam, "ASVs_counts-no-contam.tsv",
sep="\t", quote=F, col.names=NA)
> write.table(asv_tax_no_contam, "ASVs_taxonomy-no-contam.tsv",
sep="\t", quote=F, col.names=NA)
> count_tab <- read.table("ASVs_counts-no-contam.tsv", header=T, row.names=1,
check.names=F, sep="\t")[ , -c(1:2,15,28,52:54,59)]
> tax_tab <- as.matrix(read.table("ASVs_taxonomy-no-contam.tsv", header=T,
row.names=1, check.names=F, sep="\t"))
> sample_info_tab <- read.table("P18dataframe3_no_controls.tsv", header=T, row.names=1, check.names=F, sep="\t")
> sample_info_tab$Color <- as.character(sample_info_tab$Color)
> sample_info_tab$NewColor <- as.character(sample_info_tab$NewColor)
> sample_info_tab$LocationColor <- as.character(sample_info_tab$LocationColor)
> deseq_counts <- DESeqDataSetFromMatrix(count_tab, colData = sample_info_tab, design = ~Location)
> deseq_counts_vst <- varianceStabilizingTransformation(deseq_counts)
> vst_trans_count_tab <- assay(deseq_counts_vst)
> euc_dist <- dist(t(vst_trans_count_tab))
> euc_clust <- hclust(euc_dist, method="ward.D2")
> plot(euc_clust)
> euc_dend <- as.dendrogram(euc_clust, hang=0.1)
> dend_cols <- as.character(sample_info_tab$Color[order.dendrogram(euc_dend)])
> labels_colors(euc_dend) <- dend_cols
> plot(euc_dend, ylab="VST Euc. dist.")
> vst_count_phy <- otu_table(vst_trans_count_tab, taxa_are_rows=T)
> sample_info_tab_phy <- sample_data(sample_info_tab)
> vst_physeq <- phyloseq(vst_count_phy, sample_info_tab_phy)
> vst_pcoa <- ordinate(vst_physeq, method="MDS", distance="euclidean")
> eigen_vals <- vst_pcoa$values$Eigenvalues
> geom_point(size=2) + labs(col="Location") +
geom_text(aes(label=rownames(sample_info_tab), hjust=0.3, vjust=-0.4)) +
coord_fixed(sqrt(eigen_vals[2]/eigen_vals[1])) + ggtitle("PCoA") +
scale_color_manual(values=unique(sample_info_tab$Color[order(sample_info_tab$Type)])) +
theme(legend.position="none")
> rarecurve(t(count_tab), step=100, col=sample_info_tab$Color, lwd=2, ylab="ASVs", label=T)
abline(v=(min(rowSums(t(count_tab)))))
> count_tab_phy <- otu_table(count_tab, taxa_are_rows=T)
> tax_tab_phy <- tax_table(tax_tab)
> ASV_physeq <- phyloseq(count_tab_phy, tax_tab_phy, sample_info_tab_phy)
> plot_richness(ASV_physeq, color="Type", measures=c("Chao1", "Shannon")) + scale_color_manual(values=unique(sample_info_tab$Color[order(sample_info_tab$Type)])) + theme(legend.title = element_blank())
> plot_richness(ASV_physeq, x="Type", color="Location", measures=c("Chao1", "Shannon")) + scale_color_manual(values=unique(sample_info_tab$LocationColor[order(sample_info_tab$Location)])) + theme(legend.title = element_blank())
> plot_richness(ASV_physeq, x="Type", color="Depth", measures=c("Chao1", "Shannon")) +
scale_color_manual(values=unique(sample_info_tab$NewColor[order(sample_info_tab$Depth)])) +
theme(legend.title = element_blank())
> phyla_counts_tab <- otu_table(tax_glom(ASV_physeq, taxrank="Phylum"))
> phyla_tax_vec <- as.vector(tax_table(tax_glom(ASV_physeq, taxrank="Phylum"))[,2])
> rownames(phyla_counts_tab) <- as.vector(phyla_tax_vec)
> unclassified_tax_counts <- colSums(count_tab) - colSums(phyla_counts_tab)
> phyla_and_unidentified_counts_tab <- rbind(phyla_counts_tab, "Unclassified"=unclassified_tax_counts)
> temp_major_taxa_counts_tab <- phyla_and_unidentified_counts_tab[!row.names(phyla_and_unidentified_counts_tab) %in% "Proteobacteria", ]
> class_counts_tab <- otu_table(tax_glom(ASV_physeq, taxrank="Class"))
> class_tax_phy_tab <- tax_table(tax_glom(ASV_physeq, taxrank="Class"))
> phy_tmp_vec <- class_tax_phy_tab[,2]
> class_tmp_vec <- class_tax_phy_tab[,3]
> rows_tmp <- row.names(class_tax_phy_tab)
> class_tax_tab <- data.frame("Phylum"=phy_tmp_vec, "Class"=class_tmp_vec, row.names = rows_tmp)
> proteo_classes_vec <- as.vector(class_tax_tab[class_tax_tab$Phylum == "Proteobacteria", "Class"])
> rownames(class_counts_tab) <- as.vector(class_tax_tab$Class)
> proteo_class_counts_tab <- class_counts_tab[row.names(class_counts_tab) %in% proteo_classes_vec, ]
> proteo_no_class_annotated_counts <- phyla_and_unidentified_counts_tab[row.names(phyla_and_unidentified_counts_tab) %in% "Proteobacteria", ] - colSums(proteo_class_counts_tab)
> major_taxa_counts_tab <- rbind(temp_major_taxa_counts_tab, proteo_class_counts_tab, "Unresolved_Proteobacteria"=proteo_no_class_annotated_counts)
> identical(colSums(major_taxa_counts_tab), colSums(count_tab))
> major_taxa_proportions_tab <- apply(major_taxa_counts_tab, 2, function(x) x/sum(x)*100)
> dim(major_taxa_proportions_tab)
[1] 85 54
> temp_filt_major_taxa_proportions_tab <- data.frame(major_taxa_proportions_tab[apply(major_taxa_proportions_tab, 1, max) > 5, ])
> dim(temp_filt_major_taxa_proportions_tab)
> filtered_proportions <- colSums(major_taxa_proportions_tab) - colSums(temp_filt_major_taxa_proportions_tab)
> filt_major_taxa_proportions_tab <- rbind(temp_filt_major_taxa_proportions_tab, "Other"=filtered_proportions)
> filt_major_taxa_proportions_tab_for_plot <- filt_major_taxa_proportions_tab
> filt_major_taxa_proportions_tab_for_plot$Major_Taxa <- row.names(filt_major_taxa_proportions_tab_for_plot)
> filt_major_taxa_proportions_tab_for_plot.g <- gather(filt_major_taxa_proportions_tab_for_plot, Sample, Proportion, -Major_Taxa)
> head(filt_major_taxa_proportions_tab_for_plot.g)
> head(filt_major_taxa_proportions_tab_for_plot)
> sample_info_for_merge<-data.frame("Sample"=row.names(sample_info_tab), "Type"=sample_info_tab$Type, "color"=sample_info_tab$Color, stringsAsFactors=F)
> sample_info_for_mergeN<-data.frame("Sample"=row.names(sample_info_tab), "Depth"=sample_info_tab$Depth, "color"=sample_info_tab$NewColor, stringsAsFactors=F)
> sample_info_for_mergeL<-data.frame("Sample"=row.names(sample_info_tab), "Location"=sample_info_tab$Location, "color"=sample_info_tab$LocationColor, stringsAsFactors=F)
> filt_major_taxa_proportions_tab_for_plot.g2 <- merge(filt_major_taxa_proportions_tab_for_plot.g, sample_info_for_merge)
> ggplot(filt_major_taxa_proportions_tab_for_plot.g2, aes(x=Sample, y=Proportion, fill=Major_Taxa)) +
geom_bar(width=0.6, stat="identity") + theme_bw() + theme(axis.text.x=element_text(angle=90, vjust=0.4, hjust=1), legend.title=element_blank()) + labs(x="Sample", y="% of 16S rRNA gene copies recovered", title="All samples")
> ggplot(filt_major_taxa_proportions_tab_for_plot.g2, aes(Major_Taxa, Proportion)) +
geom_jitter(aes(color=factor(Type)), size=2, width=0.15, height=0) + scale_color_manual(values=unique(filt_major_taxa_proportions_tab_for_plot.g2$color[order(filt_major_taxa_proportions_tab_f or_plot.g2$Type)])) + geom_boxplot(fill=NA, outlier.color=NA) + theme(axis.text.x=element_text(angle=90, vjust=0.4, hjust=1), legend.title=element_blank()) + labs(x="Major Taxa", y="% of 16S rRNA gene copies recovered", title="All samples")
> ggplot(filt_major_taxa_proportions_tab_for_plotN.g2, aes(Major_Taxa, Proportion)) +
geom_jitter(aes(color=factor(Depth)), size=2, width=0.15, height=0) + scale_color_manual(values=unique(filt_major_taxa_proportions_tab_for_plotN.g2$color[order(filt_major_taxa_proportions_tab_ for_plotN.g2$Depth)])) + geom_boxplot(fill=NA, outlier.color=NA) + theme(axis.text.x=element_text(angle=90, vjust=0.4, hjust=1), legend.title=element_blank()) + labs(x="Major Taxa", y="% of 16S rRNA gene copies recovered", title="All samples")
> ggplot(filt_major_taxa_proportions_tab_for_plotL.g2, aes(Major_Taxa, Proportion)) +
geom_jitter(aes(color=factor(Location)), size=2, width=0.15, height=0) + scale_color_manual(values=unique(filt_major_taxa_proportions_tab_for_plotL.g2$color[order(filt_major_taxa_proportions_tab_ for_plotL.g2$Location)])) + geom_boxplot(fill=NA, outlier.color=NA) + theme(axis.text.x=element_text(angle=90, vjust=0.4, hjust=1), legend.title=element_blank()) + labs(x="Major Taxa", y="% of 16S rRNA gene copies recovered", title="All samples")
> anova(betadisper(euc_dist, sample_info_tab$Type))
#Response: Distances
Df Sum Sq Mean Sq F value Pr(>F)
Groups 3 25138 8379.4 2.7634 0.05157 .
Residuals 50 151613 3032.3
Signif. codes: 0 ‘’ 0.001 ‘’ 0.01 ‘’ 0.05 ‘.’ 0.1 ‘ ’ 1
> adonis(euc_dist~sample_info_tab$Type)
Terms added sequentially (first to last)
Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
sample_info_tab$Type 3 781797 260599 1.9516 0.10482 0.008 **
Residuals 50 6676437 133529 0.89518
Total 53 7458234 1.00000
Signif. codes: 0 ‘’ 0.001 ‘’ 0.01 ‘’ 0.05 ‘.’ 0.1 ‘ ’ 1
adonis(euc_distsample_info_tab$Depth)
Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
sample_info_tab$Depth 6 730765 121794 0.85089 0.09798 0.806
Residuals 47 6727469 143138 0.90202
Total 53 7458234 1.00000
adonis(euc_distsample_info_tab$Location)
Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
sample_info_tab$Location 3 2789750 929917 9.9595 0.37405 0.001 ***
Residuals 50 4668484 93370 0.62595
Total 53 7458234 1.00000
Signif. codes: 0 ‘’ 0.001 ‘’ 0.01 ‘’ 0.05 ‘.’ 0.1 ‘ ’ 1
> Tmarg_sample_IDs <- row.names(sample_info_tab)[sample_info_tab$Type == "Thiomargarita"]
> Tmarg_euc_dist <- dist(t(vst_trans_count_tab[ , colnames(vst_trans_count_tab) %in% Tmarg_sample_IDs]))
> Tmarg_sample_info_tab <- sample_info_tab[row.names(sample_info_tab) %in% Tmarg_sample_IDs, ]
anova(betadisper(Tmarg_euc_dist, Tmarg_sample_info_tab$Location))
#Response: Distances
Df Sum Sq Mean Sq F value Pr(>F)
Groups 2 63923 31961 15.829 0.002526 **
Residuals 7 14134 2019
Signif. codes: 0 ‘’ 0.001 ‘’ 0.01 ‘’ 0.05 ‘.’ 0.1 ‘ ’ 1
> Begg_sample_IDs <- row.names(sample_info_tab)[sample_info_tab$Type == "Beggiatoa"]
> Begg_euc_dist <- dist(t(vst_trans_count_tab[ , colnames(vst_trans_count_tab) %in% Begg_sample_IDs]))
> Begg_sample_info_tab <- sample_info_tab[row.names(sample_info_tab) %in% Begg_sample_IDs, ]
anova(betadisper(Begg_euc_dist, Begg_sample_info_tab$Location))
Df Sum Sq Mean Sq F value Pr(>F)
Groups 1 46025 46025 15.572 0.05863 .
Residuals 2 5911 2956
Signif. codes: 0 ‘’ 0.001 ‘’ 0.01 ‘’ 0.05 ‘.’ 0.1 ‘ ’ 1
> sample_IDs_23002 <- row.names(sample_info_tab)[sample_info_tab$Location == "St_23002"]
> sample_IDs_23020 <- row.names(sample_info_tab)[sample_info_tab$Location == "St_23020"]
> sample_IDs_20020 <- row.names(sample_info_tab)[sample_info_tab$Location == "St_20020"]
> sample_IDs_Geochem21 <- row.names(sample_info_tab)[sample_info_tab$Location == "Geochem_21"]
> sample_IDs_23002_count_phy <- otu_table(count_tab[, colnames(count_tab) %in% sample_IDs_23002], taxa_are_rows=T)
> sample_IDs_23020_count_phy <- otu_table(count_tab[, colnames(count_tab) %in% sample_IDs_23020], taxa_are_rows=T)
> sample_IDs_20020_count_phy <- otu_table(count_tab[, colnames(count_tab) %in% sample_IDs_20020], taxa_are_rows=T)
> sample_IDs_Geochem21_count_phy <- otu_table(count_tab[, colnames(count_tab) %in% sample_IDs_Geochem21], taxa_are_rows=T)
> sample_IDs_23002_sample_info_tab <- sample_info_tab[row.names(sample_info_tab) %in% sample_IDs_23002, ]
> sample_IDs_23020_sample_info_tab <- sample_info_tab[row.names(sample_info_tab) %in% sample_IDs_23020, ]
> sample_IDs_20020_sample_info_tab <- sample_info_tab[row.names(sample_info_tab) %in% sample_IDs_20020, ]
> sample_IDs_Geochem21_sample_info_tab <- sample_info_tab[row.names(sample_info_tab) %in% sample_IDs_Geochem21, ]
> sample_IDs_23002_sample_info_tab_phy <- sample_data(sample_IDs_23002_sample_info_tab)
> sample_IDs_23020_sample_info_tab_phy <- sample_data(sample_IDs_23020_sample_info_tab)
> sample_IDs_20020_sample_info_tab_phy <- sample_data(sample_IDs_20020_sample_info_tab)
> sample_IDs_Geochem21_sample_info_tab_phy <- sample_data(sample_IDs_Geochem21_sample_info_tab)
> p23002_physeq <- phyloseq(sample_IDs_23002_count_phy, sample_IDs_23002_sample_info_tab_phy)
> p23020_physeq <- phyloseq(sample_IDs_23020_count_phy, sample_IDs_23020_sample_info_tab_phy)
> p20020_physeq <- phyloseq(sample_IDs_20020_count_phy, sample_IDs_20020_sample_info_tab_phy)
> Geochem21_physeq <- phyloseq(sample_IDs_Geochem21_count_phy, sample_IDs_Geochem21_sample_info_tab_phy)
> p23002_deseq <- phyloseq_to_deseq2(p23002_physeq, ~Type)
> p23020_deseq <- phyloseq_to_deseq2(p23020_physeq, ~Type)
> p20020_deseq <- phyloseq_to_deseq2(p20020_physeq, ~Type)
> Geochem21_deseq <- phyloseq_to_deseq2(Geochem21_physeq, ~Type)
> p23002_deseq <- DESeq(p23002_deseq)
> p23020_deseq <- DESeq(p23020_deseq)
> p20020_deseq <- DESeq(p20020_deseq)
> Geochem21_deseq <- DESeq(Geochem21_deseq)
> deseq_res_Beggiatoa_p20020 <- results(p20020_deseq, alpha=0.01, contrast=c("Type", "Beggiatoa", "Live Sediment LSB Removed"))
> summary(deseq_res_Beggiatoa_p20020)
Positive hits for > 0.01
> sigtab_deseq_res_Beggiatoa_p20020 <- deseq_res_Beggiatoa_p20020[which(deseq_res_Beggiatoa_p20020$padj < 0.01), ]
> summary(sigtab_deseq_res_Beggiatoa_p20020)
> sigtab_deseq_res_Beggiatoa_p20020_with_tax <- cbind(as(sigtab_deseq_res_Beggiatoa_p20020, "data.frame"), as(tax_table(ASV_physeq)[row.names(sigtab_deseq_res_Beggiatoa_p20020), ], "matrix"))
> sigtab_deseq_res_Beggiatoa_p20020_with_tax[order(sigtab_deseq_res_Beggiatoa_p20020_with_tax$baseMean, decreasing=T), ]
> write.table(sigtab_deseq_res_Beggiatoa_p20020_with_tax, "sigtab_deseq_res_Beggiatoa_p20020_with_tax.tsv", sep="\t", quote=F, col.names=NA)
> deseq_res_Thiom_Geochem21 <- results(Geochem21_deseq, alpha=0.01, contrast=c("Type", "Thiomargarita", "Live Sediment LSB Removed"))
> summary(deseq_res_Thiom_Geochem21)
out of 17652 with nonzero total read count adjusted p-value < 0.01 LFC > 0 (up) : 57, 0.32% LFC < 0 (down) : 1, 0.0057% outliers [1] : 67, 0.38% low counts [2] : 15273, 87% (mean count < 7)
> sigtab_deseq_res_Thiom_Geochem21 <- deseq_res_Thiom_Geochem21[which(deseq_res_Thiom_Geochem21$padj < 0.01), ]
> summary(sigtab_deseq_res_Thiom_Geochem21)
> sigtab_deseq_res_Thiom_Geochem21_with_tax <- cbind(as(sigtab_deseq_res_Thiom_Geochem21, "data.frame"), as(tax_table(ASV_physeq)[row.names(sigtab_deseq_res_Thiom_Geochem21), ], "matrix"))
> sigtab_deseq_res_Thiom_Geochem21_with_tax[order(sigtab_deseq_res_Thiom_Geochem21_with_tax$baseMean, decreasing=T), ]
> write.table(sigtab_deseq_res_Thiom_Geochem21_with_tax, "sigtab_deseq_res_Thiom_Geochem21_with_tax.tsv", sep="\t", quote=F, col.names=NA)
> deseq_res_Thiom_p23020_deseq <- results(p23020_deseq, alpha=0.01, contrast=c("Type", "Thiomargarita", "Live Sediment LSB Removed"))
> summary(deseq_res_Thiom_p23020_deseq)
out of 22661 with nonzero total read count adjusted p-value < 0.01 LFC > 0 (up) : 102, 0.45% LFC < 0 (down) : 6, 0.026% outliers [1] : 80, 0.35% low counts [2] : 16425, 72% (mean count < 3)
> sigtab_deseq_res_Thiom_p23020 <- deseq_res_Thiom_p23020_deseq[which(deseq_res_Thiom_p23020_deseq$padj < 0.01), ]
summary(sigtab_deseq_res_Thiom_p23020)
> sigtab_deseq_res_Thiom_p23020_with_tax <- cbind(as(sigtab_deseq_res_Thiom_p23020, "data.frame"), as(tax_table(ASV_physeq)[row.names(sigtab_deseq_res_Thiom_p23020), ], "matrix"))
>sigtab_deseq_res_Thiom_p23020_with_tax[order(sigtab_deseq_res_Thiom_p23020$baseMean, decreasing=T), ]
> write.table(sigtab_deseq_res_Thiom_p23020_with_tax, "sigtab_deseq_res_Thiom_p23020_with_tax.tsv", sep="\t", quote=F, col.names=NA)