########################### # DRUM ########################### #KAG 020322 #0. Set up R Environment setwd("/Users/altagannon/OneDrive/GraduateSchool/Thum/DRUM") library(dplyr) library(sf) library(raster) library(ape) suppressMessages(library(mosaic)) suppressMessages(library(readxl)) library(tidyverse) library(devtools) library(lme4) library(car) library(effects) library(mosaic) library(writexl) #1. Input Data ##### all_lakes <- read_xlsx("/Users/altagannon/OneDrive/GraduateSchool/Thum/DRUM/Data/All_Lakes_KAG_020322.xlsx") all_lakes <- as.data.frame(all_lakes) #Break lakes into separate dataframes to be analyzed individually all_lakes$Lake <- as.factor(all_lakes$Lake) levels(all_lakes$Lake) eagle <- subset(all_lakes, all_lakes$Lake == "Bald_Eagle") head(eagle) christmas <- subset(all_lakes, all_lakes$Lake == "Christmas") head(christmas) grays_bay <- subset(all_lakes, all_lakes$Lake == "Grays_Bay") head(grays_bay) ham <- subset(all_lakes, all_lakes$Lake == "Ham") head(ham) indp <- subset(all_lakes, all_lakes$Lake == "Independence") head(indp) north_arm <- subset(all_lakes, all_lakes$Lake == "North_Arm") head(north_arm) phelps <- subset(all_lakes, all_lakes$Lake == "Phelps_Bay") head(phelps) smith_geno <- subset(all_lakes, all_lakes$Lake == "Smiths_Bay") head(smith_geno) ##### # ANALYZE BALD EAGLE LAKE ##### #1 Graph with y-axis as percent of total sites visited #_________________________________________________________________________________________________ head(eagle) eagle_counts <- eagle %>% count(Year_TP,Strain, Taxa) eagle eagle_counts$Percent_Ocupied <- ((eagle_counts$n)/151)*100 eagle_plot_Percent_Ocupied <- eagle_counts %>% ggplot()+geom_line(aes(x=Year_TP,y=Percent_Ocupied,col=Strain))+geom_point(aes(x=Year_TP,y=Percent_Ocupied, col=Strain), size = 2) eagle_PO_annotated <- print(eagle_plot_Percent_Ocupied + ggtitle("A. Bald Eagle") + theme_light() + geom_vline(xintercept = 2018.25, linetype="dashed", color = "red") + guides(col=guide_legend(ncol=1,byrow=TRUE)) + labs(y="Percent of Sampled Sites (%)", x = "Year") ) #2) CHI SQUARED ANALYSIS WITHIN YEAR #_________________________________________________________________________________________________ set.seed(1025) head(eagle) eagle$Strain <- as.factor(eagle$Strain) eagle$Year_TP_F <- as.factor(eagle$Year_TP) eagle$Year_F <- as.factor(eagle$Year) eagle_2018 <- eagle[eagle$Year == 2018 , ] eagle_2019 <- eagle[eagle$Year == 2019 , ] eagle_2020 <- eagle[eagle$Year == 2020 , ] #CHI SQUARED 2018 eagle_chi_geno_2018 <- chisq.test(eagle_2018$Strain, eagle_2018$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) eagle_chi_geno_2018$p.value #CHI SQUARED 2019 eagle_chi_geno_2019 <- chisq.test(eagle_2019$Strain, eagle_2019$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) eagle_chi_geno_2019$p.value #CHI SQUARED 2020 eagle_chi_geno_2020 <- chisq.test(eagle_2020$Strain, eagle_2020$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) eagle_chi_geno_2020$p.value #3) CHI SQUARED ANALYSIS BTWN YEARS #_________________________________________________________________________________________________ head(eagle) tally(~Year_F, data = eagle) eagle$Strain <- as.factor(eagle$Strain) eagle$Year_TP_F <- as.factor(eagle$Year_TP_F) eagle$Year_F <- as.factor(eagle$Year) time_one <- c("2018", "2019", "2020") eagle_btwn <- eagle[eagle$Year_TP_F %in% time_one , ] eagle_btwn <- droplevels(eagle_btwn) tally(~Year_TP_F, data = eagle_btwn) eagle_btwn_18_19 <-eagle_btwn[eagle_btwn$Year == 2018 | eagle_btwn$Year == 2019 , ] eagle_btwn_18_19 <- droplevels(eagle_btwn_18_19) tally(~Year, data = eagle_btwn_18_19) eagle_btwn_18_19$Strain <- as.factor(eagle_btwn_18_19$Strain) eagle_btwn_18_19$Year_F <- as.factor(eagle_btwn_18_19$Year) eagle_btwn_19_20 <-eagle_btwn[eagle_btwn$Year == 2019 | eagle_btwn$Year == 2020 , ] eagle_btwn_19_20 <- droplevels(eagle_btwn_19_20) tally(~Year_TP_F, data = eagle_btwn_19_20) eagle_btwn_19_20$Strain <- as.factor(eagle_btwn_19_20$Strain) eagle_btwn_19_20$Year_F <- as.factor(eagle_btwn_19_20$Year) #CHI SQUARED COMPARING 2018 TO 2019 eagle_chi_btwn_18_19 <- chisq.test(eagle_btwn_18_19$Strain, eagle_btwn_18_19$Year_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) eagle_chi_btwn_18_19$p.value #CHI SQUARED COMPARING 2019 TO 2020 eagle_chi_btwn_19_20 <- chisq.test(eagle_btwn_19_20$Strain, eagle_btwn_19_20$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) eagle_chi_btwn_19_20$p.value #4) ALL THREE YEARS (first t1 to last t2) #_________________________________________________________________________________________________ head(eagle) tally(~Year_TP_F, data = eagle) list_three_yr <- c( 2018, 2020.5 ) eagle_three_yr_2018 <- as.data.frame(eagle[eagle$Year_TP_F == 2018 , ]) eagle_three_yr_2020 <- as.data.frame(eagle[eagle$Year_TP_F == 2020.5 , ]) eagle_three_yr <- rbind(eagle_three_yr_2018, eagle_three_yr_2020) eagle_chi_three_yr <- chisq.test(eagle_three_yr$Strain, eagle_three_yr$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) eagle_chi_three_yr ##### # ANALYZE CHRISTMAS LAKE ##### #1 Graph with y-axis as percent of total sites visited #_________________________________________________________________________________________________ head(christmas) christmas_counts <- christmas %>% count(Year_TP,Strain, Taxa) christmas_counts christmas_counts$Percent_Ocupied <- ((christmas_counts$n)/113)*100 christmas_plot_Percent_Ocupied <- christmas_counts %>% ggplot()+geom_line(aes(x=Year_TP,y=Percent_Ocupied,col=Strain))+geom_point(aes(x=Year_TP,y=Percent_Ocupied, col=Strain), size = 2) Christmas_PO_annotated <- print(christmas_plot_Percent_Ocupied + ggtitle("H. Christmas") + theme_light() + ylim(0, 40) + guides(col=guide_legend(ncol=1,byrow=TRUE)) + labs(y="Percent of Sampled Sites (%)", x = "Year") ) Christmas_PO_annotated #2) CHI SQUARED ANALYSIS WITHIN YEAR #_________________________________________________________________________________________________ set.seed(1025) head(christmas) christmas$Strain <- as.factor(christmas$Strain) christmas$Year_TP_F <- as.factor(christmas$Year_TP) christmas$Year_F <- as.factor(christmas$Year) christmas_2018 <- christmas[christmas$Year == 2018 , ] christmas_2019 <- christmas[christmas$Year == 2019 , ] christmas_2020 <- christmas[christmas$Year == 2020 , ] #CHI SQUARED 2018 head(christmas) christmas$Year_TP_F <- as.factor(christmas$Year_TP) christmas_chi_geno_2018 <- chisq.test(christmas_2018$Strain, christmas_2018$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) christmas_chi_geno_2018$p.value #CHI SQUARED 2019 christmas_chi_geno_2019 <- chisq.test(christmas_2019$Strain, christmas_2019$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) christmas_chi_geno_2019$p.value #3) CHI SQUARED ANALYSIS BTWN YEARS #_________________________________________________________________________________________________ head(christmas) tally(~Year_F, data = christmas) christmas$Strain <- as.factor(christmas$Strain) christmas$Year_TP_F <- as.factor(christmas$Year_TP_F) christmas$Year_F <- as.factor(christmas$Year) time_one <- c("2018", "2019", "2020") christmas_btwn <- christmas[christmas$Year_TP_F %in% time_one , ] christmas_btwn <- droplevels(christmas_btwn) tally(~Year_TP_F, data = christmas_btwn) christmas_btwn_18_19 <-christmas_btwn[christmas_btwn$Year == 2018 | christmas_btwn$Year == 2019 , ] christmas_btwn_18_19 <- droplevels(christmas_btwn_18_19) tally(~Year, data = christmas_btwn_18_19) christmas_btwn_18_19$Strain <- as.factor(christmas_btwn_18_19$Strain) christmas_btwn_18_19$Year_F <- as.factor(christmas_btwn_18_19$Year) christmas_btwn_19_20 <-christmas_btwn[christmas_btwn$Year == 2019 | christmas_btwn$Year == 2020 , ] christmas_btwn_19_20 <- droplevels(christmas_btwn_19_20) tally(~Year_TP_F, data = christmas_btwn_19_20) christmas_btwn_19_20$Strain <- as.factor(christmas_btwn_19_20$Strain) christmas_btwn_19_20$Year_F <- as.factor(christmas_btwn_19_20$Year) #CHI SQUARED COMPARING 2018 TO 2019 christmas_chi_btwn_18_19 <- chisq.test(christmas_btwn_18_19$Strain, christmas_btwn_18_19$Year_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) christmas_chi_btwn_18_19$p.value #CHI SQUARED COMPARING 2019 TO 2020 christmas_chi_btwn_19_20 <- chisq.test(christmas_btwn_19_20$Strain, christmas_btwn_19_20$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) christmas_chi_btwn_19_20$p.value #4) ALL THREE YEARS (first t1 to last t2) #_________________________________________________________________________________________________ head(christmas) tally(~Year_TP_F, data = christmas) christmas_three_yr_2018 <- as.data.frame(christmas[christmas$Year_TP_F == 2018 , ]) christmas_three_yr_2020 <- as.data.frame(christmas[christmas$Year_TP_F == 2020 , ]) christmas_three_yr <- rbind(christmas_three_yr_2018, christmas_three_yr_2020) christmas_chi_three_yr <- chisq.test(christmas_three_yr$Strain, christmas_three_yr$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) christmas_chi_three_yr$p.value ##### # ANALYZE GRAYS BAY ###### #1 Graph with y-axis as percent of total sites visited #_________________________________________________________________________________________________ head(grays_bay) #grays_bay$Year_TP <- grays_bay$Time_Point grays_bay_counts <- grays_bay %>% count(Year_TP,Strain, Taxa) grays_bay_counts grays_bay_counts$Percent_Ocupied <- ((grays_bay_counts$n)/125)*100 grays_bay_counts grays_bay_plot_Percent_Ocupied <- grays_bay_counts %>% ggplot()+geom_line(aes(x=Year_TP,y=Percent_Ocupied,col=Strain))+geom_point(aes(x=Year_TP,y=Percent_Ocupied, col=Strain), size = 2) grays_PO_annotated <- print(grays_bay_plot_Percent_Ocupied + ggtitle("B. Grays Bay") + theme_light() + geom_vline(xintercept = 2018.25, linetype="dashed", color = "red") + geom_vline(xintercept = 2019.25, linetype="dashed", color = "red") + geom_vline(xintercept = 2020.25, linetype="dashed", color = "red") + guides(col=guide_legend(ncol=1,byrow=TRUE)) + labs(y="Percent of Sampled Sites (%)", x = "Year") ) grays_PO_annotated #2) CHI SQUARED ANALYSIS WITHIN YEAR #_________________________________________________________________________________________________ head(grays_bay) grays_bay$Genotype <- as.factor(grays_bay$Strain) grays_bay$Year_TP_F <- as.factor(grays_bay$Year_TP) grays_bay$Year_F <- as.factor(grays_bay$Year) grays_bay_2018 <- grays_bay[grays_bay$Year == 2018 , ] grays_bay_2019 <- grays_bay[grays_bay$Year == 2019 , ] grays_bay_2020 <- grays_bay[grays_bay$Year == 2020 , ] grays_bay$Year_TP_F <- as.factor(grays_bay$Year_TP) #CHI SQUARED 2018 grays_bay_chi_geno_2018 <- chisq.test(grays_bay_2018$Strain, grays_bay_2018$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) grays_bay_chi_geno_2018$p.value #CHI SQUARED 2019 grays_bay_chi_geno_2019 <- chisq.test(grays_bay_2019$Strain, grays_bay_2019$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) grays_bay_chi_geno_2019$p.value #CHI SQUARED 2020 grays_bay_chi_geno_2020 <- chisq.test(grays_bay_2020$Strain, grays_bay_2020$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) grays_bay_chi_geno_2020$p.value #3) CHI SQUARED ANALYSIS BTWN YEARS #_________________________________________________________________________________________________ head(grays_bay) tally(~Year_F, data = grays_bay) grays_bay$Strain <- as.factor(grays_bay$Strain) grays_bay$Year_TP_F <- as.factor(grays_bay$Year_TP_F) grays_bay$Year_F <- as.factor(grays_bay$Year) time_one <- c("2018", "2019", "2020") grays_bay_btwn <- grays_bay[grays_bay$Year_TP_F %in% time_one , ] grays_bay_btwn <- droplevels(grays_bay_btwn) tally(~Year_TP_F, data = grays_bay_btwn) grays_bay_btwn_18_19 <-grays_bay_btwn[grays_bay_btwn$Year == 2018 | grays_bay_btwn$Year == 2019 , ] grays_bay_btwn_18_19 <- droplevels(grays_bay_btwn_18_19) tally(~Year, data = grays_bay_btwn_18_19) grays_bay_btwn_18_19$Strain <- as.factor(grays_bay_btwn_18_19$Strain) grays_bay_btwn_18_19$Year_F <- as.factor(grays_bay_btwn_18_19$Year) grays_bay_btwn_19_20 <-grays_bay_btwn[grays_bay_btwn$Year == 2019 | grays_bay_btwn$Year == 2020 , ] grays_bay_btwn_19_20 <- droplevels(grays_bay_btwn_19_20) tally(~Year_TP_F, data = grays_bay_btwn_19_20) grays_bay_btwn_19_20$Strain <- as.factor(grays_bay_btwn_19_20$Strain) grays_bay_btwn_19_20$Year_F <- as.factor(grays_bay_btwn_19_20$Year) #CHI SQUARED COMPARING 2018 TO 2019 grays_bay_chi_btwn_18_19 <- chisq.test(grays_bay_btwn_18_19$Strain, grays_bay_btwn_18_19$Year_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) grays_bay_chi_btwn_18_19$p.value #CHI SQUARED COMPARING 2019 TO 2020 grays_bay_chi_btwn_19_20 <- chisq.test(grays_bay_btwn_19_20$Strain, grays_bay_btwn_19_20$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) grays_bay_chi_btwn_19_20$p.value #4) ALL THREE YEARS (first t1 to last t2) #_________________________________________________________________________________________________ head(grays_bay) tally(~Year_TP_F, data = grays_bay) grays_bay_three_yr_2018 <- as.data.frame(grays_bay[grays_bay$Year_TP_F == 2018 , ]) grays_bay_three_yr_2020 <- as.data.frame(grays_bay[grays_bay$Year_TP_F == 2020.5 , ]) grays_bay_three_yr <- rbind(grays_bay_three_yr_2018, grays_bay_three_yr_2020) grays_bay_chi_three_yr <- chisq.test(grays_bay_three_yr$Strain, grays_bay_three_yr$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) grays_bay_chi_three_yr ##### # ANALYZE HAM LAKE ###### #1 Graph with y-axis as percent of total sites visited #_________________________________________________________________________________________________ names(ham) ham_counts <- ham %>% count(Year_TP,Strain, Taxa) ham_counts is.data.frame(ham_counts) ham_counts$Percent_Ocupied <- ((ham_counts$n)/147)*100 ham_counts$Strain <- ham_counts$Strain ham_plot_Percent_Ocupied <- ham_counts %>% ggplot()+geom_line(aes(x=Year_TP,y=Percent_Ocupied,col=Strain))+geom_point(aes(x=Year_TP,y=Percent_Ocupied, col=Strain), size = 2) Ham_PO_annotated <- print(ham_plot_Percent_Ocupied + ggtitle("C. Ham") + theme_light() + geom_vline(xintercept = 2018.65, linetype="dashed", color = "red") + guides(col=guide_legend(ncol=1,byrow=TRUE)) + labs(y="Percent of Sampled Sites (%)", x = "Year") ) Ham_PO_annotated #2) CHI SQUARED ANALYSIS WITHIN YEAR #_________________________________________________________________________________________________ set.seed(1025) head(ham) ham$Strain <- as.factor(ham$Strain) ham$Year_TP_F <- as.factor(ham$Year_TP) ham$Year_F <- as.factor(ham$Year) ham_2018 <- ham[ham$Year == 2018 , ] ham_2019 <- ham[ham$Year == 2019 , ] ham_2020 <- ham[ham$Year == 2020 , ] #CHI SQUARED 2018 ham_chi_geno_2018 <- chisq.test(ham_2018$Strain, ham_2018$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) ham_chi_geno_2018$p.value #CHI SQUARED 2019 ham_chi_geno_2019 <- chisq.test(ham_2019$Strain, ham_2019$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) ham_chi_geno_2019$p.value #CHI SQUARED 2020 ham_chi_geno_2020 <- chisq.test(ham_2020$Strain, ham_2020$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) ham_chi_geno_2020$p.value #3) CHI SQUARED ANALYSIS BTWN YEARS #_________________________________________________________________________________________________ head(ham) tally(~Year_F, data = ham) ham$Strain <- as.factor(ham$Strain) ham$Year_TP_F <- as.factor(ham$Year_TP_F) ham$Year_F <- as.factor(ham$Year) time_one <- c("2018", "2019", "2020") ham_btwn <- ham[ham$Year_TP_F %in% time_one , ] ham_btwn <- droplevels(ham_btwn) tally(~Year_TP_F, data = ham_btwn) ham_btwn_18_19 <-ham_btwn[ham_btwn$Year == 2018 | ham_btwn$Year == 2019 , ] ham_btwn_18_19 <- droplevels(ham_btwn_18_19) tally(~Year, data = ham_btwn_18_19) ham_btwn_18_19$Strain <- as.factor(ham_btwn_18_19$Strain) ham_btwn_18_19$Year_F <- as.factor(ham_btwn_18_19$Year) ham_btwn_19_20 <-ham_btwn[ham_btwn$Year == 2019 | ham_btwn$Year == 2020 , ] ham_btwn_19_20 <- droplevels(ham_btwn_19_20) tally(~Year_TP_F, data = ham_btwn_19_20) ham_btwn_19_20$Strain <- as.factor(ham_btwn_19_20$Strain) ham_btwn_19_20$Year_F <- as.factor(ham_btwn_19_20$Year) #CHI SQUARED COMPARING 2018 TO 2019 ham_chi_btwn_18_19 <- chisq.test(ham_btwn_18_19$Strain, ham_btwn_18_19$Year_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) ham_chi_btwn_18_19$p.value #CHI SQUARED COMPARING 2019 TO 2020 ham_chi_btwn_19_20 <- chisq.test(ham_btwn_19_20$Strain, ham_btwn_19_20$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) ham_chi_btwn_19_20$p.value #4) ALL THREE YEARS (first t1 to last t2) #_________________________________________________________________________________________________ ham_three_yr_2018 <- as.data.frame(ham[ham$Year_TP_F == 2018 , ]) ham_three_yr_2020 <- as.data.frame(ham[ham$Year_TP_F == 2020.5 , ]) ham_three_yr <- rbind(ham_three_yr_2018, ham_three_yr_2020) ham_chi_three_yr <- chisq.test(ham_three_yr$Strain, ham_three_yr$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) ham_chi_three_yr ##### # ANALYZE INDEPENDENCE LAKE ##### #1 Graph with y-axis as percent of total sites visited #_________________________________________________________________________________________________ head(indp) indp_counts <- indp %>% count(Year_TP,Strain, Taxa) indp_counts is.data.frame(indp_counts) indp_counts$Percent_Ocupied <- ((indp_counts$n)/198)*100 indp_counts$Strain <- indp_counts$Strain indp_plot_Percent_Ocupied <- indp_counts %>% ggplot()+geom_line(aes(x=Year_TP,y=Percent_Ocupied,col=Strain))+geom_point(aes(x=Year_TP,y=Percent_Ocupied, col=Strain), size = 2) indp_PO_annotated <- print(indp_plot_Percent_Ocupied + ggtitle("G. Independence") + theme_light() + ylim(0, 18) + guides(col=guide_legend(ncol=1,byrow=TRUE)) + labs(y="Percent of Sampled Sites (%)", x = "Year") ) #2) CHI SQUARED ANALYSIS WITHIN YEAR #_________________________________________________________________________________________________ head(indp) indp$Year_TP_F <- as.factor(indp$Year_TP) indp$Strain <- as.factor(indp$Strain) indp_2018 <- indp[indp$Year == 2018 , ] indp_2019 <- indp[indp$Year == 2019 , ] indp_2020 <- indp[indp$Year == 2020 , ] #CHI SQUARED 2018 indp_chi_geno_2018 <- chisq.test(indp_2018$Strain, indp_2018$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) indp_chi_geno_2018$p.value #CHI SQUARED 2019 indp_chi_geno_2019 <- chisq.test(indp_2019$Strain, indp_2019$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) indp_chi_geno_2019$p.value #CHI SQUARED 2020 indp_chi_geno_2020 <- chisq.test(indp_2020$Strain, indp_2020$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) indp_chi_geno_2020$p.value #3) CHI SQUARED ANALYSIS BTWN YEARS #_________________________________________________________________________________________________ head(indp) tally(~Year_TP_F, data = indp) time_one <- c("2018", "2019", "2020") indp_btwn <- indp[indp$Year_TP_F %in% time_one , ] indp_btwn <- droplevels(indp_btwn) tally(~Year_TP_F, data = indp_btwn) indp_btwn_18_19 <-indp_btwn[indp_btwn$Year == 2018 | indp_btwn$Year == 2019 , ] indp_btwn_18_19 <- droplevels(indp_btwn_18_19) tally(~Year_TP_F, data = indp_btwn_18_19) indp_btwn_19_20 <-indp_btwn[indp_btwn$Year == 2019 | indp_btwn$Year == 2020 , ] indp_btwn_19_20 <- droplevels(indp_btwn_19_20) tally(~Year_TP_F, data = indp_btwn_19_20) #CHI SQUARED COMPARING 2018 TO 2019 indp_chi_btwn_18_19 <- chisq.test(indp_btwn_18_19$Strain, indp_btwn_18_19$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) indp_chi_btwn_18_19$p.value #CHI SQUARED COMPARING 2019 TO 2020 indp_chi_btwn_19_20 <- chisq.test(indp_btwn_19_20$Strain, indp_btwn_19_20$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) indp_chi_btwn_19_20$p.value #4) ALL THREE YEARS (first t1 to last t2) #_________________________________________________________________________________________________ head(indp) tally(~Year_TP, data = indp) indp$Strain <- as.factor(indp$Strain) indp$Year_TP_F <- as.factor(indp$Year_TP) indp_three_yr_2018 <- as.data.frame(indp[indp$Year_TP_F == 2018 , ]) indp_three_yr_2020 <- as.data.frame(indp[indp$Year_TP_F == 2020.5 , ]) indp_three_yr <- rbind(indp_three_yr_2018, indp_three_yr_2020) indp_chi_three_yr <- chisq.test(indp_three_yr$Strain, indp_three_yr$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) indp_chi_three_yr ##### #ANALYZE NORTH ARM ##### #1 Graph with y-axis as percent of total sites visited #_________________________________________________________________________________________________ head(north_arm) north_arm$Genotype <- as.factor(north_arm$Strain) north_arm$Year_TP_F <- as.factor(north_arm$Y) north_arm$Year_F <- as.factor(north_arm$Year) north_arm$Year <- as.numeric(north_arm$Year) head(north_arm) tally(~Year, data = north_arm) north_arm$Year_TP <- north_arm$Time_Point north_arm_counts <- north_arm %>% count(Year_TP_F,Strain) north_arm_counts north_arm_counts$Percent_Ocupied <- ((north_arm_counts$n)/229)*100 north_arm_counts north_arm$Strain <- as.factor(north_arm$Strain) north_arm_plot_Percent_Ocupied <- north_arm_counts %>% ggplot()+geom_line(aes(x=Year,y=Percent_Ocupied,col=Strain))+geom_point(aes(x=Year_TP_F,y=Percent_Ocupied, col=Strain), size = 2) north_arm_PO_annotated <- print(north_arm_plot_Percent_Ocupied + ggtitle("E. North Arm") + theme_light() + geom_vline(xintercept = 2018.25, linetype="dashed", color = "red") + guides(col=guide_legend(ncol=1,byrow=TRUE)) + labs(y="Percent of Sampled Sites (%)", x = "Year") ) north_arm_PO_annotated #2) CHI SQUARED ANALYSIS WITHIN YEAR #_________________________________________________________________________________________________ #Only one timepoint per year for North Arm Lake set.seed(1025) #3) CHI SQUARED ANALYSIS BTWN YEARS #_________________________________________________________________________________________________ head(north_arm) tally(~Year_F, data = north_arm) north_arm$Genotype <- as.factor(north_arm$Genotype) north_arm$Year_TP_F <- as.factor(north_arm$Year_TP_F) north_arm$Year_F <- as.factor(north_arm$Year) time_one <- c("2018", "2019", "2020") north_arm_btwn <- north_arm[north_arm$Year %in% time_one , ] north_arm_btwn <- droplevels(north_arm_btwn) tally(~Year, data = north_arm_btwn) north_arm_btwn_18_19 <-north_arm_btwn[north_arm_btwn$Year == 2018 | north_arm_btwn$Year == 2019 , ] north_arm_btwn_18_19 <- droplevels(north_arm_btwn_18_19) tally(~Year, data = north_arm_btwn_18_19) north_arm_btwn_18_19$Genotype <- as.factor(north_arm_btwn_18_19$Genotype) north_arm_btwn_18_19$Year_F <- as.factor(north_arm_btwn_18_19$Year) north_arm_btwn_19_20 <-north_arm_btwn[north_arm_btwn$Year == 2019 | north_arm_btwn$Year == 2020 , ] north_arm_btwn_19_20 <- droplevels(north_arm_btwn_19_20) tally(~Year_TP_F, data = north_arm_btwn_19_20) north_arm_btwn_19_20$Genotype <- as.factor(north_arm_btwn_19_20$Genotype) north_arm_btwn_19_20$Year_F <- as.factor(north_arm_btwn_19_20$Year) #CHI SQUARED COMPARING 2018 TO 2019 north_arm_chi_btwn_18_19 <- chisq.test(north_arm_btwn_18_19$Genotype, north_arm_btwn_18_19$Year, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) north_arm_chi_btwn_18_19$p.value #CHI SQUARED COMPARING 2019 TO 2020 north_arm_chi_btwn_19_20 <- chisq.test(north_arm_btwn_19_20$Genotype, north_arm_btwn_19_20$Year, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) north_arm_chi_btwn_19_20$p.value #4) ALL THREE YEARS (first t1 to last t2) #_________________________________________________________________________________________________ head(north_arm) north_arm$Genotype <- as.factor(north_arm$Genotype) north_arm$Year_TP_F <- as.factor(north_arm$Year_TP) tally(~Genotype + Year_TP_F, data = north_arm) levels(north_arm$Year_TP_F) north_arm_three_yr_2018 <- as.data.frame(north_arm[north_arm$Year_TP_F == 2018 , ]) tally(~Genotype + Year_TP_F, data = north_arm_three_yr_2018) north_arm_three_yr_2020 <- as.data.frame(north_arm[north_arm$Year_TP_F == 2020 , ]) tally(~Genotype + Year_TP_F, data = north_arm_three_yr_2020) north_arm_three_yr <- rbind(north_arm_three_yr_2018, north_arm_three_yr_2020) tally(~Genotype + Year_TP_F, data = north_arm_three_yr) north_arm_chi_three_yr <- chisq.test(north_arm_three_yr$Genotype, north_arm_three_yr$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) north_arm_chi_three_yr ###### # ANALYZE PHELPS BAY ##### #1 Graph with y-axis as percent of total sites visited #_________________________________________________________________________________________________ head(phelps) tally(~Year_TP, data = phelps) phelps_counts <- phelps %>% count(Year_TP,Strain) phelps_counts phelps_counts$Percent_Ocupied <- ((phelps_counts$n)/148)*100 phelps_counts phelps_plot_Percent_Ocupied <- phelps_counts %>% ggplot()+geom_line(aes(x=Year_TP,y=Percent_Ocupied,col=Strain))+geom_point(aes(x=Year_TP,y=Percent_Ocupied, col=Strain), size = 2) phelps_PO_annotated <- print(phelps_plot_Percent_Ocupied + ggtitle("D. Phelps") + theme_light() + xlim(2018, 2020.5) + geom_vline(xintercept = 2019.95, linetype="dashed", color = "red") + guides(col=guide_legend(ncol=1,byrow=TRUE)) + labs(y="Percent of Sampled Sites (%)", x = "Year") ) #2) CHI SQUARED ANALYSIS WITHIN YEAR #_________________________________________________________________________________________________ set.seed(1025) head(phelps) phelps$Strain <- as.factor(phelps$Strain) phelps$Year_TP_F <- as.factor(phelps$Year_TP) head(phelps) tally(~Year_TP_F, data = phelps) head(phelps) tally(~ Year, data = phelps) phelps_2019 <- phelps[phelps$Year == 2019 , ] phelps_2020 <- phelps[phelps$Year == 2020 , ] tally(~ Strain + Year_TP_F, data = phelps_2020) #CHI SQUARED 2020 tally(~Strain + Year_TP_F, data = phelps_2020) phelps_chi_geno_2020 <- chisq.test(phelps_2020$Strain, phelps_2020$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) phelps_chi_geno_2020$p.value #3) CHI SQUARED ANALYSIS BTWN YEARS #_________________________________________________________________________________________________ head(phelps) tally(~Year_TP_F, data = phelps) phelps$Strain <- as.factor(phelps$Strain) phelps$Year_TP_F <- as.factor(phelps$Year_TP_F) phelps$Year_F <- as.factor(phelps$Year) phelps_time <- c("2019.5", "2020.5") phelps_btwn <- phelps[phelps$Year_TP_F %in% phelps_time , ] phelps_btwn <- droplevels(phelps_btwn) tally(~Year_TP_F, data = phelps_btwn) phelps_chi_btwn_19_20 <- chisq.test(phelps_btwn$Strain, phelps_btwn$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) phelps_chi_btwn_19_20$p.value #4) ALL THREE YEARS (first t1 to last t2) #_________________________________________________________________________________________________ head(phelps) names(phelps)[names(phelps)== "Time_Point"] <- "Year_TP_F" phelps$Strain <- as.factor(phelps$Strain) phelps$Year_TP_F <- as.factor(phelps$Year_TP_F) phelps_three_yr_2018 <- as.data.frame(phelps[phelps$Year_TP_F == 2019.5 , ]) phelps_three_yr_2020 <- as.data.frame(phelps[phelps$Year_TP_F == 2020.5 , ]) phelps_three_yr <- rbind(phelps_three_yr_2018, phelps_three_yr_2020) phelps_chi_three_yr <- chisq.test(phelps_three_yr$Strain, phelps_three_yr$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) phelps_chi_three_yr ##### #ANALYZE SMITHS BAY ##### #1 Graph with y-axis as percent of total sites visited #_________________________________________________________________________________________________ head(smith_geno) smith_geno_counts <- smith_geno %>% count(Year,Strain, Taxa) smith_geno_counts is.data.frame(smith_geno_counts) smith_geno_counts$Percent_Ocupied <- ((smith_geno_counts$n)/127)*100 smith_geno_plot_Percent_Ocupied <- smith_geno_counts %>% ggplot()+geom_line(aes(x=Year,y=Percent_Ocupied,col=Strain))+geom_point(aes(x=Year,y=Percent_Ocupied, col=Strain), size = 2) smith_PO_annotated <- print(smith_geno_plot_Percent_Ocupied + ggtitle("F. Smith") + theme_light() + guides(col=guide_legend(ncol=1,byrow=TRUE)) + labs(y="Percent of Sampled Sites (%)", x = "Year") ) #2) CHI SQUARED ANALYSIS WITHIN YEAR #_________________________________________________________________________________________________ #Only have data for one timepoint for each year in Smith's Bay set.seed(1025) head(smith_geno) smith_geno$Strain <- as.factor(smith_geno$Strain) smith_geno$Year_F <- as.factor(smith_geno$Year) tally(~Year_F, data = smith_geno) smith_geno_2018 <- smith_geno[smith_geno$Year == 2018 , ] smith_geno_2019 <- smith_geno[smith_geno$Year == 2019 , ] smith_geno_2020 <- smith_geno[smith_geno$Year == 2020 , ] #3) CHI SQUARED ANALYSIS BTWN YEARS #_________________________________________________________________________________________________ head(smith_geno) tally(~Year_F, data = smith_geno) smith_geno$Strain <- as.factor(smith_geno$Strain) smith_geno$Year_F <- as.factor(smith_geno$Year) time_one <- c("2018", "2019", "2020") smith_geno_btwn <- smith_geno[smith_geno$Year_F %in% time_one , ] smith_geno_btwn <- droplevels(smith_geno_btwn) smith_geno_btwn_18_19 <-smith_geno_btwn[smith_geno_btwn$Year == 2018 | smith_geno_btwn$Year == 2019 , ] smith_geno_btwn_18_19 <- droplevels(smith_geno_btwn_18_19) tally(~Year, data = smith_geno_btwn_18_19) smith_geno_btwn_18_19$Strain <- as.factor(smith_geno_btwn_18_19$Strain) smith_geno_btwn_18_19$Year_F <- as.factor(smith_geno_btwn_18_19$Year) smith_geno_btwn_19_20 <-smith_geno_btwn[smith_geno_btwn$Year == 2019 | smith_geno_btwn$Year == 2020 , ] smith_geno_btwn_19_20 <- droplevels(smith_geno_btwn_19_20) tally(~Year_F, data = smith_geno_btwn_19_20) smith_geno_btwn_19_20$Strain <- as.factor(smith_geno_btwn_19_20$Strain) smith_geno_btwn_19_20$Year_F <- as.factor(smith_geno_btwn_19_20$Year) #CHI SQUARED COMPARING 2018 TO 2019 smith_geno_chi_btwn_18_19 <- chisq.test(smith_geno_btwn_18_19$Strain, smith_geno_btwn_18_19$Year_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) smith_geno_chi_btwn_18_19$p.value #CHI SQUARED COMPARING 2019 TO 2020 smith_geno_chi_btwn_19_20 <- chisq.test(smith_geno_btwn_19_20$Strain, smith_geno_btwn_19_20$Year_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) smith_geno_chi_btwn_19_20$p.value #4) ALL THREE YEARS (first t1 to last t2) #_________________________________________________________________________________________________ head(smith_geno) tally(~Year, data = smith_geno) smith_geno$Year_TP_F <- smith_geno$Year_F smith_geno$Strain <- as.factor(smith_geno$Strain) smith_geno$Year_TP_F <- as.factor(smith_geno$Year_TP) smith_geno_three_yr_2018 <- as.data.frame(smith_geno[smith_geno$Year_TP_F == 2018 , ]) smith_geno_three_yr_2020 <- as.data.frame(smith_geno[smith_geno$Year_TP_F == 2020 , ]) smith_geno_three_yr <- rbind(smith_geno_three_yr_2018, smith_geno_three_yr_2020) smith_geno_chi_three_yr <- chisq.test(smith_geno_three_yr$Strain, smith_geno_three_yr$Year_TP_F, correct = TRUE, p = rep(1/length(x), length(x)), rescale.p = FALSE, simulate.p.value = TRUE, B = 2000) smith_geno_chi_three_yr ##### #THE END