#' ## Northwest Minnesota - Data Import and Cleaning
#' 
#' ### Nina Hill  -  Secretive marshbird response to invasive wetland plant management
#' 
#' University of Minnesota, Minnesota Cooperative Fish & Wildlife Research Unit
#' 
#' 135 Skok Hall, Upper Buford Circle, Saint Paul, Minnesota 55108
#' 
#' hillx725@umn.edu
format(Sys.Date(), "%Y %B %d")

#' *******************************************************************************************************
#' ## NORTHWESTERN MINNESOTA STUDY AREA
#'    Import data NWBIRD and NWSURV, combine and reshape into NWDATA, NWPOINT.ALL & NWPOINT.SURV
#'    Subset Round2 detections 
#'    Calculate mean change in detections before to each year after herbicide treatment
#'    Calculate differences in change between spray and control(non-spray) areas
#'    Compare in T-tests
#'    
#'    Manston dropped (spray zones moved), TLS4 dropped (really far from spray zone), EPC3 dropped 
#' *******************************************************************************************************

# 0) Set up workspace-----------------------------------
    setwd("F:/forDRUM")
    library(stringr); library(ggplot2); library(ggpubr)
    library(plyr); library(tidyr); library(tidyverse)
    rm(list=ls(all=TRUE)) # Clear out any objects in memory
    
    birdcolors<-c("orange", "turquoise","purple","tomato","dodgerblue") 
    # birdcolors2 <- c("#F5E771", "#556471","#F0E8E3", "#716B5B", "#748BA7")
    bird.broadcast=c("AMBI","LEBI","PBGR","SORA","VIRA")
    # bird.secondary=c("RNGR", "WIPH", "SACR","YHBL", "WISN", "AMCO", "BLTE", "UPSA", "NO BIRDS DETECTED")
    years <- c("2015","2016", "2017", "2018")
    PTdropped <- c("MNS2", "MNS4", "MNC5", "MNC6", "MNS2", "MNS4", "MNC5", "MNC6","TLS4")

    right = function(text, num_char) {
      substr(text, nchar(text) - (num_char-1), nchar(text))
    }
# 1) Load NWBIRD.ALL and NWSURV, merge into NWBIRD.ALL-------------------
    # Reformat to include ALL detections of BROADCAST species (see further down for reformatting to only survey detections).
    # I did a lot of data wrangling already in Excel. Created many columns of summarized data, and created extra
    # identifiers (ex. OutsideTimeONLY).
      NWBIRD.ALL <- read.csv("data/NWBIRD.csv") 
      
      NWBIRD.SURV <- NWBIRD.ALL %>%
        filter(OutTimeONLY != "Y" & OutTarget != "Y" & Previous !="Y") %>%
        filter(!PointID %in% PTdropped) %>% 
        filter(Species %in% bird.broadcast) %>%
        droplevels() %>% # drops unused levels. like AMCO, BLTE from Species list
        select(SurveyID, DetectionRecID, Year, Round, PointID, Species, Distance, OutTimeONLY, OutTarget, Previous)
      # Subset for only birds detected DURING the survey time, at the TARGET wetland, NOT previously detected
      # 1252 obs, now 1495 w 2017 data added; 1635 w 2018 data
      # 1635 ALL; 1051 SURV
      
      # NOTES---
      # NOTE: Manston points only surveyed 2 times, but had to be dropped since the spray areas were moved
      # NOTE: Twin Lakes, TLS4 only 3 surveys. 2015 R1 surveyed and realized not too far from spray; R2 not surveyed. 2016 surveyed R1 and R2
      # NOTE: East Park, don't throw out EPC3 only 3 surveys. 2015 R1 not established; new at R2. 2016 surveyed both R1 and R2.
      # pull out 283 obs of secondary species & "NO BIRDS" # 9 records where "NO BIRDS DETECTED"
      # AMBI 372, AMCO 16, BLTE 80, LEBI 39, NO BIRDS 9, PBGR 147, RNGR 14, SACR 9, SORA 321, VIRA 90, WIPH 2, WISN 130, YHBL 23 = 1252  
      # 835 obs; 983
      
      NWSURV.orig <- read.csv("data/NWSURV.csv") 
      visits <- NWSURV.orig %>%
        select(PointID, Year, Round, SurveyID) %>%
        pivot_wider(id_cols = PointID, names_from=c(Year, Round), values_from=SurveyID, values_fn = list(SurveyID = length)) 
      colnames(visits) <- c("PointID", "R20151", "R20152", "R20161", "R20162", "R20172", "R20182")
      # 321 surveys, 313 without Manston
      # 2015_1 2015_2 2016_1 2016_2 2017_2 2018_2 
      # 59     59     56     56     52     39 
      
      # Merge NWBIRD.SURV and NWSURV to create NWDATA.SURV
      NWDATA.SURV <- NWSURV.orig %>%
        filter(!SiteName == "Manston") %>%
        left_join(NWBIRD.SURV,by=c("SurveyID", "Year", "PointID", "Round")) %>%
        droplevels() %>%   
        mutate("Year" = factor(Year)) %>% # Recode factor(SurveyID) and factor(year)
        mutate("SurveyID" = factor(SurveyID)) %>%
        select(SurveyID, Year, Round, PointID, Treatment, Species, Distance, SiteName)
              # OutTimeONLY, OutTarget, Previous)  # is.na(Species) were surveys when no broadcast species detected
      
# 2) Transform data from NWDATA.SURV into NWPOINT-----------------------------
      # Tally number of visits per point and detections per pt
      
      # NWPOINT.SURV <- NWDATA.SURV %>%
      #   filter(!is.na(Species)) %>%
      #   filter(!SiteName == "Manston") %>% droplevels() %>%
      #   select(!OutTimeONLY, OutTarget, Previous, ) %>%
      #   pivot_wider(id_cols = c(PointID, SiteName, Treatment), names_from=c(Year, Round), names_sort = TRUE,
      #               values_from = SurveyID,
      #               values_fn = list(SurveyID = length),
      #               values_fill = 99) # 99 is no birds det AND/OR no survey that year
      # # 'NA' Species column is for 54 surveys when no broadcast species detected
      # (table(NWPOINT.SURV$PointID, NWPOINT.SURV$Year, NWPOINT.SURV$Round)) # Year and Round visits to each PointID
      # # write.csv(NWPOINT.SURV, file="data/NWPOINTwide_visits.csv")
      # # NOTE: this is wide dataset of rows of Points with columns of sum counts for each species in groups by visit (yrround)
      
      # Tally bird detections by species for each visit
      NWPOINT <- NWDATA.SURV %>%
        pivot_wider(id_cols = c(PointID, Treatment, Year, Round, SiteName), 
                    names_from=c(Species), names_sort = TRUE,
                    values_from = SurveyID,
                    values_fn = list(SurveyID = length),
                    values_fill = 0) 
      
      (table(NWPOINT$PointID, NWPOINT$Year)) # Year and Round visits to each PointID
      
      
# 3) Select max detection per year for species-------------------------------------
      # Timing of 2017 & 2018 surveys match R2 timing of 2015 & 2016 more closely
      # instead of max per year, use Round2 detections for all years in analysis
      NWPOINTR2 <- NWPOINT %>%
        filter(Round == 2) %>%
        select(!c('NA'))
      # from 1051 detections across all visits, to 529 Round2 detections 2015-2018
      
    # # Tally detections for each year, round. Get max det per year
      # NWPOINTmax <- NWDATA.SURV %>%
      # pivot_wider(id_cols = c(PointID, Treatment, Year, SiteName), names_from=c(Species, Round), names_sort = TRUE,
      #           values_from = SurveyID,
      #           values_fn = list(SurveyID = length),
      #           values_fill = 0)  %>%
      #   dplyr::rowwise() %>%
      #   dplyr::mutate(AMBI = max(AMBI_1, AMBI_2)) %>%
      #   dplyr::mutate(LEBI = max(LEBI_1, LEBI_2)) %>%
      #   dplyr::mutate(PBGR = max(PBGR_1, PBGR_2)) %>%
      #   dplyr::mutate(SORA = max(SORA_1, SORA_2)) %>%
      #   dplyr::mutate(VIRA = max(VIRA_1, VIRA_2)) %>%
      #   select(!AMBI_1:VIRA_2) %>%
      #   select(!c('NA_1', 'NA_2'))
      # # from 1051 detections across all visits, to 871 maximum detection per year 
      
      
      # format from wide to long
      NWPOINTR2.long <- NWPOINTR2 %>%
        pivot_longer(cols = AMBI:VIRA, names_to="Species", values_to="Count") %>% 
        mutate(Species = str_sub(as.character(Species), 1, 4))
      
# End NW data import and format******************************************************
      
      
      
# 4) Plot SURV and Round2 data summaries-----------------------------
      
  # Format data for plotting, species counts by visit---
      NWSURV.count <- NWDATA.SURV %>%
        select(PointID, Treatment, Year, Round, Species, SurveyID)%>%
        pivot_wider(id_cols =c(PointID, Treatment, Species), names_from=c(Year, Round), values_fill=0,
                    values_from=SurveyID, values_fn = list(SurveyID = length)) %>%
        complete(PointID, Species, fill=list('2015_1'=0, '2015_2'=0, '2016_1'=0, '2016_2'=0, '2017_2'=0, '2018_2'=0)) %>%
        filter(!is.na(Species))
      colnames(NWSURV.count) <- c("PointID", "Species","Treatment", "R20151", "R20152", "R20161", "R20162", "R20172", "R20182")
      kk <- intersect(grep("S", NWSURV.count$PointID), which(is.na(NWSURV.count$Treatment)))
      NWSURV.count$Treatment[kk] <- "Spray"
      kk <- intersect(grep("C", NWSURV.count$PointID), which(is.na(NWSURV.count$Treatment)))
      NWSURV.count$Treatment[kk] <- "Control"
      rm(kk)
      
      NWSURV.count$R20151[which(NWSURV.count$PointID %in% visits$PointID[is.na(visits$R20151)])] <- NA
      NWSURV.count$R20152[which(NWSURV.count$PointID %in% visits$PointID[is.na(visits$R20152)])] <- NA
      NWSURV.count$R20161[which(NWSURV.count$PointID %in% visits$PointID[is.na(visits$R20161)])] <- NA
      NWSURV.count$R20162[which(NWSURV.count$PointID %in% visits$PointID[is.na(visits$R20162)])] <- NA
      NWSURV.count$R20172[which(NWSURV.count$PointID %in% visits$PointID[is.na(visits$R20172)])] <- NA
      NWSURV.count$R20182[which(NWSURV.count$PointID %in% visits$PointID[is.na(visits$R20182)])] <- NA
      
      NWSURV.count.long <- NWSURV.count %>%
        pivot_longer(cols=c(R20151:R20182), names_to="Visit", values_to="Count") %>%
        filter(!is.na(Count))
      
      library(plotrix)
      mm <- NWSURV.count.long %>% 
        dplyr::group_by(Species, Visit) %>%
        dplyr::summarise(mean=mean(Count, na.rm=T), stderr=std.error(Count, na.rm=T)) %>%
        mutate("lower" = mean - stderr, "upper" = mean + stderr)
      
      ggplot(mm, aes(x=Visit, y=mean, group=Visit, colour = factor(Visit))) + 
        facet_wrap(~Species, ncol=1, scales = "free")+
        geom_point()+
        # stat_summary(fun = mean, geom="point") + 
        geom_errorbar(aes(ymin = mm$lower, ymax = mm$upper), width = 0.2)+
        ggtitle("Means of counts by species and visits (pooled across sites and treatments)")+
        ggsave("output/meanspeciescountpervisit.png", height=9, width=6)
      rm(mm)
      dev.off()
      
      
      # add trt
      mm <- NWSURV.count.long %>% 
        dplyr::group_by(Species, Visit, Treatment) %>%
        dplyr::summarise(mean=mean(Count, na.rm=T), stderr=std.error(Count, na.rm=T)) %>%
        mutate("lower" = mean - stderr, "upper" = mean + stderr)
      
      ggplot(mm, aes(x=Visit, y=mean, group=Visit, colour = factor(Visit))) + 
        facet_wrap(Species~Treatment, ncol=2, scales = "fixed")+
        geom_point()+
        # stat_summary(fun = mean, geom="point") + 
        geom_errorbar(aes(ymin = mm$lower, ymax = mm$upper), width = 0.2)+
        ggtitle("Means of counts by species and visits (pooled across sites and treatments)")
      ggsave("output/meanspeciescountpervisit.treatment.png", height=9, width=12)
      rm(mm)
      dev.off()
      
    rm(NWSURV.count, NWDATA.SURV, NWSURV.orig, NWBIRD.ALL, NWBIRD.SURV, NWSURV.count.long)
  
 # Plot Round 2 data summaries---
      # count by year, with species in diff color lines
      ggplot(NWPOINTR2.long, aes(Year, Count, group=Species, colour = factor(Species))) + 
        stat_summary(fun = mean,lwd=2, geom="line")

      # by Treatment (all sites lumped together)
      ggplot(NWPOINTR2.long, aes(Year, Count, group=Treatment, colour = factor(Treatment))) + 
        stat_summary(fun = mean, lwd=2, geom="line") + 
        facet_wrap(~Species) + ggtitle("Count of Species over Years, by treatment")

      # by sites (all species lumped together)
      ggplot(NWPOINTR2.long, aes(Year, Count, group=Treatment, colour = factor(Treatment))) + 
        stat_summary(fun = mean,lwd=2, geom="line") + 
        # facet_wrap(NWPOINTR2.long$Site)  + 
        ggtitle("Total bird counts (across species) at WMAs over Years, by treatment")

      # species by sites and treatment
      ggplot(NWPOINTR2.long, aes(Year, Count, group=Treatment, colour = factor(Treatment))) + 
        stat_summary(fun = mean,lwd=2, geom="line") + 
        facet_wrap(Species~SiteName, ncol=9) 

      # # get means for species of count per point by SiteName, Treatment, and year
      # meansitez <- NWPOINTR2.long %>% 
      #   dplyr::group_by(Species, Year, Treatment, SiteName) %>%
      #   dplyr::summarize(mean(Count, na.rm=T)) 
      # colnames(meansitez)[5] <- "MeanSite"
      # write.csv(meansitez, "data/meansitez.csv", row.names = F)
      # # can this go into t-tests? 
      # # no, I need to use PAIRED ttest to compare Spray:Control within WMAs (not across all of them)
      # # also, I need to feed in the values at point level, t.test function can calc variance

      
      # interesting way to summarize DiffMean over the years (across sites)
      ggboxplot(NWDiffmean.wide, x = "Year", y = "DiffMean", color = "Species", palette = birdcolors,
                ylab = "Differences between mean counts (spray-control)", xlab = "Species")
      
      ggpar(ggboxplot(NWDiffmean.wide, x = "Year", y = "DiffMean", color = "Species", palette = birdcolors,
                      ylab = "Differences between mean counts (spray-control)", xlab = "Year", size=1),
            cex=1,font.legend = c(10, "bold", "black"),
            main="Difference in mean counts between treatment sites within WMAs\nbefore herbicide (2015) to 3 years after",
            font.main = c(14, "bold", "black")) 
      
      
      ggpar(ggboxplot(NWDiffmean.wide, x = "Year", y = "DiffMean", color = "Species", palette = birdcolors,
                      ylab = "Differences between mean counts (spray-control)", xlab = "Year", size=1.0),
            cex=1.0, font.legend = c(10, "bold", "black"),
            main="Change in differences between treatment within WMAs",
            font.main = c(14, "bold", "black")) + 
        stat_summary(fun=mean, geom="point", shape=24, size=2, color="black",fill="darkgrey", aes(group = Year)) +
        facet_wrap(~Species)
      
      
      # a few data summaries
      summary(NWDiffmean.wide)
      # describe(NWDiffmean.wide)
      # describeBy(NWDiffmean.wide, c("Species"))
      
      # # means within years by treatment
      # library(lattice)
      # xyplot(NWDiffmean$meanct ~ NWDiffmean$Year, bg='black', cex=1.5,
      #        col.line=c("green","slateblue"), grid=T, type=c("p", "r"), lty=1,
      #        group=NWDiffmean$Treatment, pch=c(16,17), col=c("green","slateblue"),
      #        main="Mean detections of species, by treatment and year",
      #        key=list(space="right", lines=list(col=c("green","slateblue"), pch=c(16,17)), text=list(c("control"," spray")))
      #        )
      # 
      # # means within years by species by Treatment
      # xyplot(NWDiffmean$meanct ~ NWDiffmean$Year, bg='black', cex=1.5,
      #        col.line=birdcolors, grid=T, type=c("p", "r"), lty=1,
      #        group=NWDiffmean$Species, pch=c(15,16,17,18,19), col=birdcolors,
      #        main="Mean detections of species, by treatment and year", labels=NWDiffmean$Treatment,
      #        key=list(space="right", points=list(pch=c(15,16,17,18,19), col=birdcolors), text=list(bird.broadcast))
      #        )
      # 
      # # means within the years by species PANELS by Treatment
      # xyplot(NWDiffmean$meanct ~ NWDiffmean$Year | NWDiffmean$Species,
      #        pch=(as.integer(NWDiffmean$Treatment)+15), col=c("green","slateblue"), bg='black', cex=1.5,
      #        col.line=c("green","slateblue"), grid=T, type=c("p", "r"), lty=1,
      #        group=(as.integer(NWDiffmean$Treatment)+15), main="Mean detections of species, by treatment and year",
      #        key=list(points=list(col=c("green","slateblue"), pch=c(16,17), cex=1.5),
      #                 space="top", text=list(c("Control","Spray"))))
      # # dev.copy(png, file="output/figures/speciespanel_meansNW.png", width = 900, height = 700)
      # dev.off()
      # 
      # # means within the years by Treatment PANELS
      # xyplot(NWDiffmean$meanct ~ NWDiffmean$Year | NWDiffmean$Treatment,
      #        pch=c(15,16,17,18,19), col=c("green","slateblue"), bg='black', cex=1.5,
      #        col.line=c("green","slateblue"), grid=T, type=c("p", "r"), lty=1,
      #        group=(as.integer(NWDiffmean$Treatment)+15), main="Mean detections of species, by treatment and year")
      # 
      # # # diffmeans over years by species (Treatment lumped). summarize into df and plot
      # xyplot(NWDiffmean$meanct ~ NWDiffmean$Year, 
      #        groups=NWDiffmean$Species, col.line=birdcolors, 
      #        grid=T, type=c("p", "r"), lty=1,bg='black', cex=1.5,  pch=16, col=rep(birdcolors),
      #        main="Difference in counts between Spray and Control\nby species and year",
      #        key=list(space="right", points=list(col=rep(birdcolors), pch=1), text=list(bird.broadcast)))
      # 
      # # diffmeans over Year by species PANELS (Treatment lumped)
      # xyplot(NWDiffmean$meanct ~ NWDiffmean$Year | NWDiffmean$Species, pch=16, col="magenta", bg='black', cex=1.5,
      #        col.line="grey", grid=T, type=c("p", "r"), lty=1,
      #        main="Difference in counts between Spray and Control"
      #        )
      # # dev.copy(png, file="output/figures/speciespanel_diffmeansNW.png", width = 900, height = 700)
      # dev.off()
      
      
# 5) Calculate difference in detections between treatment and control-------------------------
    # make new dataframe to calculate DIFF btn treatment site mean dets at a WMA
    # the values for analysis are the difference (btn treatment points) in mean detections within each WMA per year
    # 1. Calc mean detections at spray at each WMA, calc mean detections at control at each WMA for each year
    # 2. Take the difference in means at each WMA for each year
    # 3. T.test diffmeans before-to-each year after paired by WMA (i.e. compare 2015 to 2016, 2015 to 2017, 2015 to 2018)
    
      
    # calculate the differences (btn ave spray and ave control) for each WMA  
    #NOTE!!: DiffMean = CONTROL - SPRAY
      
      # use aggregate(mean), and rowwise(Spray - Control)
      NWDiffmean <- aggregate(list(meanct=NWPOINTR2.long$Count), 
                              list(Species=NWPOINTR2.long$Species, 
                                   SiteName=NWPOINTR2.long$SiteName, 
                                   Year=NWPOINTR2.long$Year, 
                                   Treatment=NWPOINTR2.long$Treatment), 
                              mean, na.rm=T) 
      NWDiffmean.wide <- NWDiffmean %>%
        pivot_wider(id_cols = c(Species, SiteName, Year), names_from=Treatment, values_from=meanct) %>%
        dplyr::rowwise() %>%
        mutate(DiffMean = Spray - Control)
      
      # # same calculations, but use function diffmean()
      # library(mosaic)
      # NWDiffmean2 <- NWPOINTR2.long %>%
      #   dplyr::group_by(Species, Year, SiteName) %>%
      #   dplyr::summarize(diffmean=diffmean(Count~Treatment, na.rm=T))
     
      
# 6) T.tests to compare mean Count of Treatment diff at pts, are different between years?------------
      # http://www.sthda.com/english/wiki/paired-samples-t-test-in-r
      # library(PairedData)
      # pd <- paired(before, after)
      # plot(pd, type = "profile") + theme_bw()
      
      # try looping through the species and years
      ttestoutput <- list()
      ii=1
      for(ss in 1:length(bird.broadcast)){    
        for(yy in 1:3){
          thissp <- bird.broadcast[ss]
          fromyr <- "2015"
          toyr <- years[1+yy]
          
          sitez <- unique(NWDiffmean.wide$SiteName[which(NWDiffmean.wide$Year == toyr)]) # subset to compare WMAs sampled in "toyr"
          
          ll <- NWDiffmean.wide[which(NWDiffmean.wide$Species == thissp & NWDiffmean.wide$SiteName %in% sitez), c("Year","DiffMean")]
          before <- ll$DiffMean[which(ll$Year==fromyr)]
          after <- ll$DiffMean[which(ll$Year==toyr)]
          
          ttestoutput[[ii]]<- t.test(before, after, paired = TRUE, na.action=na.omit)
          ii=ii+1
        }
      }
      rm(ss,yy, thissp, fromyr, toyr, ll, before, after, ii, sitez)
      
      ttest2 <- sapply(ttestoutput, function(x) {
        c(x$estimate[1],
          x$statistic[1],
          ci.lower = x$conf.int[1],
          ci.upper = x$conf.int[2],
          p.value = x$p.value,
          parameter = x$parameter)
      })
      # omg, I can't believe that worked. Thanks internet!
      # https://stackoverflow.com/questions/21840021/grabbing-certain-results-out-of-multiple-t-test-outputs-to-create-a-table
      ttest3 <- rbind(Species=c(rep(bird.broadcast, each=3)), 
                  YrCompare=c(rep(c("2015to2016", "2015to2017", "2015to2018"))),
                  as.data.frame(ttest2))
      write.csv(ttest3, "output/ttestresults.csv", row.names = T)

      rm(ttestoutput, ttest2)
        
# 7) Plot ttests between pair-years--------------------------
    # from https://stats.stackexchange.com/questions/190223/how-to-visualize-independent-two-sample-t-test
    # Calc and plot between each of the years (2015-2016, 2016-2017, 2017-2018)
      # NWDiffmean
      # NWDiffmean.wide
      baselines <- aggregate(list(DiffMean=NWDiffmean.wide$DiffMean[NWDiffmean.wide$Year=="2015"]), 
                             list(Species=NWDiffmean.wide$Species[NWDiffmean.wide$Year=="2015"]), mean, na.rm=T)
      # baselines$Species <- str_replace_all(baselines$Species, c("AMBI"="American bittern", "PBGR"="Pied-billed grebe", 
      #                                                           "LEBI"= "Least bittern", 
      #                                                           "SORA"="Sora", "VIRA"="Virginia rail"))
      birdcolors2 <- c("#F5E771", "#556471","#F0E8E3", "#716B5B", "#748BA7")
      
      plotdat <- NWDiffmean.wide %>%
        as.data.frame() %>%
        dplyr::group_by(Species, Year) %>%
        dplyr::summarise(mean.DiffMean = mean(DiffMean, na.rm = TRUE),
                sd.DiffMean = sd(DiffMean, na.rm = TRUE),
                n.DiffMean = n(),
                se.DiffMean=std.error(DiffMean)) %>%
        dplyr::mutate(se.DiffMean = sd.DiffMean / sqrt(n.DiffMean),
               lower.ci.DiffMean = mean.DiffMean - qt(1 - (0.05 / 2), n.DiffMean - 1) * se.DiffMean,
               upper.ci.DiffMean = mean.DiffMean + qt(1 - (0.05 / 2), n.DiffMean - 1) * se.DiffMean)
      
      ggplot(plotdat, aes(y=mean.DiffMean, x=Year, fill=Species))+
        scale_fill_manual(values=birdcolors2)+
        geom_point(shape=21, size=3)+
        # geom_errorbar(aes(ymin=lower.ci.DiffMean, ymax=upper.ci.DiffMean))+
        geom_errorbar(aes(ymin=mean.DiffMean - se.DiffMean, ymax=mean.DiffMean + se.DiffMean), width=0.2)+
        facet_wrap(~Species, nrow=1)+
        geom_hline(yintercept=0)+
        ylim(-1.5, 2)+
        theme_bw()+
        theme(axis.text.x = element_text(angle = 45, hjust=1))+
        ggtitle("Difference in counts between spray and control, from before to each year after")+
        geom_hline(data=baselines, aes(yintercept = DiffMean), 
                   linetype = 2, col="darkgrey", lwd=1.1, alpha=1 )+
        geom_hline(yintercept = 0, col="grey")
      
      
      ttestlong <- as.data.frame(t(ttest3))
      colnames(ttestlong)[3] <- "Meandiff"
      ttestlong <- ttestlong %>%
        mutate("Meandiff2"=round(as.numeric(as.character(ttestlong$Meandiff)), 3), 
               "CIL"=round(as.numeric(as.character(ttestlong$ci.lower)), 3),
               "CIU"=round(as.numeric(as.character(ttestlong$ci.upper)), 3))
      ttestlong$Year <- right(as.character(ttestlong$YrCompare), 4)
      
      ggplot(ttestlong, aes(y=Meandiff2, x=YrCompare))+
        geom_point()+
        geom_errorbar(aes(ymin=CIL, ymax=CIU))+
        facet_wrap(~Species, nrow=1)+
        geom_hline(yintercept=0)+
        theme_bw()+
        theme(axis.text.x = element_text(angle = 45, hjust=1))+
        ggtitle("Change in mean difference between spray and control for each year interval")
      
      
  
    # GGBARPLOT---
      library(RColorBrewer); library(ggplot2); library(ggpubr)
      library(gplots); library(stringi); library(stringr)
      
      birdcolors2 <- c("#F5E771", "#556471","#F0E8E3", "#716B5B", "#748BA7")
      
      # Make plots for thesis
      # barplot results of ttests. panels by species. each year's trt diffs of means in bar. add * for statistically
      # different from "before" year. also add dotted horizontal line at value for "before" year for baseline
      # dat <- meanstatz[,c("Year", "Species", "mean", "se")]
      
      plotdat2 <- NWDiffmean.wide %>% droplevels()
      plotdat2$Species <- str_replace_all(plotdat2$Species, c("AMBI"="American bittern", "PBGR"="Pied-billed grebe", 
                                                                "LEBI"= "Least bittern", 
                                                                "SORA"="Sora", "VIRA"="Virginia rail"))
      baselines <- aggregate(list(DiffMean=NWDiffmean.wide$DiffMean[NWDiffmean.wide$Year=="2015"]), 
                             list(Species=NWDiffmean.wide$Species[NWDiffmean.wide$Year=="2015"]), mean, na.rm=T)
      baselines$Species <- str_replace_all(baselines$Species, c("AMBI"="American bittern", "PBGR"="Pied-billed grebe", 
                                                       "LEBI"= "Least bittern", 
                                                       "SORA"="Sora", "VIRA"="Virginia rail"))
      # geom point with 95% confidence interval
      pp <- ggerrorplot(plotdat2, x = "Year", y = "DiffMean", 
                        desc_stat = "mean",  
                        shape=5, size=1.2,
                        add = "mean_ci", add.params = list(color="black"),
                        color = "Species", palette = birdcolors2, 
                        fill="black", 
                        legend="none", show.legend=FALSE, 
                        ylim = c(-1.6,2.2),
                        font.x=c(18, "bold", "black"),
                        error.plot = "errorbar") +
        facet_grid(~Species)+
        theme_bw() + guides(fill=FALSE) + 
        labs(y="Difference in mean counts", x = "Year") +
        theme(axis.text = element_text(size=rel(1.2)),
              axis.title = element_text(size = rel(1.5)),
              strip.text.x=element_text(size=rel(1.5)),
              legend.position = "none")+
        stat_compare_means(ref.group = "2015", label = "p.signif", 
                           label.y = 2.2, hide.ns=T, method="t.test", 
                           method.args = list(alternative=c("two.sided"), paired=TRUE, groups=~Species)) 
      pp + geom_hline(data=baselines, aes(yintercept = DiffMean), 
                            linetype = 2, col="darkgrey", lwd=1.1, alpha=1 )+
        geom_hline(yintercept = 0, col="grey")+
        stat_summary(geom="point", shape=21, size=5, col="black", 
                     fill=rep(birdcolors2, each=4), fun="mean")
      # + geom_text(data = ann_text,label = "*")
      ggsave("output/meandiff_chapter3Results.pts2_CI95.png")
      dev.off()

      # geom point with mean and se
      pp <- ggerrorplot(plotdat2, x = "Year", y = "DiffMean", desc_stat = "mean",  
                        shape=5, size=1.2,
                        add = "mean_se", add.params = list(color="black"),
                        color = "Species", palette = birdcolors2, fill="black", 
                        legend="none", show.legend=FALSE, ylim = c(-1.2,1.6),
                        font.x=c(18, "bold", "black"),
                        error.plot = "errorbar") +
        facet_grid(~Species)+theme_bw() + guides(fill=FALSE) + 
        labs(y="Difference in mean counts between treatments ± s.e.", x = "Year") +
        theme(axis.text = element_text(size=rel(1.2)),
              axis.title = element_text(size = rel(1.5)),
              strip.text.x=element_text(size=rel(1.5)),
              legend.position = "none")+
        stat_compare_means(ref.group = "2015", label = "p.signif", label.y = 1.6, hide.ns=T,
                           method="t.test")    
      pp + geom_hline(data=baselines, aes(yintercept =DiffMean), 
                      linetype = 2, col="darkgrey", lwd=1.1, alpha=1 )+
        geom_hline(yintercept = 0, col="grey")+
        stat_summary(geom="point", shape=21, size=5, col="black", fill=rep(birdcolors2, each=4), 
                     fun="mean")
      # ggsave("output/figures/meandiff_chapter3Results.pts2_se.png")
      dev.off()
      #NOTE: the stat compare means adds * to bars of years that are stat diff from reference first year,
      # otherwise "ns"= not statistically significant
   
      # https://www.r-bloggers.com/add-p-values-and-significance-levels-to-ggplots/
      # http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/
      # this code wants the dataset, not the result values
      ggbarplot(NWDiffmean.wide, x = "Year", y = "DiffMean", add = "mean_se",
                color = "Year", palette = birdcolors2, fill="Year")+
        stat_compare_means(aes(group = c(Species, Year)), method="t.test", ref.group = 2015,
                label = "p.signif", label.y = max(plotdat2$DiffMean*1.1))+
        facet_wrap(NWDiffmean.wide$Species)
      
   
      plotz <- ggplot(plotdat, aes(Year, mean.DiffMean, group=Species)) + 
        geom_col() +  
        geom_errorbar(aes(ymin = mean.DiffMean - se.DiffMean, ymax = mean.DiffMean + se.DiffMean), width=0.2)
      plotz + labs(y="Difference in DiffMean counts between treatments ± s.e.", x = "Year") + 
        theme_bw()+
        facet_grid(~Species)
      
      
     ggplot(data = plotdat, aes(x = Year, y = mean.DiffMean))+
        # this second line defines the type of graph
        geom_col()+
        # third line adds error bars
        geom_linerange(aes(ymin = mean.DiffMean - se.DiffMean, ymax = mean.DiffMean + se.DiffMean))+
        # Make this split out by species
        facet_grid(~Species)
      
        
      
 
## END---------------------------------------------------------------------------------------------------------

library(ezknitr)
ezspin("03_NW_herbicide.R", out_dir = "output", fig_dir = "figures", keep_md=FALSE)