#'---
#' title: "Analysis of Ditch Data - Plants"
#' author: "Mike Verhoeven"
#' output: 
#'    html_document:
#'       toc: true
#'       theme: default
#'       toc_depth: 2
#'       toc_float:
#'           collapsed: false
#'---

# header ------------------------------------------------------------------

#' # Preface
#' 
#' This code will: 
#' 1. Run models to evaluate the effects of age, planting, etc. on ditch 
#' plant floral communities
#' 2. Generate all tables, figures, and statistics 
#' 
#'  
#'  Notes: 
#'  1. I still have a few column references that happen by numerical position. 
#'  Adding or removing columns from input data will break these calls. 
#'  2. There are a few warnings throughout the code, especially where data types
#'  are coerced during munging operations or where data are omitted from plot 
#'  calls for missingness. As of submission to the repo, we have reviewed these 
#'  and none were deemed problematic.
#'  
#'  
#' 
#' ## Load libraries:

# load libraries ----------------------------------------------------------

# +warning=FALSE, message=FALSE 
library(data.table)
library(ggplot2)
library(stringr)
library(mvabund)
library(glmm)
library(lme4)
library(broom)
library(epiDisplay)
library(ggeffects)
library(effects)
library(rstanarm)
library(vegan)
library(sjPlot)
library(nlme)
library(lmerTest)
library(ggpubr)
library(ggnewscale)
library(cowplot)


# load functions ----------------------------------------------------------

#make a fn for replacing values in many cols
f_dowle3natoblank = function(DT, x) {
  # or by number (slightly faster than by name) :
  for (j in x)
    set(DT,which(is.na(DT[[j]])),j,"")
}

f_dowle3natozeros = function(DT, x) {
  # or by number (slightly faster than by name) :
  for (j in x)
    set(DT,which(is.na(DT[[j]])),j,"0")
}


# load data ---------------------------------------------------------------
    #set wd to data location
    #plant data
    l_plant_obs <- fread( file = "veg_occurrences.csv")
    
    #species names and identities:
    plant_taxa <- fread(file = "plants_taxo_key_status.csv")
    
    #site covariates:
    site_data <-  fread(file = "site_attributes.csv")
      
    # seedmix matrix
    seedmix <- fread(file = "seedmix_matrix.csv")
    
    


    

# prep:create site-date data table ----------------------------------------------------

# we should have 21 construction projects, sampled 3 sites within each, across 6 visits = 378 date-site combinations
    
    #date_site_sample
    ## calculate some site-date veg stats & make a new site-date data table
    
    # number of samples per site
    l_plant_obs[ , length(unique(date)) , .(project, trt, site) ] #count dates for each site
    l_plant_obs[ , length(unique(date)) , .(project, trt) ][ V1 < 6] # which ones have missing dates? 
    l_plant_obs[site %in% c("2typ", "16typ"),  unique(date), .(site, project, trt) ] #16 typ was turned into a construction site midsummer, 2 typ has a missing datasheet
    
    # of quadrats per site_date
    quads_per_site_date <- l_plant_obs[ , .("N" = length(unique(quadrat))), .(date, site, trt, project)]
    # of flowering forb observations made by site_date
    forbs_per_site_date <- l_plant_obs[!taxon == "nofloweringveg" &  taxon %in% 
                                         plant_taxa[ `grass/flower` == "flower" , Taxon  , ],#choose rows where these conditions are met,
                                       .("n_forb_observed" = .N) , #count how many rows & call that count n_forb_observed
                                       .(date, site, trt, project)] #and do that counting by unique combos of date, trt, project (site is redund for trt proj)
    # forb richness by site_date
    forbs_richness_per_site_date <- l_plant_obs[!taxon == "nofloweringveg" &  taxon %in%
                                                  plant_taxa[ `grass/flower` == "flower" , Taxon  , ],
                                                .("forb_richness" = length(unique(taxon))) ,
                                                .(date, site, trt, project)]
    
    # number quads with floral resource by site_date
    n_q_floral_per_site_date <- l_plant_obs[!taxon == "nofloweringveg"  &  taxon %in%
                                              plant_taxa[ `grass/flower` == "flower" , Taxon  , ],
                                            .(n_quad_floral_res = length(unique(quadrat))) ,
                                            .(date, site, trt, project)]
    # total richness by site_date
    total_richness_per_site_date <- l_plant_obs[!taxon == "nofloweringveg",
                                                .("total_richness" = length(unique(taxon))) ,
                                                .(date, site, trt, project)]
    
    
    # merge each of these into a date_site level table:
    date_site_sample <- merge(quads_per_site_date,# x table
                              forbs_per_site_date[, j = .(date,site,n_forb_observed) ,], #y table - here we extract only the cols of interest with by = .() to prevent a messy merge
                              by = c("date", "site"), all = T)
    # merge more:
    date_site_sample <- merge(date_site_sample, forbs_richness_per_site_date[,.(date,site,forb_richness) ,], by = c("date", "site"), all = T)
    date_site_sample <- merge(date_site_sample, total_richness_per_site_date[,.(date,site,total_richness) ,], by = c("date", "site"), all = T)
    date_site_sample <- merge(date_site_sample, n_q_floral_per_site_date[,.(date,site,n_quad_floral_res) ,], by = c("date", "site"), all = T)

    #cleanup the building blocks for that merge
    rm(total_richness_per_site_date, quads_per_site_date, forbs_per_site_date, forbs_richness_per_site_date, n_q_floral_per_site_date)
    
    #populate NAs with zeroes - in all these cases, an NA from calculations above needs to be a zero:
    f_dowle3natozeros(date_site_sample, c("forb_richness","n_forb_observed", "total_richness","n_quad_floral_res" ))
    
    
    # add the plant obs matrix to the site-date table (counts each species obs, merges into the date_site table, then reshapes dataset to wide):
    plant_matrix_date_site_sample <- dcast(merge(date_site_sample,
                                                 l_plant_obs[ , .("count" = .N) , .(site,date,taxon)], #this call pulls out a count for each species at each point
                                                 by = c("date", "site"), all = TRUE),
                                           date + site + trt + project + N + n_forb_observed + forb_richness + total_richness + n_quad_floral_res ~ taxon,
                                           value.var = "count")
    
    #polish up data formats
    plant_matrix_date_site_sample[ , trt := as.factor(trt) , ]
    plant_matrix_date_site_sample[ , site := as.factor(site) , ]
    
    names(plant_matrix_date_site_sample)
    
    #more NA to zeros
    f_dowle3natozeros(plant_matrix_date_site_sample, c(1:142))
  
    # convert counts to proportion data
    cols <- names(plant_matrix_date_site_sample)[ 10:142]
    plant_matrix_date_site_sample[ , (cols) := lapply(.SD, function(x){x/N}) , .SDcols = cols ] # divide number of quads where each species was observed by total N quads sampled

    summary(plant_matrix_date_site_sample)
    
    #drop the construction sub-steps
    rm(date_site_sample)
    
# prep:merge datasets ----------------------------------------------------------  
    
    #bring in site data
    plant_matrix_date_site_sample <- plant_matrix_date_site_sample[ site_data, on = .(site=Site)]
    
    #how many sites per seedmix?
    
    plant_matrix_date_site_sample[ , length(unique(site)) , .(Seed_mix) ]
    plant_matrix_date_site_sample[ , length(unique(site)) , .(Seed_mix) ][c(1:2,4:11),Seed_mix]
    
    
    #pull in the seedmix composition:
    
    seedmix_l <- melt(seedmix,
                      id.vars = c("CommonName", "ScientificName"),
                      measure.vars = plant_matrix_date_site_sample[ , length(unique(site)) , .(Seed_mix) ][!Seed_mix == "",Seed_mix],
                      variable.name = "seedmix")[!is.na(value)]
    
    # write seeding table to file, include only mixes that were used
    seedmix[ , .SD  , .SDcols = c(c("CommonName", "ScientificName"),plant_matrix_date_site_sample[ , length(unique(site)) , .(Seed_mix) ][!Seed_mix == "",Seed_mix])]
    
    # fwrite(seedmix[ , .SD  , .SDcols = c(c("CommonName", "ScientificName"), plant_matrix_date_site_sample[ , length(unique(site)) , .(Seed_mix) ][!Seed_mix == "",Seed_mix])], file = "data&scripts/data/output/seed_matrix.csv")
    
    seedmix_species_matrix <- dcast(seedmix_l, seedmix ~ ScientificName)
    
    #paste an "s" in front of seedmix species:
    names(seedmix_species_matrix) <- paste("s",names(seedmix_species_matrix), sep = "_")
    
    #merge seedmix species matrix to sampling matrix:
    cols <- names(seedmix_species_matrix)[2:80] #which columns are seeding matrix?
    plant_matrix_date_site_sample[seedmix_species_matrix, on = .(Seed_mix = s_seedmix), (cols) := mget(paste0("i.", cols)) ]
    
    plant_matrix_date_site_sample[ , length(unique(Seed_mix)) , .(site) ]
    
    #populate seedmix data with zeros:
    names(plant_matrix_date_site_sample)# use this to check next line of code for alignment with seedmix data (consider updates here to fix hard code that relies on col numbers)
    f_dowle3natozeros(plant_matrix_date_site_sample, c(163:241))
    
    
    names(plant_matrix_date_site_sample) <- gsub(" ","_", names(plant_matrix_date_site_sample))
    
# prep:univariate response metrics ---------------------------------------------

#' We have a few metrics already for variables describing the plants in a
#' univariate way. At this point however, we need to add in a "diversity" metric
#' which will think about community evenness. We could work in an "area"
#' effect, and can do that with species density (richness/area). We could also
#' do about 10 other things. 
#' 
#' We have already: Forb richness, Total richness, number of quads with a floral
#' resource, n forbs observed (count of flower-species-quadrats), 
#' 
#' We will add: Simpsons diversity (and split nat v introduced), Shannon
#' diversity (and split nat v introduced), split current richness and floral
#' richness by nat v introduced.
#'         
    names(plant_matrix_date_site_sample)
    
    #tag all univariate metrics with um_xxx
    names(plant_matrix_date_site_sample)[6:9] <- paste("um_", sep = "", names(plant_matrix_date_site_sample)[6:9])
    
    #add shannon div and simpsons div
    plant_matrix_date_site_sample[ , um_shannon := diversity(plant_matrix_date_site_sample[,10:139],index = "shannon")]
    plant_matrix_date_site_sample[ , um_simpsons := diversity(plant_matrix_date_site_sample[,10:139],index = "simpson")]#Both variants of Simpson's index are based on D = sum p_i^2. Choice simpson returns 1-D and invsimpson returns 1/D.
    
    # now restrict to floral diversity (drop grasses)
    #
    #extract just the floral div data:peel off the position of the names of the flowers in the plant_taxa list
    floralcols <- names(plant_matrix_date_site_sample)[names(plant_matrix_date_site_sample) %in% plant_taxa[`grass/flower`== "flower", Taxon,]]

    #test the calculation
    diversity(plant_matrix_date_site_sample[, .SD , .SDcols = floralcols  ],index = "shannon")
    
    #run simps and shannon's floral
    plant_matrix_date_site_sample[ , um_shannons_floral := diversity(plant_matrix_date_site_sample[, .SD , .SDcols = floralcols  ],index = "shannon"), ]
    plant_matrix_date_site_sample[ , um_simpsons_floral := diversity(plant_matrix_date_site_sample[, .SD , .SDcols = floralcols  ],index = "simpson"), ]
    
    #now split these all into native v introduced
    #extract just the NATIVE & just INTRODUCED floral div data:
    nativecols <- names(plant_matrix_date_site_sample)[names(plant_matrix_date_site_sample) %in% plant_taxa[`Minnesota Native?`== "native", Taxon,]]
     
    introcols <-  names(plant_matrix_date_site_sample)[names(plant_matrix_date_site_sample) %in% plant_taxa[`Minnesota Native?`!= "native", Taxon,]]
    
    #add these diversity metrics to the dataset
    plant_matrix_date_site_sample[ , um_shannons_nat := diversity(plant_matrix_date_site_sample[, .SD , .SDcols = nativecols  ],index = "shannon"), ]
    plant_matrix_date_site_sample[ , um_shannons_int := diversity(plant_matrix_date_site_sample[, .SD , .SDcols = introcols  ],index = "shannon"), ]
    plant_matrix_date_site_sample[ , um_simpsons_nat := diversity(plant_matrix_date_site_sample[, .SD , .SDcols = nativecols  ],index = "simpson"), ]
    plant_matrix_date_site_sample[ , um_simpsons_int := diversity(plant_matrix_date_site_sample[, .SD , .SDcols = introcols  ],index = "simpson"), ]
    
    #split richness by introduced/native:
    plant_matrix_date_site_sample[ , um_richness_nat := rowSums(plant_matrix_date_site_sample[, .SD , .SDcols = nativecols  ]!=0), ]
    plant_matrix_date_site_sample[ , um_richness_int := rowSums(plant_matrix_date_site_sample[, .SD , .SDcols = introcols  ]!=0), ]
    
    
    
    #extract just the NATIVE & just INTRODUCED floral div data:
    nativefloralcols <- names(plant_matrix_date_site_sample)[names(plant_matrix_date_site_sample) %in% plant_taxa[`grass/flower`== "flower" & `Minnesota Native?`== "native", Taxon,]]
    introfloralcols <-  names(plant_matrix_date_site_sample)[names(plant_matrix_date_site_sample) %in% plant_taxa[`grass/flower`== "flower" & `Minnesota Native?`!= "native", Taxon,]]
    
    #add these diversity metrics to the dataset
    plant_matrix_date_site_sample[ , um_shannons_floral_nat := diversity(plant_matrix_date_site_sample[, .SD , .SDcols = nativefloralcols  ],index = "shannon"), ]
    plant_matrix_date_site_sample[ , um_shannons_floral_int := diversity(plant_matrix_date_site_sample[, .SD , .SDcols = introfloralcols  ],index = "shannon"), ]
    plant_matrix_date_site_sample[ , um_simpsons_floral_nat := diversity(plant_matrix_date_site_sample[, .SD , .SDcols = nativefloralcols  ],index = "simpson"), ]
    plant_matrix_date_site_sample[ , um_simpsons_floral_int := diversity(plant_matrix_date_site_sample[, .SD , .SDcols = introfloralcols  ],index = "simpson"), ]
    #split richness by introduced/native:
    plant_matrix_date_site_sample[ , um_richness_floral_nat := rowSums(plant_matrix_date_site_sample[, .SD , .SDcols = nativefloralcols  ]!=0), ]
    plant_matrix_date_site_sample[ , um_richness_floral_int := rowSums(plant_matrix_date_site_sample[, .SD , .SDcols = introfloralcols  ]!=0), ]

    #convert year to age
    plant_matrix_date_site_sample[ , age := 2021-Year , ]
    plant_matrix_date_site_sample[trt == "typ", age:= NA]
    
    #convert date to julian (easier to apply a quadratic to numeric than to a date object)
    plant_matrix_date_site_sample[ , jday := yday(as.IDate(date, format = "%m/%d/%Y")) , ]
    
    #add native and nonnative n quads w/ floral resources
    n_q_nat_floral_per_site_date <- l_plant_obs[!taxon == "nofloweringveg"  &  taxon %in% plant_taxa[ `grass/flower` == "flower" & `Minnesota Native?` == "native" , Taxon  , ], .(um_n_quad_floral_res_nat = length(unique(quadrat))) , .(date, site, trt, project)]
    n_q_int_floral_per_site_date <- l_plant_obs[!taxon == "nofloweringveg"  &  taxon %in% plant_taxa[ `grass/flower` == "flower" & `Minnesota Native?` != "native" , Taxon  , ], .(um_n_quad_floral_res_int = length(unique(quadrat))) , .(date, site, trt, project)]
    plant_matrix_date_site_sample <- merge(plant_matrix_date_site_sample, n_q_nat_floral_per_site_date[,.(date,site,um_n_quad_floral_res_nat) ,], by = c("date", "site"), all.x = T)
    plant_matrix_date_site_sample <- merge(plant_matrix_date_site_sample, n_q_int_floral_per_site_date[,.(date,site,um_n_quad_floral_res_int) ,], by = c("date", "site"), all.x = T)
    
    #cleanup
    rm(n_q_nat_floral_per_site_date, n_q_int_floral_per_site_date)
    
    #populate blanks with zeroes
    f_dowle3natozeros(plant_matrix_date_site_sample, c("um_n_quad_floral_res_nat", "um_n_quad_floral_res_int" ))
    
    # #export all plant file
    # fwrite(plant_matrix_date_site_sample, file = "data&scripts/data/output/plants_flat_file.csv")
    
    
# prep:drop bad sites from dataset ---------------------------------------------
    
    #Tim Mitchell says drop sites 5 & 15:
    #Drop project 5 and 15.  The non-native site at 5 was mowed sod (unlike all the others) and site 15, I have serious doubts the native site was seeded with native plants. I imagine it was a seeding error on the part of the contractor. Site 6 & site 2 also have missing dates of sampling: 16 typ was turned into a construction site midsummer, 2 typ has a missing datasheet. Certain analyses will require these sites/projects be dropped (for example to give an even sampling design).
    
    
    # # #drop the sites tim says were goofed 
    # l_plant_obs                   <- l_plant_obs[!site %in% c("5non","15nat")]
    # # date_site_sample              <- date_site_sample[!site %in% c("5non","15nat")]
    # plant_matrix_date_site_sample <- plant_matrix_date_site_sample[!site %in% c("5non","15nat")]
    # site_data                     <- site_data[!Site %in% c("5non","15nat")]
    
    # 
    #OR
    
    #Drop those projects entirely - This is used for the plants analysis
    l_plant_obs                   <- l_plant_obs[!project %in% c(5,15)]
    # date_site_sample              <- date_site_sample[!project %in% c(5,15)]
    plant_matrix_date_site_sample <- plant_matrix_date_site_sample[!project %in% c(5,15)]
    site_data                     <- site_data[!Project %in% c(5,15)]
    
# visualize: Fig 3 annual phenology plot ---------------------------------------------------
    
    plant_matrix_date_site_sample[ , trt := factor(trt, levels = c( "nat", "non", "typ"))]
  # 3 panel this to make figure:
   Floral_avail <- ggplot(plant_matrix_date_site_sample[!trt == "typ"] , aes( date, um_n_quad_floral_res/N, group = trt))+
     # geom_line(aes(group = site, color = trt), alpha = .25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "A) All Flowers")+
     geom_smooth(method = "loess", aes(color = trt))+
      ylab("Proportion of quadrats")+
      ylim(c(0,1))+
      xlab(NULL)+
      theme(legend.position = "none")+
     scale_color_brewer(palette = "Set2")
    Native_Floral_avail <- ggplot(plant_matrix_date_site_sample[!trt == "typ"] , aes( date, um_n_quad_floral_res_nat/N, group = trt))+
      # geom_line(aes(group = site, color = trt), alpha = .25)+
      # facet_grid(rows = vars(trt) )+
      ggtitle(label = "B) Native Flowers")+
      geom_smooth(method = "loess", aes(color = trt))+
      ylab(NULL)+
      xlab("Date")+
      ylim(c(0,1))+
      theme(legend.position = c(.5,.7), legend.background = element_blank())+
      scale_color_brewer(palette = "Set2",name="Seeding\nTreatment", labels = c("Native mix","Non-Native Mix","Unknown/Typical Roadside"  ))
    Int_Floral_avail <- ggplot(plant_matrix_date_site_sample[!trt == "typ"] , aes( date, um_n_quad_floral_res_int/N, group = trt))+
      # geom_line(aes(group = site, color = trt), alpha = .25)+
      # facet_grid(rows = vars(trt) )+
      ggtitle(label = "C) Introduced Flowers")+
      geom_smooth(method = "loess", aes(color = trt))+
      ylab(NULL)+
      ylim(c(0,1))+
      xlab(NULL)+
      theme(legend.position = "none")+
      scale_color_brewer(palette = "Set2")
    
    # pheno_3panel<- ggarrange(Floral_avail, Native_Floral_avail, Int_Floral_avail, nrow = 1)

    
    #' ## Combining
    plots.row <- align_plots(Floral_avail, Native_Floral_avail, Int_Floral_avail, align="hv", 
                             axis="tblr")
    div.rows <- plot_grid(plots.row[[1]], plots.row[[2]], plots.row[[3]],
                          nrow=1, #label_size = 7.5,
                          # label_fontface = "plain", labels = c("(a)", "(b)", "(c)"), 
                          hjust = -0.25, vjust = 2)
    
    # # Write figure 3
    # png(file = "Fig3_pquadflower_pheno_3panel.png", width = 8.5, height = 3, units = "in", res = 1200)
    div.rows
    # dev.off()

# visualize:phenology exploration plots ---------------------------------------------

        
    #phenology is driven by individual species:
    merge(site_data,
          l_plant_obs[ , .("count" = .N) , .(site,date,taxon)], #this call pulls out a count for each species at each point
          by.y = c("site"), by.x = c("Site"), all = TRUE)[ , .(count =  sum(count)) ,.(date,taxon)] 
    
    ggplot(    merge(site_data,
                     l_plant_obs[ , .("count" = .N) , .(site,date,taxon)], #this call pulls out a count for each species at each point
                     by.y = c("site"), by.x = c("Site"), all = TRUE)[ , .(count =  sum(count)) ,.(date,taxon)][] ,
               aes(date, count, group = taxon ))+
      geom_line(aes(color = taxon))+
      scale_y_log10()+
      theme(legend.position = "none")
    
    
    ggplot(plant_matrix_date_site_sample , aes( date, um_richness_floral_nat, group = trt))+
      # geom_line(aes(group = site, color = trt))+
      # facet_grid(rows = vars(trt) )+
      ggtitle(label = "native forb richness through season")+
      geom_smooth(method = "loess", aes(color = trt))+
      ylab("species richness")
   
   ggplot(plant_matrix_date_site_sample[!trt == "typ"] , aes( date, um_n_quad_floral_res/N, group = trt))+
     geom_line(aes(group = site, color = age))+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "forb floral availability through season - color by age of site")
     # geom_smooth(method = "loess", aes(color = trt))
   
   ggplot(plant_matrix_date_site_sample , aes( age, um_n_quad_floral_res/N, group = trt))+
     geom_point(aes(color = trt))+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "forb floral availability with age by treatment")+
     geom_smooth(method = "lm", aes(color = trt))

   
   
   
# visualize:floral rich by age ------------------------------------------------------

   
   #Figure3:
   Floral_rich_age <- ggplot(plant_matrix_date_site_sample[!trt =="typ"] , aes( jitter(age), jitter(um_forb_richness), group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "A) All Flowers")+
     geom_smooth(method = "lm", aes(color = trt))+
     coord_cartesian(ylim = c(-.1,15))+
     ylab("# of Species")+
     xlab(" ")+
     theme(legend.position = "none")+
     scale_color_brewer(palette = "Set2")
   Native_Floral_rich_age <- ggplot(plant_matrix_date_site_sample[!trt =="typ"] , aes( jitter(age), jitter(um_richness_floral_nat), group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "B) Native Flowers")+
     geom_smooth(method = "lm", aes(color = trt))+
     coord_cartesian(ylim = c(-.1,15))+
     ylab(NULL)+
     xlab("Age (years)")+
     theme(legend.position = c(.5,.8), legend.background = element_blank())+
     scale_color_brewer(palette = "Set2",name="Seeding\nTreatment", labels = c( "Native mix","Non-Native Mix" ))
   Int_Floral_rich_age <- ggplot(plant_matrix_date_site_sample[!trt =="typ"] , aes( jitter(age), jitter(um_richness_floral_int), group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "C) Introduced Flowers")+
     geom_smooth(method = "lm", aes(color = trt))+
     coord_cartesian(ylim = c(-.1,15))+
     ylab(NULL)+
     xlab(" ")+
     theme(legend.position = "none")+
     scale_color_brewer(palette = "Set2")
   
   ggarrange(Floral_rich_age, Native_Floral_rich_age, Int_Floral_rich_age, nrow = 1)
   
# visualize:floral rich by season ------------------------------------------------------
   
   
   #FigureB1:
   Floral_rich_season <- ggplot(plant_matrix_date_site_sample[!trt == "typ"] , aes( date, um_forb_richness, group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "A) All Flowers")+
     geom_smooth(method = "loess", aes(color = trt))+
     ylab("# of Species")+
     xlab(" ")+
     theme(legend.position = "none")+
     scale_color_brewer(palette = "Set2")
   Native_Floral_rich_season <- ggplot(plant_matrix_date_site_sample[!trt == "typ"] , aes( date, um_richness_floral_nat, group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "B) Native Flowers")+
     geom_smooth(method = "loess", aes(color = trt))+
     ylab(NULL)+
     xlab("Date")+
     theme(legend.position = c(.5,.7), legend.background = element_blank())+
     scale_color_brewer(palette = "Set2",name="Seeding\nTreatment", labels = c( "Native mix","Non-Native Mix", "Typical" ))
   Int_Floral_rich_season <- ggplot(plant_matrix_date_site_sample[!trt == "typ"] , aes( date, um_richness_floral_int, group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "C) Introduced Flowers")+
     geom_smooth(method = "loess", aes(color = trt))+
     ylab(NULL)+
     xlab(" ")+
     theme(legend.position = "none")+
     scale_color_brewer(palette = "Set2")
   
   floralrich_pheno_3panel <- ggarrange(Floral_rich_season, Native_Floral_rich_season, Int_Floral_rich_season, nrow = 1)
   
   # # Write figure S2
   # png(file = "FigS2_pquadflower_pheno_3panel.png", width = 8.5, height = 3, units = "in", res = 1200)
   floralrich_pheno_3panel
   # dev.off()
   

# visualize: Fig 4 floral avail by age ------------------------------------------------------

   
   #Figurepanels:
   Floral_avail_age <- ggplot(plant_matrix_date_site_sample[!trt =="typ"] , aes( jitter(age), jitter(um_n_quad_floral_res/N), group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "A) All Flowers")+
     geom_smooth(method = "lm", aes(color = trt))+
     ylab("Proportion of quadrats")+
     xlab(" ")+
     theme(legend.position = "none")+
     scale_color_brewer(palette = "Set2")
   Native_Floral_avail_age <- ggplot(plant_matrix_date_site_sample[!trt =="typ"] , aes( jitter(age), jitter(um_n_quad_floral_res_nat/N), group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "B) Native Flowers")+
     geom_smooth(method = "lm", aes(color = trt))+
     ylab(NULL)+
     xlab("Age (years)")+
     theme(legend.position = c(.5,.8), legend.background = element_blank())+
     scale_color_brewer(palette = "Set2", name="Seeding\nTreatment", labels = c( "Native mix", "Non-Native Mix"    ))
   Int_Floral_avail_age <- ggplot(plant_matrix_date_site_sample[!trt =="typ"] , aes( jitter(age), jitter(um_n_quad_floral_res_int/N), group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "C) Introduced Flowers")+
     geom_smooth(method = "lm", aes(color = trt))+
     ylab(NULL)+
     xlab(" ")+
     theme(legend.position = "none")+
     scale_color_brewer(palette = "Set2")
   
   ggarrange(Floral_avail_age, Native_Floral_avail_age, Int_Floral_avail_age, nrow = 1)
   
   #' ## Combining
   plots.row <- align_plots(Floral_avail_age, Native_Floral_avail_age, Int_Floral_avail_age, align="hv", 
                            axis="tblr")
   div.rows <- plot_grid(plots.row[[1]], plots.row[[2]], plots.row[[3]],
                         nrow=1, #label_size = 7.5,
                         # label_fontface = "plain", labels = c("(a)", "(b)", "(c)"), 
                         hjust = -0.25, vjust = 2)
   
   # # Write figure 3
   # png(file = "Fig4_flowers_age_3panel.png", width = 8.5, height = 3, units = "in", res = 1200)
   div.rows
   # dev.off()
   

# visualize:shannon div by age ------------------------------------------------------

   
   #FigureB4 Shannon Diversity:
   Floral_div_age <- ggplot(plant_matrix_date_site_sample[!trt =="typ"] , aes( jitter(age), um_shannons_floral, group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "A) All Flowers")+
     geom_smooth(method = "lm", aes(color = trt))+
     ylab("Shannon Div")+
     xlab(" ")+
     theme(legend.position = "none")+
     scale_color_brewer(palette = "Set2")
   Native_Floral_div_age <- ggplot(plant_matrix_date_site_sample[!trt =="typ"] , aes( jitter(age), um_shannons_nat, group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "B) Native Flowers")+
     geom_smooth(method = "lm", aes(color = trt))+
     ylab(NULL)+
     xlab("Age (years)")+
     theme(legend.position = c(.5,.8), legend.background = element_blank())+
     scale_color_brewer(palette = "Set2",name="Seeding\nTreatment", labels = c( "Native mix","Non-Native Mix" ))
   Int_Floral_div_age <- ggplot(plant_matrix_date_site_sample[!trt =="typ"] , aes( jitter(age), um_shannons_int, group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "C) Introduced Flowers")+
     geom_smooth(method = "lm", aes(color = trt))+
     ylab(NULL)+
     xlab(" ")+
     theme(legend.position = "none")+
     scale_color_brewer(palette = "Set2")
   
   ggarrange(Floral_div_age, Native_Floral_div_age, Int_Floral_div_age, nrow = 1)
   

# visualize:shannon div by season ----------------------------------------------------

  
   #FigureB2 Shannon Diversity seasonality:
   Floral_shannon_season <- ggplot(plant_matrix_date_site_sample[!trt == "typ"] , aes( date, um_shannons_floral, group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "A) All Flowers")+
     geom_smooth(method = "loess", aes(color = trt))+
     ylab("Shannon Div")+
     xlab(" ")+
     theme(legend.position = "none")+
     scale_color_brewer(palette = "Set2")
   Native_Floral_shannon_season <- ggplot(plant_matrix_date_site_sample[!trt == "typ"] , aes( date, um_shannons_nat, group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "B) Native Flowers")+
     geom_smooth(method = "loess", aes(color = trt))+
     ylab(NULL)+
     xlab("date")+
     theme(legend.position = c(.5,.7), legend.background = element_blank())+
     scale_color_brewer(palette = "Set2",name="Seeding\nTreatment", labels = c( "Native mix","Non-Native Mix", "Typical" ))
   Int_Floral_shannon_season <- ggplot(plant_matrix_date_site_sample[!trt == "typ"] , aes( date, um_shannons_int, group = trt))+
     geom_point(aes(color = trt), alpha = 0.25)+
     # facet_grid(rows = vars(trt) )+
     ggtitle(label = "C) Introduced Flowers")+
     geom_smooth(method = "loess", aes(color = trt))+
     ylab(NULL)+
     xlab(" ")+
     theme(legend.position = "none")+
     scale_color_brewer(palette = "Set2")
   
   shannondiv_pheno_3p <- ggarrange(Floral_shannon_season, Native_Floral_shannon_season, Int_Floral_shannon_season, nrow = 1)
   
   # # Write figure S3
   # png(file = "FigS3_shannondiv_pheno_3panel.png", width = 8.5, height = 3, units = "in", res = 1200)
   shannondiv_pheno_3p
   # dev.off()
   # 
   # 
   
   
   
   
   #Figure3d:
   #Want a figure showing change in planted community over time

#' Alright, here we need to pause. These are "uninformed" linear models struck 
#' through the data with a single response variable
#'
#' The way to properly handle these is to print out the partial effects plots, not these.
#' 
#'   

# visualize: how problematic is the project effect ------------------------

   
   ggplot(plant_matrix_date_site_sample , aes( jitter(project,3), jitter(age,3)))+
     geom_point(aes(size = um_n_forb_observed), alpha = 0.35)
   
   
   #reorder the data in increasing age. 
   setorder(plant_matrix_date_site_sample, age, na.last = T)
      plant_matrix_date_site_sample[ , unique(project)  ,]
   #set project as a factor variable
      plant_matrix_date_site_sample[ , project := factor(as.character(project), levels =as.character(unique(plant_matrix_date_site_sample$project))), ]
   
   
   ggplot(plant_matrix_date_site_sample , aes( project, um_n_forb_observed))+
     geom_point(aes(size = age, color = trt ), alpha = 0.35)
   
   ggplot(plant_matrix_date_site_sample , aes( age, um_n_forb_observed))+
     geom_point(aes(size = um_total_richness , color = trt), alpha = 0.35)
   
   
   

   
# visualize: plot diversity effects broken out by nat/introduced plants boxplots --------------

    # total diversity as a fn of trt
   ggplot(plant_matrix_date_site_sample , aes(um_shannon, trt))+
     geom_boxplot()
   
   # floral diversity as a fn of trt
   ggplot(plant_matrix_date_site_sample , aes(um_shannons_floral, trt))+
     geom_boxplot()
   
   # native floral diversity as a fn of trt
   ggplot(plant_matrix_date_site_sample , aes(um_shannons_floral_nat, trt))+
     geom_boxplot()
   # introduced floral diversity as a fn of trt
   ggplot(plant_matrix_date_site_sample , aes(um_shannons_floral_int, trt))+
     geom_boxplot()


# visualize: floral richness boxplots ---------------------------------------------------------

      
   # floral richness as a fn of trt
   ggplot(plant_matrix_date_site_sample , aes(trt, um_forb_richness))+
     geom_boxplot()
   
   # native floral diversity as a fn of trt
   ggplot(plant_matrix_date_site_sample , aes(um_richness_floral_nat, trt))+
     geom_boxplot()
   # introduced floral diversity as a fn of trt
   ggplot(plant_matrix_date_site_sample , aes(um_richness_floral_int, trt))+
     geom_boxplot()+
     geom_boxplot(aes(um_richness_floral_nat), position = position_dodge2(width = 2))
   
   l_rich_div <- melt(plant_matrix_date_site_sample, id.vars = c("date", "site", "trt", "project"), 
                      measure.vars = c("um_forb_richness", "um_richness_floral_int", "um_richness_floral_nat", "um_shannons_floral", "um_shannons_floral_int", "um_shannons_floral_nat") )
   
   # ggplot(l_rich_div[variable %in% c("um_forb_richness", "um_richness_floral_int", "um_richness_floral_nat")], aes(trt, value))+
   #          geom_boxplot(aes(fill = variable), position = "dodge")
   
   ggplot(l_rich_div[variable %in% c("um_forb_richness", "um_richness_floral_int", "um_richness_floral_nat")], aes( variable, value))+
     geom_boxplot(aes(fill = trt), position = "dodge")
   

# visualize: floral diversity plots --------------------------------------------------

   # ggplot(l_rich_div[variable %in% c("um_shannons_floral", "um_shannons_floral_int", "um_shannons_floral_nat")], aes(trt, value))+
   #   geom_boxplot(aes(fill = variable), position = "dodge")
   
   ggplot(l_rich_div[variable %in% c("um_shannons_floral", "um_shannons_floral_int", "um_shannons_floral_nat")], aes( variable, value))+
     geom_boxplot(aes(fill = trt), position = "dodge")
   
  
   


# visualize: paired or nestedness of our data within projects -----------------------------

   # plant_matrix_date_site_sample[ , project_date := paste(project,date) , ]
   
   ggplot(plant_matrix_date_site_sample , aes( trt, jitter(um_total_richness)))+
     geom_boxplot()+
     geom_line(aes(group = interaction(project,date)), alpha = .5)+
     ggtitle(label = "floral richness by Treatment")

# test:do the analysis of seeding effects with a t-test -------------------
   #structure data appropriately
   dcast(plant_matrix_date_site_sample[trt != "typ", um_shannon , .(project,jday,trt) ], project + jday ~trt)
   
   #paired t-test, alternate hypothesis that native has higher div than non (choose different um_ tagged response metrics to adjust what you're testing for)
   t.test( dcast(plant_matrix_date_site_sample[trt != "typ", um_shannon , .(project,jday,trt) ], project + jday ~trt)[,nat,],# all the native samples
           dcast(plant_matrix_date_site_sample[trt != "typ", um_shannon , .(project,jday,trt) ], project + jday ~trt)[,non,],# compared to their corresponding "non"
           alternative = "g", paired = T)
   #and richness?
   t.test( dcast(plant_matrix_date_site_sample[trt != "typ", um_total_richness , .(project,jday,trt) ], project + jday ~trt)[,nat,],# all the native samples
           dcast(plant_matrix_date_site_sample[trt != "typ", um_total_richness , .(project,jday,trt) ], project + jday ~trt)[,non,],# compared to their corresponding "non"
           alternative = "g", paired = T)
   
   
# test:univariate responses linear models --------------------------------------------------------

   # are there differences between the three types of treatments
   basicmod <- lmer(data = plant_matrix_date_site_sample, um_shannon ~ trt + poly(jday,2) + scale(soil_pene_mean) + scale(Total_Sand) + (1|project)) 
   
   summary(basicmod)
   
    # does seeding increase diversity and richness? This model will dump all of the typ sites because we have no age data
   notyp <- plant_matrix_date_site_sample[trt != "typ"]
   
   
   shannondiv <- lmer(data = notyp, um_shannon ~ trt + age + trt:age + poly(jday,2) + scale(soil_pene_mean) + scale(Total_Sand) + (1|project))  
    
    summary(shannondiv)
    anova(shannondiv)
    plot(shannondiv)
    plot_model(shannondiv, type = "pred", terms = "trt")
    plot(ggpredict(shannondiv, terms = c("jday", "age")))
    

    #richness
    rich_mod <- glmer(um_forb_richness ~ trt*age + poly(jday,2)  + scale(soil_pene_mean) + scale(`Total_Sand`) + scale(area_m2) + (1|project) ,
                      data = plant_matrix_date_site_sample,   family = "poisson" )
    #singularity?
    tt <- getME(rich_mod,"theta")
    ll <- getME(rich_mod,"lower")
    min(tt[ll==0])
    
    #restart from last fit:
    ss <- getME(rich_mod,c("theta","fixef"))
    rich_mod_m2 <- update(rich_mod,start=ss,control=glmerControl(optCtrl=list(maxfun=2e4)))
    #fixed it.
    
    summary(rich_mod_m2)
    anova(rich_mod_m2)
    #view some estimates:
    plot(rich_mod_m2)
    plot_model(rich_mod)
    # plot_model(rich_mod, type = "pred", ci.lvl = .95,
    #            terms = c( "age", "trt"))
    # plot_model(shannondiv, type = "pred",
    #            terms = c( "jday", "age"))

    
#' We probably need a random effect for project (without one, we violate independence of samples)
#'    - This is easy enough to do on single species or metric (richness, diversity) models, but challenging in the whole-community approach (mvGLMs). 
#'    - might be possible through a permutation approcah: http://edild.github.io/mvabund/
#'    - Could consider ignoring the nestedness within project
#' We need to model age, and that's likely best done linearly
#' We need to model date -- this one is more tricky, likely needing a second degree polynomial fit (see plots above).
#'
#' 
#'   
#' When we present our contingency table, we want to present seeding benefit at 
#' best age & peak flowering. We can either let the data show us those values
#' (but we are data poor in this regard) or use values from the lit to assess.
#' To skirt the issue entirely, we need to come up with a metric (odds ratios?) 
#' that schmoozes over the issue, by saying "across all ages and dates, Xspecies
#' was Z times more present when planted compared to unplanted.    
   
   

# NMDS ordination viz of data ---------------------------------------------------------

    nmds_matrix <- as.matrix(plant_matrix_date_site_sample[,10:140])

    comm_nmds <- metaMDS(nmds_matrix, k = 3, distance = "bray")

    data.scores = as.data.frame(scores(comm_nmds)$sites)

    data.scores$Treatment = plant_matrix_date_site_sample$trt
    data.scores$Age = plant_matrix_date_site_sample$age
    data.scores$Date = plant_matrix_date_site_sample$date

    ggplot(data.scores, aes(x = NMDS1, y = NMDS2)) +
      geom_point(size = 2, aes( shape = Treatment, colour = -Age), alpha = .8)+
      theme(axis.text.y = element_text(colour = "black", size = 12, face = "bold"),
            axis.text.x = element_text(colour = "black", face = "bold", size = 12),
            legend.text = element_text(size = 12, face ="bold", colour ="black"),
            legend.position = "right", axis.title.y = element_text(face = "bold", size = 14),
            axis.title.x = element_text(face = "bold", size = 14, colour = "black"),
            legend.title = element_text(size = 14, colour = "black", face = "bold"),
            panel.background = element_blank(), panel.border = element_rect(colour = "black", fill = NA, linewidth = 1.2),
            legend.key=element_blank()) +
      labs(x = "NMDS1", colour = "Age", y = "NMDS2", shape = "Type")

    # #ANOSIM test
    # ano = anosim(nmds_matrix, plant_matrix_date_site_sample$trt, distance = "bray", permutations = 999)
    # ano
    # 
    # anoAge = anosim(nmds_matrix, plant_matrix_date_site_sample$age, distance = "bray", permutations = 999)
    # anoAge
    # 

# big viz of species effects ----------------------------------------------

abu <- plant_matrix_date_site_sample[ ,.SD , .SDcols = 10:140]
head(abu)

#create mvabund matrix

plant_matrix_date_site_sample[, instance := 1:.N, .(project,trt) ]

plant_matrix_date_site_sample$instance

env <- data.frame(age = plant_matrix_date_site_sample$age,
                  treatment = plant_matrix_date_site_sample$trt,
                  # datesamp = yday(sub$date),
                  # soil = scale(sub$soil_pene_mean),
                  # sand = scale(sub$Total_Sand),
                  # area = scale(sub$area_m2),
                  # block = sub$project,
                  instance = plant_matrix_date_site_sample$instance)
head(env)

plotraw <- data.frame(abu, env)
plotraw <- melt(plotraw, id.vars = c('treatment', 'age', 'instance'))
ggplot(plotraw, aes(x = age, y = value,
                    col = factor(treatment))) +
  geom_point(alpha = .25) +
  geom_smooth(aes(group = treatment), se = FALSE) +
  facet_wrap(~variable, scales = "free") +
  #scale_y_log10() +
  labs(y = 'Proportion Occ', x = 'Age') +
  # geom_vline(aes(xintercept = 0))  +
  theme_bw() +
  theme(legend.justification = c(1, 0),
        legend.position = c(1, 0),
        legend.key = element_blank())

# test:mvabund binomial model -------------------------------------------------------

    
    
    
    #prep data for mvGLM
    #create mvabund matrix
    
    plant_matrix_date_site_sample[, instance := 1:.N, .(project,trt) ]
    
    plant_matrix_date_site_sample$instance
    
    #fix the bad instance vals (assigned into groups with <6 samples & one being missing)
    plant_matrix_date_site_sample[site %in% c("2typ", "2non"), .( week(as.IDate(date, format = "%m/%d/%Y")),site, instance) ,  ]
    
    plant_matrix_date_site_sample[site == "2typ", instance := c(1,3:6) ]
    
    plant_matrix_date_site_sample[ , date:= as.IDate(date, format = "%m/%d/%Y") ,]
    
    plant_matrix_date_site_sample[ ,instance_mean:= mean.Date(date), instance]
    
    setorder(plant_matrix_date_site_sample,project, trt)
    
    #nestedness
    sub <- plant_matrix_date_site_sample[!trt == "typ" ]#note we are excluding typ sites
    
    
    # build mvabund obj
    
    names(sub)
    plant_spp <- mvabund(sub[,10:140]) 
    
    # #drop out the typ ditches here b/c we have NAs for age and desire to model it
    # sub <- plant_matrix_date_site_sample[trt != "typ", ]
    # sub[ , trt := factor(trt, levels = c("non","nat")) , ]
    # summary(sub$trt)
    # 
    
    #basic plots
    boxplot(sub[,10:140],horizontal = TRUE,las=2, main="Abundance", cex.axis=0.5)

    meanvar.plot(plant_spp)
    abline(0,1)
    
    plot(plant_spp~sub[, trt])
   
    # #we can't use a random effect to account for nestedness in the project design, so here's an approach to permutation resampling that *should* fix the issue: http://edild.github.io/mvabund/
    
    control <- how(blocks = sub$project,
                   nperm = 2000)
    permutations <- shuffleSet(nrow(plant_spp), control = control)

    
    # fit the models to our data
    
    # this is what we want to estimate with (skipping sand b/c of missing data)
    mod_full_A <- manyglm(plant_spp ~ trt*age + poly(jday,2)  + scale(soil_pene_mean) + scale(area_m2)  ,
                        family = binomial(),# offset = N,
                        data = sub)
    mod_null_A <- manyglm(plant_spp ~ age + poly(jday,2)  + scale(soil_pene_mean) + scale(area_m2) ,
                        family = binomial(), # offset = N,
                        data = sub)
    
    #this model is uuuuugly - needs a variance estimator? we actually fit this multiple ways (below) and we feel this is the best model
    plot(mod_full_A)
    plot.manyglm(mod_full_A, which = 1:3)
    
            # trying to diagnose the variance fit issue:
            #distribution of response data:
            names(sub)[1:5]
            freq <- melt(sub, id.vars = names(sub)[1:5], measure.vars = names(sub)[10:139], variable.name = "taxon", value.name = "freq")
            
            ggplot(freq, aes(x = freq))+
              geom_histogram(binwidth = .01)+
              scale_y_log10()
    
    #run a comparison of the treatment model to a null model
    # mod_compare_A <- anova(mod_full_A, mod_null_A, bootID = permutations, p.uni = 'adjusted', nBoot = 999) #
    # save(mod_compare_A, file = "modelcomparison_A2.RData")
    load(file = "modelcomparison_A2.RData")
    
    # uncomment to run summary
    # fullsummary_A <- summary(mod_full_A, bootID = permutations, p.uni = "adjusted", nBoot = 999)
    # save(fullsummary_A, file = "summaryresults_A2.RData")
    load(file = "summaryresults_A2.RData")
    

    # uncomment to run anova
    # fullanova_A <- anova(mod_full_A, bootID = permutations, p.uni = "adjusted", nBoot = 999)
    # save(fullanova_A, file = "anovaresults_A2.RData")
    load(file = "anovaresults_A2.RData")
    # 
    
    str(mod_compare_A)
    fullsummary_A
    fullanova_A
    
# test:mvabund - residuals are problematic --------------------------------

    
    #many attempts at fixing the variance problem:
    #try to drop some of the zeros? No fix
    n30_sub <- plant_matrix_date_site_sample[ !trt == "typ" , c(.SD, .(date = date,trt = trt)) , .SDcols = l_plant_obs[ , .("count" = .N) , .(taxon)][count>30, taxon,]   ]
    n30_sub[ , trt := factor(trt) , ]
    summary(n30_sub$trt)
    names(n30_sub)

    plant_spp <- mvabund(n30_sub[,1:36])

    #refit model
    mod_full <- manyglm(plant_spp ~ trt + age + poly(jday,2)  + scale(soil_pene_mean) + scale(Total_Sand) + scale(area_m2) ,
                        family="binomial",
                        data = sub)
    #this model is also uuuuugly
    plot(mod_full)
    
    rm(n30_sub)
    
    # #try to use count data instead
    # convert counts to proportion data
    summary(plant_matrix_date_site_sample[, 10:140])
    cols <- names(plant_matrix_date_site_sample)[ 10:140]
    plant_matrix_date_site_sample[ , (cols) := lapply(.SD, function(x){as.integer(x*N)}) , .SDcols = cols ]
    plant_matrix_date_site_sample[, summary(.SD) ,.SDcols = cols]
    # #
    # 
    
    

# test:mvabund count based model -------------------------------------------------------

    
    # #refit model
    plant_spp_int <- mvabund(plant_matrix_date_site_sample[!trt == "typ", 10:140])
    
    mod_full <- manyglm(plant_spp_int ~ trt*age + poly(jday,2)  + scale(soil_pene_mean) + scale(Total_Sand) + scale(area_m2) ,
                        family= "negative.binomial",
                        offset = N,
                        data = sub)
    # this sorta seems to fix things. Note the q-q plot has some craziness in it and the ends of the linear predictor are still high var(sideways hourglass in resid v fitted)
    plot(mod_full)
    plot.manyglm(mod_full, which = 1:3)
    
    
    #null model
    mod_null <- manyglm(plant_spp_int ~ age + poly(jday,2)  + scale(soil_pene_mean) + scale(Total_Sand) + scale(area_m2) ,
                        family = "negative.binomial",
                        offset = N,
                        data = sub)
    
    # mod_compare_B <- anova(mod_full, mod_null, bootID = permutations, p.uni = 'adjusted') 
    # save(mod_compare_B, file = "modelcomparison_B.RData")
    load(file = "modelcomparison_B.RData")
    mod_compare_B
    
    # uncomment to run summary
    # fullsummary_B <- summary(mod_full, p.uni = "adjusted")
    # save(fullsummary_B, file = "summaryresults_B.RData")
    load(file = "summaryresults_B.RData")
    
    fullsummary_B
   
       # uncomment to run anova
        # fullanova_B <- anova(mod_full, p.uni = "adjusted")
        # save(fullanova_B, file = "anovaresults_B.RData")
    load(file = "anovaresults_B.RData")
    # 
    fullanova_B
    
    # convert counts BACK to proportion data
    summary(plant_matrix_date_site_sample[, 10:140])
    cols <- names(plant_matrix_date_site_sample)[ 10:140]
    plant_matrix_date_site_sample[ , (cols) := lapply(.SD, function(x){as.numeric(x/N)}) , .SDcols = cols ]
    plant_matrix_date_site_sample[, summary(.SD) ,.SDcols = cols]
    
    
# visualize: mvabund ------------------------------------------------------

   fullanova <- fullanova_A
   fullsummary <- fullsummary_A
    
   speciessumresults   <- data.table(var = rownames(fullsummary$uni.test), fullsummary$uni.test)[data.table(var =  rownames(fullsummary$uni.p), fullsummary$uni.p), on = .(var = var)]
   
   

    speciesmodresults <- rbind(data.table(var = rownames(fullanova$uni.test), fullanova$uni.test),data.table(var = paste("p_", sep = "", rownames(fullanova$uni.p)), fullanova$uni.p))
    speciesmodresults[ ,exp(.SD) , .SDcols = c("Rudbeckia_hirta") ]
    
    # fwrite(speciesmodresults, file = "data&scripts/data/output/anova_species_table.csv")
    
    # effects table:
    eff <- data.table(species = colnames(fullanova$uni.test), deviance = fullanova$uni.test[2 , ], p_val = fullanova$uni.p[2,])
    
    l_mod_eff <- melt(eff, id.vars = "species")
    setDT(l_mod_eff)
    l_mod_eff <- l_mod_eff[ plant_taxa , on= .(species = Taxon)]
    
    eff <- eff[plant_taxa, on = .(species = Taxon)]
    setorder(eff, deviance)
    l_mod_eff[ , `Species Name`:= factor(`Species Name`, levels = eff$`Species Name`)]
    
    l_mod_eff[ `Minnesota Native?` == "" , `Minnesota Native?`:= "unclassified", ]
    l_mod_eff[ , `Minnesota Native?`:= factor(`Minnesota Native?`, levels = c("native", "introduced", "invasive", "unclassified" ))]
    
    match(l_mod_eff[, species], speciessumresults[,var])
    
    
    l_mod_eff[ , nat_est := speciessumresults[ match(l_mod_eff[, species], speciessumresults[,var]), `(Intercept)` , ] ,]
    
    
    ggplot(data = l_mod_eff[variable == "deviance" , ],   aes(x = "Common Name & Status", y=`Species Name`, fill=`Minnesota Native?`)) + 
      geom_tile() +
      geom_text( aes(label =l_mod_eff[variable == "p_val" , `Common Name`  ]), size=3 ) +
      labs(y = NULL)+
      scale_fill_brewer(palette = "Set2",name="Species Designation", labels = c("native", "introduced", "invasive", "unclassified"   ))+
      
      theme_bw()+
      new_scale_fill()+
      geom_tile(data = l_mod_eff[variable == "deviance" , ], aes(x = "Deviance", y=`Species Name`, fill = value))+
      scale_fill_gradient(low = "white", high = "steelblue", name = "Deviance", guide = 'none') +
      geom_text(aes(x = "Deviance",label = sprintf("%0.4f", l_mod_eff[variable == "deviance" , value])), size=3) +
      
      new_scale_fill()+
      geom_tile(data = l_mod_eff[variable == "p_val" , ], aes(x = "Dev p value", y=`Species Name`, fill=value))+
      scale_fill_gradient(trans = "log", low = "green", high = "red", 
                          breaks = c(0, 0.001, 0.05, 0.5), name = "p_value", guide = 'none')+
      geom_text(aes(x = "Dev p value",label = sprintf("%0.4f", l_mod_eff[variable == "p_val" , value ])), size=3) +
      
      new_scale_fill()+
      geom_tile(data = l_mod_eff[variable == "deviance" , ], aes(x = "Wald Value", y=`Species Name`, fill = nat_est))+
      scale_fill_gradient(low = "white", high = "steelblue", name = "Waldval", guide = 'none') +
      geom_text(aes(x = "Wald Value",label = sprintf("%0.4f", l_mod_eff[variable == "deviance" , nat_est])), size=3) +
      
      labs(x = NULL) +
      scale_x_discrete(expand = c(0, 0), position = "top")
      # theme(axis.ticks = element_blank(),
      #       axis.text.x =element_blank(), 
      #       plot.background = element_blank())
    
    

# test: principle response curves -----------------------------------------

    #sourced from: http://edild.github.io/prc1/
    
    plant_matrix_date_site_sample[ , trt := factor(trt, levels = c("typ", "non", "nat"))]
    
    #prep a dataset for PRC
    
    prc_sub <- plant_matrix_date_site_sample[ !trt == "typ" , c(.SD, .(date = date,trt = trt, age = age, week = instance)) , .SDcols = l_plant_obs[ , .("count" = .N) , .(taxon)][, taxon,]   ]
    prc_sub[ , trt := factor(trt, levels = c("non", "nat"))]
    
    # names(prc_sub)[1:130] <- tolower(plant_taxa[match(names(prc_sub)[1:130], plant_taxa[ , Taxon]), `Common Name`])
    
    
    names(prc_sub) <- gsub("_", " ", names(prc_sub)) 
    
    # time effects (age)
    ditch_prc_age <- prc(response = prc_sub[,1:133], treatment = prc_sub$trt, time = factor(-prc_sub$age))
    sum_prc_age <- summary(ditch_prc_age)
   
    
    ditch_prc_age #RDA
    sum_prc_age$sp #PRC
    
    # anova(ditch_prc_age)
    
    # plot(ditch_prc_age)
    plot(ditch_prc_age, select = abs(sum_prc_age$sp) > 0.04, scaling = "symmetric", type = "l", ylab = "", xlab = "Age (years)", legpos = NA, xlim = rev(range(-prc_sub$age)), xaxt = "n")
    mtext(side = 2, expression(paste("Principal response (C "[dt],")")), padj = -3,cex = 1, las = 0)
    mtext("Species Weights", adj = .9 , outer = T, padj = 1)
    axis(1, at = seq(-7.5,8, length.out = 5),
         labels = c(0,5,10,15,20), las = 1)
    
    
    sum_prc_age$coefficients
    

    # If we divide this eigenvalue by the sum of all eigenvalues, we obtain the proportion of explained variance which is displayed on each axis.
    ditch_prc_age$CCA$eig/sum(ditch_prc_age$CCA$eig)
    
    
    
    # date effects (phenology)
    ditch_prc_pheno <- prc(response = prc_sub[,1:133], treatment = prc_sub$trt, time = factor(prc_sub$week))
    sum_prc_pheno <- summary(ditch_prc_pheno)
    
    ditch_prc_pheno
    sum_prc_pheno$sp
    
    sum_prc_pheno$coefficients <- -1*sum_prc_pheno$coefficients
    
    ditch_prc_pheno$CCA$eig
    
    
    
    
    # plot(ditch_prc_pheno)
    meandates <-  format(plant_matrix_date_site_sample[ , unique(instance_mean) ,], "%d%b")
    # plot(ditch_prc_pheno, select = abs(sum_prc_pheno$sp) > 0.04, scaling = "symmetric")

    str(ditch_prc_pheno)
    
    plot(ditch_prc_pheno, select = abs(sum_prc_age$sp) > 0.04, scaling = "symmetric", type = "l", ylab = "", xlab = "Mean Visit Date", xaxt = "n", legpos = NA)
    mtext(side = 2, expression(paste("Principal response (C "[dt],")")), padj = -3,cex = 1, las = 0)
    mtext("Species Weights", adj = .95, outer = T, padj = 2.5)
    axis(1, at = seq(-7.1,7.1, length.out = 6),
         labels = meandates, las = 2)

    
    
    # anova(ditch_prc_pheno)
    
    # # If we divide this eigenvalue by the sum of all eigenvalues, we obtain the proportion of explained variance which is displayed on each axis.
    # ditch_prc_pheno$CCA$eig/sum(ditch_prc_pheno$CCA$eig)
    # 


# seedmix effects ---------------------------------------------------------
#' When we present our contingency table, we want to present seeding benefit at 
#' best age & peak flowering. We can either let the data show us those values
#' (but we are data poor in this regard) or use values from the lit to assess.
#' To skirt the issue entirely, we need to come up with a metric (odds ratios?) 
#' that schmoozes over the issue, by saying "across all ages and dates, Xspecies
#' was Z times more present when planted compared to unplanted. 

    
    ggplot(plant_matrix_date_site_sample, aes(um_total_richness, Seed_mix))+
      geom_boxplot()+
      ggtitle(label = "Species richness by SEEDMIX")
    
    names(plant_matrix_date_site_sample)
    plant_matrix_date_site_sample[ , summary(Agastache_foeniculum), ]
    

# species in each mix -----------------------------------------------------

plant_matrix_date_site_sample[ , .N , Seed_mix]
    
# species planting contingency table --------------------------------------
  
  #These models will not account for any covariates. They simply ask, what's the seeding benefit across this entire study
  
    #list species in the seedmix
    mix_spp_alpha_genus <- sort(names(plant_matrix_date_site_sample)[163:241])
  
    #check for matches in the observations:
    cbind(names(plant_matrix_date_site_sample)[163:241],names(plant_matrix_date_site_sample)[10:139] )
    
    match(word(names(plant_matrix_date_site_sample)[163:241], start = 2, end = -1, sep = fixed("_")), names(plant_matrix_date_site_sample)[10:139])
    
    #make a seeding table of 0 or 1 found or not found
    #strip first word instead of s_
    seeded_and_observed <- word(names(plant_matrix_date_site_sample)[163:241], start = 2, end = -1, sep = fixed("_"))[!is.na(match(word(names(plant_matrix_date_site_sample)[163:241], start = 2, end = -1, sep = fixed("_")), names(plant_matrix_date_site_sample)[10:139]))]
    
    seeded_not_obs <- word(names(plant_matrix_date_site_sample)[163:241], start = 2, end = -1, sep = fixed("_"))[is.na(match(word(names(plant_matrix_date_site_sample)[163:241], start = 2, end = -1, sep = fixed("_")), names(plant_matrix_date_site_sample)[10:139]))]
    
    #now limit these lists to things that were being looked for:
    #things that were being looked for:
    plant_taxa[ , Taxon, ] #all flowers/forbs and 10 grasses we ID'd
    seeded_and_observed %in% plant_taxa[ , Taxon, ]
    seeded_not_obs %in% plant_taxa[ , Taxon, ]
    
    seeding_outcomes <- cbind(c(seeded_and_observed,26:50),seeded_not_obs)
    
    colnames(seeding_outcomes) <- c("observed flowering", "not observed flowering")
    
    # fwrite(seeding_outcomes, file = "data&scripts/data/output/seeding_obs_nonobs.csv")
  
    #  
    plant_matrix_date_site_sample[ , mean(Asclepias_syriaca) , .(s_Asclepias_syriaca)]
      
      summary(plant_matrix_date_site_sample[ , c(163:241)])
      
      seedcols <- names(plant_matrix_date_site_sample)[163:241]
      plant_matrix_date_site_sample[ , (seedcols) := lapply(.SD, as.factor) , .SDcols = seedcols ]
      
      #set levels to be ordered 0,1
      plant_matrix_date_site_sample[, (seedcols) := setattr(.SD, "levels", c("0", "1")), .SDcols = seedcols]
      
      levels(plant_matrix_date_site_sample$s_Zizia_aurea)
      
      seeding_table <- data.table(species = character(), planted = factor(), min = numeric(), mean = numeric(), max = numeric(), sd = numeric())
    
      formula_strings = paste(paste( seeded_and_observed, paste("s_", seeded_and_observed ,sep = ""), sep = " ~ "), "")
      
      #prep a table to recieve the data
      coefs <- data.table(species = NA,
                          intercept = NA, 
                          intercept_se = NA,
                          seeding = NA,
                          seeding_se = NA)
      
  for (i in 1:length(seeded_and_observed)) {
    
    j <- seeded_and_observed[i]
    k <- paste("s_", j ,sep = "")
    
    print(j)
    print(plant_matrix_date_site_sample[ , .(min= min(get(j)), mean = mean(get(j)), max = max(get(j)), sd = sd(get(j))) , get(k)] )[1:2]#
    temp_table <- plant_matrix_date_site_sample[ , .(min= min(get(j)), mean = mean(get(j)), max = max(get(j)), sd = sd(get(j))) , get(k)]
    seeding_table <- rbind(seeding_table,
          data.table(species = j, 
                     planted = temp_table[ , get], 
                     min = temp_table[ , min], 
                     mean = temp_table[ , mean], 
                     max = temp_table[ , max], 
                     sd = temp_table[ , sd])
    )
    
    mod = stan_glm(as.formula(formula_strings[i]), 
                data=plant_matrix_date_site_sample, 
                family = binomial(link = 'logit'), 
                weights = N)
    coefs <- rbind(coefs,
                           data.table(species = j,
                                      intercept = fixef(mod)[1], 
                                      intercept_se = mod[[2]][1],
                                      seeding = fixef(mod)[2],
                                      seeding_se = mod[[2]][2])
    )
    print(summary(mod))
    mod[[2]]
    
    # write.csv(coefs, paste0(file_prefix[i], "coef.csv")
    
    # #convert to OR and export
    # OR = as.data.frame(logistic.display(mod)$table)
    # write.csv(OR, file = paste0(file_prefix[i], "OR.csv"))
    
    #plot?
      #ggpredict(mod, terms = "s_Zizia_aurea")
     
  }
      #build out that coefs table
      coefs <- data.table(coefs)
      coefs[ , odds_ratio_seed := exp(seeding) , ]
      coefs[ , "odds_ratio_seed_se" := exp(seeding_se) , ]
      
      
      
      # fwrite(seeding_table, file = "data&scripts/data/output/seeding_table.csv")  
      # fwrite(coefs, file = "data&scripts/data/output/seeding_coefficients_V2.csv") 
      


# summary statistics  ------------------------------------------------

      #how many sites per trt? a.k.a how many projects
      plant_matrix_date_site_sample[ , .("sites_per_trt" = length(unique(site))) , .(trt)]
      
      #how many quads?
      plant_matrix_date_site_sample[ , sum(N) , .(trt) ]#N is the number of quadrats sampled in each survey (on each site-date)
      
      #how many plant observations (multiple taxa often found at each site, recall we only named stuff with flower or seedhead and only 10 grasses)
      l_plant_obs[!taxon == "nofloweringveg", .N]
      
      #how many flowering forb observations
      l_plant_obs[taxon %in% plant_taxa[`grass/flower`== "flower", Taxon,], .N, .(trt)]
      
      #how many taxa observed
      l_plant_obs[!taxon == "nofloweringveg", length(unique(taxon))]
    
      #how many flowerign forb taxa observed
      l_plant_obs[trt!="typ" & taxon %in% plant_taxa[`grass/flower`== "flower", Taxon,], length(unique(taxon))]
      
      #how many dates was each site sampled on? number of rows is a count of unique sites
      l_plant_obs[ , length(unique(date)) , .(trt, project, site)]

      #how many species observed in each site
      l_plant_obs[!taxon == "nofloweringveg" , length(unique(taxon)) , .(trt, project, site)]    
      
      #how many species observed in each trt
      l_plant_obs[!taxon == "nofloweringveg" , length(unique(taxon)) , .(trt)] 

      #site areas
      plant_matrix_date_site_sample[ , unique(area_m2) , .(site)]
      plant_matrix_date_site_sample[ , unique(area_m2) , .(site)][ , summary(V1)] #summarize this
      plant_matrix_date_site_sample[trt!="typ" , unique(area_m2) , .(site)][ , summary(V1)] #summarize this w/o typ
      
      
      #n quadrats per survey
      plant_matrix_date_site_sample[ , unique(N), .(site, date)]
      plant_matrix_date_site_sample[ , unique(N), .(site, date)][ , summary(V1)] #summarize this
      
      # % of quads with a floral resource
      summary(plant_matrix_date_site_sample[ , um_n_quad_floral_res/N ,  ])# all forb taxa
      summary(plant_matrix_date_site_sample[ , um_n_quad_floral_res_nat/N ,  ])# native forb
      summary(plant_matrix_date_site_sample[ , um_n_quad_floral_res_int/N ,  ])#introduced
      
      # % of quads with a floral resource, by trt
      bytrt <- plant_matrix_date_site_sample[ ,.(trt, percquadfloral = um_n_quad_floral_res/N) ,   ][ , .("allforb" = summary(percquadfloral)) ,.(trt) ]
      bytrt[,val := rep(c("Min.", "1stQu.", "Median", "Mean", "3rdQu.", "Max."),3) , ]
      bytrt[, natforbpercquadsfloral := plant_matrix_date_site_sample[ ,.(trt, percquadfloral = um_n_quad_floral_res_nat/N) ,   ][ , .("natforb" = summary(percquadfloral)) ,.(trt) ][ , natforb ,] , ]
      bytrt[, intforbpercquadsfloral := plant_matrix_date_site_sample[ ,.(trt, percquadfloral = um_n_quad_floral_res_int/N) ,   ][ , .("intforb" = summary(percquadfloral)) ,.(trt) ][ , intforb ,] , ]
      

# make a table of the most common species in each trt group ---------------


      #by trt commonness
      names(plant_matrix_date_site_sample)
      plant_matrix_date_site_sample[ , .N , trt]
      taxa_cols <- names(plant_matrix_date_site_sample)[10:140]
      
      l_foc <- melt(plant_matrix_date_site_sample, id.vars = c("date", "site", "trt", "project", "N" ) , measure.vars = taxa_cols  ,  )
      
      l_foc[ , .N , trt ]
      
      
      l_foc_nat <-l_foc[trt == "nat"] 
      l_foc_non <-l_foc[trt == "non"] 
      l_foc_typ <-l_foc[trt == "typ"] 
      
      table_1a <- l_foc_nat[ , .("mean_prop" = mean(value), "sd_prop" = sd(value), "count_nonzero" = sum(value>0) ) , variable ]
      table_1b <- l_foc_non[ , .("mean_prop" = mean(value), "sd_prop" = sd(value), "count_nonzero" = sum(value>0)) , variable ]
      table_1c <- l_foc_typ[ , .("mean_prop" = mean(value), "sd_prop" = sd(value), "count_nonzero" = sum(value>0)) , variable ]

      setorder(table_1a, -mean_prop )
      setorder(table_1b, -mean_prop)
      setorder(table_1c, -mean_prop )
      
      names(table_1a) <- paste("nat_",names(table_1a), sep = "")
      names(table_1b) <- paste("non_",names(table_1b), sep = "")
      names(table_1c) <- paste("typ_",names(table_1c), sep = "")
      
      match(table_1a[ ,nat_variable ,], plant_taxa[ , Taxon ,])
      
      
      table_1a[ , nat_common_name := plant_taxa[match(table_1a[ ,nat_variable ,], plant_taxa[ , Taxon ,]), `Common Name`] , ]
      table_1b[ , non_common_name := plant_taxa[match(table_1b[ ,non_variable ,], plant_taxa[ , Taxon ,]), `Common Name`] , ]
      table_1c[ , typ_common_name := plant_taxa[match(table_1c[ ,typ_variable ,], plant_taxa[ , Taxon ,]), `Common Name`] , ]
      
      table_1a[ , nat_status := plant_taxa[match(table_1a[ ,nat_variable ,], plant_taxa[ , Taxon ,]), `Minnesota Native?`] , ]
      table_1b[ , non_status := plant_taxa[match(table_1b[ ,non_variable ,], plant_taxa[ , Taxon ,]), `Minnesota Native?`] , ]
      table_1c[ , typ_status := plant_taxa[match(table_1c[ ,typ_variable ,], plant_taxa[ , Taxon ,]), `Minnesota Native?`] , ]
      
      
      common_obs_by_trt <- cbind(table_1a, table_1b, table_1c)
      
      
      # fwrite(common_obs_by_trt, file = "data&scripts/data/output/common_plants_by_trt.csv")


# seeded species v. colonizing species in nat and non panels of rich, shan, % floral  --------
      
      plant_matrix_date_site_sample[ , .N , .(Seed_mix)]
      
      #for a given row, here are the seeded species we observed, sans typ site data
      
      plant_matrix_date_site_sample_notyp <- plant_matrix_date_site_sample[Seed_mix != ""]
      
      #all species colinizer metrics
      for (i in 1:nrow(plant_matrix_date_site_sample_notyp)) {
        seeded <- intersect(names(plant_matrix_date_site_sample_notyp), gsub("^s_" ,"" ,
                                                                             colnames(
                                                                               plant_matrix_date_site_sample_notyp[i , .SD , .SDcols = grep(pattern="^s_", 
                                                                                                                                            x=colnames(plant_matrix_date_site_sample_notyp)) ])[plant_matrix_date_site_sample_notyp[i , .SD , .SDcols = grep(pattern="^s_", 
                                                                                                                                                                                                                                                             x=colnames(plant_matrix_date_site_sample_notyp)) ]==1]
        ) 
        )
        
        colinizer <- setdiff(names(plant_matrix_date_site_sample_notyp[,c(10:139)]), gsub("^s_" ,"" ,
                                                                                          colnames(
                                                                                            plant_matrix_date_site_sample_notyp[i , .SD , .SDcols = grep(pattern="^s_", 
                                                                                                                                                         x=colnames(plant_matrix_date_site_sample_notyp)) ])[plant_matrix_date_site_sample_notyp[i , .SD , .SDcols = grep(pattern="^s_", 
                                                                                                                                                                                                                                                                          x=colnames(plant_matrix_date_site_sample_notyp)) ]==1]
        ) 
        )
        if (length(seeded) != 0) {
          plant_matrix_date_site_sample_notyp[i, seeded_richness := rowSums(plant_matrix_date_site_sample_notyp[i, .SD , .SDcols = seeded  ]!=0)]
          plant_matrix_date_site_sample_notyp[i, seeded_shannons := diversity(plant_matrix_date_site_sample_notyp[i, .SD , .SDcols = seeded  ],index = "shannon")]
        } else {
          plant_matrix_date_site_sample_notyp[i , seeded_richness := 0 , ]
          plant_matrix_date_site_sample_notyp[i , seeded_shannons := 0 , ]
        }
        # add columns for this row based on seeded or colinizer:
        # plant_matrix_date_site_sample_notyp[1, .SD , .SDcols = seeded  ]
        plant_matrix_date_site_sample_notyp[i, colinizer_richness := rowSums(plant_matrix_date_site_sample_notyp[i, .SD , .SDcols = colinizer  ]!=0)]
        plant_matrix_date_site_sample_notyp[i, colinizer_shannons := diversity(plant_matrix_date_site_sample_notyp[i, .SD , .SDcols = colinizer  ],index = "shannon")]
      }
      #floral species colinizer metrics
      for (i in 1:nrow(plant_matrix_date_site_sample_notyp)) {
        seeded <- intersect(intersect(names(plant_matrix_date_site_sample_notyp), gsub("^s_" ,"" ,
                                                                       colnames(
                                                                         plant_matrix_date_site_sample_notyp[i , .SD , .SDcols = grep(pattern="^s_", 
                                                                                                                                x=colnames(plant_matrix_date_site_sample_notyp)) ])[plant_matrix_date_site_sample_notyp[i , .SD , .SDcols = grep(pattern="^s_", 
                                                                                                                                                                                                                                     x=colnames(plant_matrix_date_site_sample_notyp)) ]==1]
        ) 
        ),
        floralcols)
        
        colinizer <- intersect(setdiff(names(plant_matrix_date_site_sample_notyp[,c(10:139)]), gsub("^s_" ,"" ,
                                                                                    colnames(
                                                                                      plant_matrix_date_site_sample_notyp[i , .SD , .SDcols = grep(pattern="^s_", 
                                                                                                                                             x=colnames(plant_matrix_date_site_sample_notyp)) ])[plant_matrix_date_site_sample_notyp[i , .SD , .SDcols = grep(pattern="^s_", 
                                                                                                                                                                                                                                                  x=colnames(plant_matrix_date_site_sample_notyp)) ]==1]
        ) 
        ),
        floralcols)
        
        if (length(seeded) != 0) {
          plant_matrix_date_site_sample_notyp[i, seeded_richness_floral := rowSums(plant_matrix_date_site_sample_notyp[i, .SD , .SDcols = seeded  ]!=0)]
          plant_matrix_date_site_sample_notyp[i, seeded_shannons_floral := diversity(plant_matrix_date_site_sample_notyp[i, .SD , .SDcols = seeded  ],index = "shannon")]
        }
        else {
          plant_matrix_date_site_sample_notyp[i , seeded_richness_floral := 0 , ]
          plant_matrix_date_site_sample_notyp[i , seeded_shannons_floral := 0 , ]
        }
        
        # add columns for this row based on seeded or colinizer:
        # plant_matrix_date_site_sample_notyp[1, .SD , .SDcols = seeded  ]
        plant_matrix_date_site_sample_notyp[i, colinizer_richness_floral := rowSums(plant_matrix_date_site_sample_notyp[i, .SD , .SDcols = colinizer  ]!=0)]
        plant_matrix_date_site_sample_notyp[i, colinizer_shannons_floral := diversity(plant_matrix_date_site_sample_notyp[i, .SD , .SDcols = colinizer  ],index = "shannon")]
      }
      
      
      
      plant_matrix_date_site_sample_notyp[ , .(um_total_richness, seeded_richness, colinizer_richness)]
      plant_matrix_date_site_sample_notyp[ , .(um_forb_richness, seeded_richness_floral, colinizer_richness_floral)]
      
      #check for NO NAs
      plant_matrix_date_site_sample_notyp[ , summary(.SD) , .SDcols = c("um_forb_richness", "seeded_richness_floral", "colinizer_richness_floral", "um_total_richness" , "seeded_richness", "colinizer_richness" )]
      
      
      figdat <- melt(plant_matrix_date_site_sample_notyp, 
           id.vars = c("date","site", "trt", "age"),
           measure.vars = patterns("_floral$"),
           variable.name = "metric", 
           value.name = c("value"))
      summary(figdat$metric)
      
      
      
      #Figure99:
      ggplot(figdat[metric %in% c("seeded_richness_floral", "colinizer_richness_floral")] , aes( jitter(age), jitter(value), group = metric))+
        geom_point(aes(color = metric), alpha = 0.25)+
        # facet_grid(rows = vars(trt) )+
        ggtitle(label = "Floral Richness")+
        geom_smooth(method = "lm", aes(color = metric))+
        ylab("# of Species of Flowers Observed")+
        xlab(" ")+
        # theme(legend.position = "none")+
        scale_color_brewer(palette = "Set2")+
        facet_wrap(~trt)
      ggplot(figdat[metric %in% c("seeded_shannons_floral", "colinizer_shannons_floral")] , aes( jitter(age), jitter(value), group = metric))+
        geom_point(aes(color = metric), alpha = 0.25)+
        # facet_grid(rows = vars(trt) )+
        ggtitle(label = "Floral Diversity")+
        geom_smooth(method = "lm", aes(color = metric))+
        ylab("Shannon Diversiy")+
        xlab("Age (years)")+
        # theme(legend.position = "none")+
        scale_color_brewer(palette = "Set2")+
        facet_wrap(~trt)
      
 

# visualize: Fig 2 colonizing vs. planted species phenology ----------------------------------------------------

      
      SeedFloral_rich_date_nat <- ggplot(figdat[metric %in% c("seeded_richness_floral", "colinizer_richness_floral") & trt == "nat"] , aes( date, value, group = metric))+
        geom_point(aes(color = metric), alpha = 0.25)+
        # facet_grid(rows = vars(trt) )+
        geom_smooth(method = "loess", aes(color = metric))+
        coord_cartesian(ylim = c(0,15))+
        ylab("Floral Richness")+
        xlab(NULL)+
        ggtitle(label = "Native Seedmix")+
        # theme(legend.position = "none")+
        scale_color_brewer(palette = "Set2")+
        theme(legend.position = c(.2,.8), legend.background = element_blank())+
        scale_color_brewer(palette = "Set1",name="Species\nOrigin", labels = c( "Seeded","Colonizer" ))
      
      SeedFloral_rich_date_non <- ggplot(figdat[metric %in% c("seeded_richness_floral", "colinizer_richness_floral") & trt == "non"] , aes( date, value, group = metric))+
        geom_point(aes(color = metric), alpha = 0.25)+
        # facet_grid(rows = vars(trt) )+
        geom_smooth(method = "loess", aes(color = metric))+
        coord_cartesian(ylim = c(0,15))+
        ylab(NULL)+
        xlab(NULL)+
        ggtitle(label = "Non-native Seedmix")+
        theme(legend.position = "none")+
        scale_color_brewer(palette = "Set1")
      
      SeedFloral_div_date_nat <- ggplot(figdat[metric %in% c("seeded_shannons_floral", "colinizer_shannons_floral")& trt == "nat"] , aes( date, value, group = metric))+
        geom_point(aes(color = metric), alpha = 0.25)+
        geom_smooth(method = "loess", aes(color = metric))+
        coord_cartesian(ylim = c(0,2.25))+
        ylab("Floral Shannon Diversiy")+
        xlab("")+
        theme(legend.position = "none")+
        scale_color_brewer(palette = "Set1")
      
      SeedFloral_div_date_non <- ggplot(figdat[metric %in% c("seeded_shannons_floral", "colinizer_shannons_floral")& trt == "non"] , aes( date, value, group = metric))+
        geom_point(aes(color = metric), alpha = 0.25)+
        geom_smooth(method = "loess", aes(color = metric))+
        coord_cartesian(ylim = c(0,2.25))+
        ylab(NULL)+
        xlab("")+
        theme(legend.position = "none")+
        scale_color_brewer(palette = "Set1")
      

      

      #' ## Combining
      plots.row <- align_plots(SeedFloral_rich_date_nat, SeedFloral_rich_date_non,SeedFloral_div_date_nat,SeedFloral_div_date_non, align="hv", 
                               axis="tblr")
      div.rows <- plot_grid(plots.row[[1]], plots.row[[2]], plots.row[[3]], plots.row[[4]],
                            nrow=2, #label_size = 7.5,
                            # label_fontface = "plain", labels = c("(a)", "(b)", "(c)"), 
                            hjust = -0.25, vjust = 2)
      div.rows <- ggdraw(add_sub(div.rows, "Date", vpadding=grid::unit(0,"lines"),y=6, x=0.5, vjust=4.5))
      
      # # Write figure 3
      # png(file = "Fig2_source_pheno_4panel.png", width = 6, height = 6, units = "in", res = 1200)
      div.rows
      # dev.off()
     
      
###aaaaannnnnnnd nnnnoooowwww age       

# visualize: colonizing vs. planted species by age --------------------------

      
      SeedFloral_rich_age_nat <- ggplot(figdat[metric %in% c("seeded_richness_floral", "colinizer_richness_floral") & trt == "nat"] , aes( age, value, group = metric))+
        geom_point(aes(color = metric), alpha = 0.25)+
        # facet_grid(rows = vars(trt) )+
        geom_smooth(method = "lm", aes(color = metric))+
        ylab("Floral Richness")+
        xlab(NULL)+
        ggtitle(label = "Native Seedmix")+
        # theme(legend.position = "none")+
        scale_color_brewer(palette = "Set1")+
        theme(legend.position = c(.2,.8), legend.background = element_blank())+
        scale_color_brewer(palette = "Set1",name="Species\nOrigin", labels = c( "Seeded","Colonizer" ))
      
      SeedFloral_rich_age_non <- ggplot(figdat[metric %in% c("seeded_richness_floral", "colinizer_richness_floral") & trt == "non"] , aes( age, value, group = metric))+
        geom_point(aes(color = metric), alpha = 0.25)+
        # facet_grid(rows = vars(trt) )+
        geom_smooth(method = "lm", aes(color = metric))+
        ylab(NULL)+
        xlab(NULL)+
        ggtitle(label = "Non-native Seedmix")+
        theme(legend.position = "none")+
        scale_color_brewer(palette = "Set1")
      
      SeedFloral_div_age_nat <- ggplot(figdat[metric %in% c("seeded_shannons_floral", "colinizer_shannons_floral")& trt == "nat"] , aes( age, value, group = metric))+
        geom_point(aes(color = metric), alpha = 0.25)+
        geom_smooth(method = "lm", aes(color = metric))+
        ylab("Floral Shannon Diversiy")+
        xlab("Age (years)")+
        theme(legend.position = "none")+
        scale_color_brewer(palette = "Set1")
      
      SeedFloral_div_age_non <- ggplot(figdat[metric %in% c("seeded_shannons_floral", "colinizer_shannons_floral")& trt == "non"] , aes( age, value, group = metric))+
        geom_point(aes(color = metric), alpha = 0.25)+
        geom_smooth(method = "lm", aes(color = metric))+
        ylab(NULL)+
        xlab("Age (years)")+
        theme(legend.position = "none")+
        scale_color_brewer(palette = "Set1")
      
      
      ggarrange(SeedFloral_rich_age_nat, SeedFloral_rich_age_non,SeedFloral_div_age_nat,SeedFloral_div_age_non, nrow = 2, ncol = 2)
      

# footer ------------------------------------------------------------------

sessionInfo()