# INPUT: Veg data (all_veg_cc.csv)

# OUTPUT: JWM manuscript Table S2 (Supporting Information): Means and standard 
#         deviations (SD) for vegetation characteristics measured at the beginning
#         and end points of arthropod collection transects at 0, 25, and 100 m 
#         from the field edge along primary transects.


# Load libraries
library(dplyr)

# Read in vegetation data
veg = read.csv("data/20190610_all_veg_cc.csv")

# Explore data types
sapply(veg, class)

# Make columns numeric. Need to change to character first, since these columns currently
# contain factors. Columns 40-50 (canopy cover) are already numeric.
veg[, c(19:26, 32:35)] = lapply(veg[, c(19:26, 32:35)], as.character)
veg[, c(19:26, 32:35)] = lapply(veg[, c(19:26, 32:35)], as.numeric)

# Create column vor_mean (average n/s/e/w readings)
for (i in 1:nrow(veg)){
  veg$vor_mean[i] = ((veg$vor_n[i] + veg$vor_e[i] + veg$vor_s[i] + veg$vor_w[i])/4)
}

# Create column gc.lit.p: percentage of ground cover that is litter
for (i in 1:nrow(veg)){
  veg$gc.lit.p[i] = (((veg$gc_lit[i])/30)*100)
}

# Create column gc.bg.p: percentage of ground cover that is bare ground
for (i in 1:nrow(veg)){
  veg$gc.bg.p[i] = (((veg$gc_bg[i])/30)*100)
}

# Create column gc.oth.p - percentage of ground cover that is other
for (i in 1:nrow(veg)){
  veg$gc.oth.p[i] = (((veg$gc_oth[i])/30)*100)
}

# Create column cc.live.p: percentage of canopy cover consisting of grass, forb, or woody
for (i in 1:nrow(veg)){
  veg$cc.live.p[i] = (veg$grass.p.adj[i] + veg$forb.p.adj[i] + veg$woody.p.adj[i])
}

# Create column sp_total: sum of sp_grass + sp_forb
for (i in 1:nrow(veg)){
  veg$sp_total[i] = (veg$sp_grass[i] + veg$sp_forb[i])
}

# Create new column vor_field with vor reading from direction of field
veg$vor_field = ifelse(veg$site=="FC", veg$vor_w, ".") # populate other cells with "."
veg$vor_field = ifelse(veg$site=="WF", veg$vor_s, veg$vor_field)
veg$vor_field = ifelse(veg$site=="HL", veg$vor_w, veg$vor_field)
veg$vor_field = ifelse(veg$site=="MS", veg$vor_e, veg$vor_field)
veg$vor_field = ifelse(veg$site=="SM", veg$vor_e, veg$vor_field)
veg$vor_field = ifelse(veg$site=="WW", veg$vor_w, veg$vor_field)
veg$vor_field = ifelse(veg$site=="DH", veg$vor_w, veg$vor_field)
veg$vor_field = ifelse(veg$site=="DH" & veg$transect == "T" & veg$timing == "20", veg$vor_n, veg$vor_field) # T2 transect only will use vor_n
veg$vor_field = ifelse(veg$site=="LM", veg$vor_w, veg$vor_field)
veg$vor_field = ifelse(veg$site=="RH", veg$vor_w, veg$vor_field)

# Make this column numeric
veg$vor_field = as.numeric(veg$vor_field)

# Rename columns 
names(veg)[names(veg) == 'grass.p.adj'] <- 'cc.grass.p'
names(veg)[names(veg) == 'forb.p.adj'] <- 'cc.forb.p'
names(veg)[names(veg) == 'dead.p.adj'] <- 'cc.dead.p'
names(veg)[names(veg) == 'woody.p.adj'] <- 'cc.woody.p'
names(veg)[names(veg) == 'other.p.adj'] <- 'cc.other.p'

# Remove white space in column cc_comment
veg$cc_comment = as.character(veg$cc_comment)
veg$cc_comment = trimws(veg$cc_comment, which = c("both"))

# Make cc_comment = NA and blank to "."
veg$cc_comment[is.na(veg$cc_comment)] = "."
veg$cc_comment[veg$cc_comment == ""] = "."

# Subset to keep only C and L plots pre-spraying, then L & R plots at 3 & 20 d
# (these are the start/end points of insect collection of samples 
# stored in ethanol), keep only primary transects (X/Y/Z), keep only 0 & 25 & 100
# m plots
veg.sub = veg %>% 
  filter(!(timing == "P" & plot == "R")) %>% # exclude pre-spray R plots %>%
  filter(transect == 'X' | transect == 'Y' | transect == 'Z') %>% 
  filter(distance == 0 | distance == 25 | distance == 100) %>% 
  dplyr::select(site,
                timing,
                transect,
                distance,
                plot,
                lit,
                mhl,
                mhd,
                sp_grass,
                sp_forb,
                sp_total,
                gc.lit.p,
                gc.bg.p,
                gc.oth.p,
                cc.grass.p,
                cc.forb.p,
                cc.dead.p,
                cc.woody.p,
                cc.other.p,
                vor_mean,
                vor_field)

# Calculate means
veg.mean = veg.sub %>% 
  group_by(site, timing) %>%
  summarize(n = n(),
            lit.m = mean(lit, na.rm = TRUE),
            lit.sd = sd(lit, na.rm = TRUE),
            mhl.m = mean(mhl, na.rm = TRUE),
            mhl.sd = sd(mhl, na.rm = TRUE),
            mhd.m = mean(mhd, na.rm = TRUE),
            mhd.sd = sd(mhd, na.rm = TRUE),
            sp_grass.m = mean(sp_grass, na.rm = TRUE),
            sp_grass.sd = sd(sp_grass, na.rm = TRUE),
            sp_forb.m = mean(sp_forb, na.rm = TRUE),
            sp_forb.sd = sd(sp_forb, na.rm = TRUE),
            sp_total.m = mean(sp_total, na.rm = TRUE),
            sp_total.sd = sd(sp_total, na.rm = TRUE),
            gc.lit.p.m = mean(gc.lit.p, na.rm = TRUE),
            gc.lit.p.sd = sd(gc.lit.p, na.rm = TRUE),
            gc.bg.p.m = mean(gc.bg.p, na.rm = TRUE),
            gc.bg.p.sd = sd(gc.bg.p, na.rm = TRUE),
            gc.oth.p.m = mean(gc.oth.p, na.rm = TRUE),
            gc.oth.p.sd = sd(gc.oth.p, na.rm = TRUE),
            cc.grass.p.m = mean(cc.grass.p, na.rm = TRUE),
            cc.grass.p.sd = sd(cc.grass.p, na.rm = TRUE),
            cc.forb.p.m = mean(cc.forb.p, na.rm = TRUE),
            cc.forb.p.sd = sd(cc.forb.p, na.rm = TRUE),
            cc.dead.p.m = mean(cc.dead.p, na.rm = TRUE),
            cc.dead.p.sd = sd(cc.dead.p, na.rm = TRUE),
            cc.woody.p.m = mean(cc.woody.p, na.rm = TRUE),
            cc.woody.p.sd = sd(cc.woody.p, na.rm = TRUE),
            cc.other.p.m = mean(cc.other.p, na.rm = TRUE),
            cc.other.p.sd = sd(cc.other.p, na.rm = TRUE),
            vor_mean.m = mean(vor_mean, na.rm = TRUE),
            vor_mean.sd = sd(vor_mean, na.rm = TRUE),
            vor_field.m = mean(vor_field, na.rm = TRUE),
            vor_field.sd = sd(vor_field, na.rm = TRUE))

# Rename sites and reorder rows
veg.sort = veg.mean %>% 
  mutate(site = recode(site, 'HL' = 'Focal A', 'LM' = 'Focal B', 'WW' = 'Focal C', 
                       'MS' = 'Focal D', 'SM' = 'Focal E', 'DH' = 'Reference A',
                       'RH' = 'Reference B', 'WF' = 'Reference C', 
                       'FC' = 'Reference D')) %>% 
  arrange(site, factor(timing, levels = c("P", "3", "20")))

# Datestamp
datestamp=as.Date(Sys.Date())
datestamp=format(datestamp, format="%Y%m%d")

# Export table of rounded values
write.csv(veg.sort, file = paste0("output/", datestamp, "_veg_summary_primarytransects_0-25-100m.csv"), 
          row.names = FALSE)