# MODEL: Family richness of Araneae with Poisson distribution # INPUT: - Arthropod ID and measuring data: combined_id_meas_manually_cleaned.csv # - Vegetation data: all_veg_cc.csv # OUTPUT: - Table of model step 1 results with covariate estimates, 95% confidence # intervals, p-values, marginal R2 values, and conditional R2 values # - Table of model step 2 results with covariate estimates, 95% confidence # intervals, p-values, marginal R2 values, and conditional R2 values # Load libraries library(dplyr) library(tidyr) library(forcats) library(ggplot2) library(tibble) library(MASS) library(r2glmm) library(nlme) library(MuMIn) # Read in arthropod and vegetation data dat = read.csv("data/20230303_combined_id_meas_manually_cleaned.csv") # arthropods veg = read.csv("data/20190610_all_veg_cc.csv") # vegetation ################# Calculate arthropod biomass ################################## ##### Functions: biomass by order - Straus & Aviles 2018) # Coleoptera bio.col <- function(x) { mg.col = 10^(-1.960 + 2.966*(log10(x))) return(mg.col) } # Orthoptera bio.ort <- function(x) { mg.ort = 10^(-1.928 + 2.680*(log10(x))) return(mg.ort) } # Hymenoptera bio.hym <- function(x) { mg.hym = 10^(-0.611 + 1.141*(log10(x))) return(mg.hym) } # Lepidoptera bio.lep <- function(x) { mg.lep = 10^(-3.169 + 3.624*(log10(x))) return(mg.lep) } # Hemiptera bio.hem <- function(x) { mg.hem = 10^(-1.749 + 2.529*(log10(x))) return(mg.hem) } # Diptera bio.dip <- function(x) { mg.dip = 10^(-1.361 + 2.026*(log10(x))) return(mg.dip) } # Araneae bio.ara <- function(x) { mg.ara = 10^(-2.643 + 4.011*(log10(x))) return(mg.ara) } # All groups bio.all <- function(x) { mg.all = 10^(-1.412 + 2.085*(log10(x))) return(mg.all) } # Test functions y = 20 bio.ort(y) bio.col(y) bio.hym(y) bio.lep(y) bio.hem(y) bio.dip(y) bio.ara(y) bio.all(y) # Make all columns character dat[] = lapply(dat, as.character) # Adjust data types dat = dat %>% mutate_at(c("key", "code", "year", "site", "timing", "transect", "distance", "ID", "ID_family", "ID_order"), as.factor) %>% # columns to be factors mutate_at(c("mm_1", "mm_2", "mm_3", "mm_4", "mm_5", "mm_6", "mm_7", "mm_8", "mm_9", "mm_10", "ct_1", "ct_2", "ct_3", "ct_4", "ct_5", "ct_6", "ct_7", "ct_8", "ct_9", "ct_10"), as.numeric) # Columns to be numeric. Warnings: "." changed to NA. # Make family & order "." = NA dat$ID_family[dat$ID_family == "."] = NA dat$ID_order[dat$ID_order == "."] = NA # Biomass Coleoptera dat$bio_1= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_1), NA) dat$bio_2= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_2), NA) dat$bio_3= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_3), NA) dat$bio_4= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_4), NA) dat$bio_5= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_5), NA) dat$bio_6= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_6), NA) dat$bio_7= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_7), NA) dat$bio_8= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_8), NA) dat$bio_9= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_9), NA) dat$bio_10= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_10), NA) # Biomass Orthoptera dat$bio_1= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_1), dat$bio_1) dat$bio_2= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_2), dat$bio_2) dat$bio_3= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_3), dat$bio_3) dat$bio_4= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_4), dat$bio_4) dat$bio_5= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_5), dat$bio_5) dat$bio_6= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_6), dat$bio_6) dat$bio_7= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_7), dat$bio_7) dat$bio_8= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_8), dat$bio_8) dat$bio_9= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_9), dat$bio_9) dat$bio_10= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_10), dat$bio_10) # Biomass Hymenoptera dat$bio_1= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_1), dat$bio_1) dat$bio_2= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_2), dat$bio_2) dat$bio_3= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_3), dat$bio_3) dat$bio_4= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_4), dat$bio_4) dat$bio_5= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_5), dat$bio_5) dat$bio_6= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_6), dat$bio_6) dat$bio_7= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_7), dat$bio_7) dat$bio_8= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_8), dat$bio_8) dat$bio_9= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_9), dat$bio_9) dat$bio_10= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_10), dat$bio_10) # Biomass Lepidoptera dat$bio_1= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_1), dat$bio_1) dat$bio_2= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_2), dat$bio_2) dat$bio_3= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_3), dat$bio_3) dat$bio_4= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_4), dat$bio_4) dat$bio_5= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_5), dat$bio_5) dat$bio_6= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_6), dat$bio_6) dat$bio_7= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_7), dat$bio_7) dat$bio_8= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_8), dat$bio_8) dat$bio_9= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_9), dat$bio_9) dat$bio_10= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_10), dat$bio_10) # Biomass Hemiptera dat$bio_1= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_1), dat$bio_1) dat$bio_2= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_2), dat$bio_2) dat$bio_3= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_3), dat$bio_3) dat$bio_4= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_4), dat$bio_4) dat$bio_5= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_5), dat$bio_5) dat$bio_6= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_6), dat$bio_6) dat$bio_7= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_7), dat$bio_7) dat$bio_8= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_8), dat$bio_8) dat$bio_9= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_9), dat$bio_9) dat$bio_10= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_10), dat$bio_10) # Biomass Diptera dat$bio_1= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_1), dat$bio_1) dat$bio_2= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_2), dat$bio_2) dat$bio_3= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_3), dat$bio_3) dat$bio_4= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_4), dat$bio_4) dat$bio_5= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_5), dat$bio_5) dat$bio_6= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_6), dat$bio_6) dat$bio_7= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_7), dat$bio_7) dat$bio_8= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_8), dat$bio_8) dat$bio_9= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_9), dat$bio_9) dat$bio_10= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_10), dat$bio_10) # Biomass Araneae dat$bio_1= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_1), dat$bio_1) dat$bio_2= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_2), dat$bio_2) dat$bio_3= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_3), dat$bio_3) dat$bio_4= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_4), dat$bio_4) dat$bio_5= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_5), dat$bio_5) dat$bio_6= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_6), dat$bio_6) dat$bio_7= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_7), dat$bio_7) dat$bio_8= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_8), dat$bio_8) dat$bio_9= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_9), dat$bio_9) dat$bio_10= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_10), dat$bio_10) # Biomass other: for orders other than Coleoptera, Orthoptera, Hymenoptera, # Lepidoptera, Hemiptera, Diptera, or Araneae dat$bio_1= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_1), dat$bio_1) dat$bio_2= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_2), dat$bio_2) dat$bio_3= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_3), dat$bio_3) dat$bio_4= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_4), dat$bio_4) dat$bio_5= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_5), dat$bio_5) dat$bio_6= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_6), dat$bio_6) dat$bio_7= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_7), dat$bio_7) dat$bio_8= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_8), dat$bio_8) dat$bio_9= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_9), dat$bio_9) dat$bio_10= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_10), dat$bio_10) # Biomass other: for order = Unknown dat$bio_1= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_1), dat$bio_1) dat$bio_2= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_2), dat$bio_2) dat$bio_3= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_3), dat$bio_3) dat$bio_4= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_4), dat$bio_4) dat$bio_5= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_5), dat$bio_5) dat$bio_6= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_6), dat$bio_6) dat$bio_7= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_7), dat$bio_7) dat$bio_8= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_8), dat$bio_8) dat$bio_9= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_9), dat$bio_9) dat$bio_10= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_10), dat$bio_10) # Biomass other: for order = NA dat$bio_1= ifelse(is.na(dat$ID_order), bio.all(dat$mm_1), dat$bio_1) dat$bio_2= ifelse(is.na(dat$ID_order), bio.all(dat$mm_2), dat$bio_2) dat$bio_3= ifelse(is.na(dat$ID_order), bio.all(dat$mm_3), dat$bio_3) dat$bio_4= ifelse(is.na(dat$ID_order), bio.all(dat$mm_4), dat$bio_4) dat$bio_5= ifelse(is.na(dat$ID_order), bio.all(dat$mm_5), dat$bio_5) dat$bio_6= ifelse(is.na(dat$ID_order), bio.all(dat$mm_6), dat$bio_6) dat$bio_7= ifelse(is.na(dat$ID_order), bio.all(dat$mm_7), dat$bio_7) dat$bio_8= ifelse(is.na(dat$ID_order), bio.all(dat$mm_8), dat$bio_8) dat$bio_9= ifelse(is.na(dat$ID_order), bio.all(dat$mm_9), dat$bio_9) dat$bio_10= ifelse(is.na(dat$ID_order), bio.all(dat$mm_10), dat$bio_10) # Make columns numeric dat = dat %>% mutate_at(c("bio_1", "bio_2", "bio_3", "bio_4", "bio_5", "bio_6", "bio_7", "bio_8", "bio_9", "bio_10"), as.numeric) # Multiply each biomass by count into new column mbio_# dat = dat %>% mutate(mbio_1 = bio_1*ct_1, mbio_2 = bio_2*ct_2, mbio_3 = bio_3*ct_3, mbio_4 = bio_4*ct_4, mbio_5 = bio_5*ct_5, mbio_6 = bio_6*ct_6, mbio_7 = bio_7*ct_7, mbio_8 = bio_8*ct_8, mbio_9 = bio_9*ct_9, mbio_10 = bio_10*ct_10) # New column mbio_sum: sum of mbio_1-10 dat = dat %>% mutate(mbio_sum = rowSums(dat[,c("mbio_1", "mbio_2", "mbio_3", "mbio_4", "mbio_5", "mbio_6", "mbio_7", "mbio_8", "mbio_9", "mbio_10")], na.rm = TRUE)) # exclude NAs # New column count_sum: sum of ct_1-10 dat = dat %>% mutate(count_sum = rowSums(dat[,c("ct_1", "ct_2", "ct_3", "ct_4", "ct_5", "ct_6", "ct_7", "ct_8", "ct_9", "ct_10")], na.rm = TRUE)) # exclude NAs # Ideally, column "count_sum" should = column "count" ######################## Count number of vials in dataset ################################# dat %>% distinct(site,timing,transect,distance,ID) %>% tally() # 7640 vials ######################### Count vials with no calcs ######################################### # Count vials with no mbio_sum measurement dat %>% filter(mbio_sum == 0) %>% tally() # 3 vials # Count vials with no count_sum measurement dat %>% filter(count_sum == 0) %>% tally () # 3 vials ##################### Summarize data by vials ############################################# # Each vial own row vialsum = dat %>% group_by(site, timing, transect, distance, ID, ID_family, ID_order, ID_notes) %>% summarize(totalbio = sum(mbio_sum, na.rm = TRUE), totalct = sum(count_sum, na.rm = TRUE)) # Add column of site type (control/treatment) vialsum$type = ifelse(vialsum$site == "HL", "Treatment", "Control") vialsum$type = ifelse(vialsum$site == "LM", "Treatment", vialsum$type) vialsum$type = ifelse(vialsum$site == "MS", "Treatment", vialsum$type) vialsum$type = ifelse(vialsum$site == "SM", "Treatment", vialsum$type) vialsum$type = ifelse(vialsum$site == "WW", "Treatment", vialsum$type) ############ Add Larvae/Adult to Lepidoptera ############### # New column larva: "Y" if ID_notes contain words larva/larvae vialsum = vialsum %>% ungroup() %>% mutate(larva = ifelse(grepl("larva|larvae", vialsum$ID_notes, ignore.case = TRUE), "Y", "N")) # New column ID_order.new with new "Lepidoptera Larva" value vialsum = vialsum %>% mutate(ID_order.new = ifelse(ID_order == "Lepidoptera" & larva == "Y", "Lepidoptera Larva", as.character(ID_order))) # Have vialsum only include new ID_order column vialsum = vialsum %>% dplyr::select(-ID_order, -ID_notes) # Rename ID_order.new to ID_order names(vialsum)[names(vialsum) == "ID_order.new"] <- "ID_order" # Rename "Lepidoptera" to "Lepidoptera Adult" vialsum$ID_order[vialsum$ID_order == 'Lepidoptera'] <- 'Lepidoptera Adult' # Group data by site/timing/transect/distance/order/family/type - some arthropods with same family & order are split into separate vials ins = vialsum %>% group_by(site, timing, transect, distance, ID_order, ID_family, type) %>% summarize(bio = sum(totalbio, na.rm = TRUE), ct = sum(totalct, na.rm = TRUE)) ############ Correct for HL pre-spraying transects being longer than all other transects ###################### # HL pre-spraying transects were 60 m long while all other transects were 40 m long # Multiply bio & ct by (2/3) for all HL pre-spraying samples ins = ins %>% mutate(bio = ifelse(site == "HL" & timing == "P", bio*(2/3), bio), ct = ifelse(site == "HL" & timing == "P", ct*(2/3), ct)) ############################### Veg data ###################################### # Explore data types sapply(veg, class) # Make columns numeric. Need to change to character first, since these columns currently # contain factors. Columns 40-50 (canopy cover) are already numeric. veg[, c(19:26, 32:35)] = lapply(veg[, c(19:26, 32:35)], as.character) veg[, c(19:26, 32:35)] = lapply(veg[, c(19:26, 32:35)], as.numeric) # Create column vor_mean (average n/s/e/w readings) for (i in 1:nrow(veg)){ veg$vor_mean[i] = ((veg$vor_n[i] + veg$vor_e[i] + veg$vor_s[i] + veg$vor_w[i])/4) } # Create column gc.lit.p: percentage of ground cover that is litter for (i in 1:nrow(veg)){ veg$gc.lit.p[i] = (((veg$gc_lit[i])/30)*100) } # Create column gc.bg.p: percentage of ground cover that is bare ground for (i in 1:nrow(veg)){ veg$gc.bg.p[i] = (((veg$gc_bg[i])/30)*100) } # Create column gc.oth.p - percentage of ground cover that is other for (i in 1:nrow(veg)){ veg$gc.oth.p[i] = (((veg$gc_oth[i])/30)*100) } # Create column cc.live.p: percentage of canopy cover consisting of grass, forb, or woody for (i in 1:nrow(veg)){ veg$cc.live.p[i] = (veg$grass.p.adj[i] + veg$forb.p.adj[i] + veg$woody.p.adj[i]) } # Create column sp_total: sum of sp_grass + sp_forb for (i in 1:nrow(veg)){ veg$sp_total[i] = (veg$sp_grass[i] + veg$sp_forb[i]) } # Create new column vor_field with vor reading from direction of field veg$vor_field = ifelse(veg$site=="FC", veg$vor_w, ".") # populate other cells with "." veg$vor_field = ifelse(veg$site=="WF", veg$vor_s, veg$vor_field) veg$vor_field = ifelse(veg$site=="HL", veg$vor_w, veg$vor_field) veg$vor_field = ifelse(veg$site=="MS", veg$vor_e, veg$vor_field) veg$vor_field = ifelse(veg$site=="SM", veg$vor_e, veg$vor_field) veg$vor_field = ifelse(veg$site=="WW", veg$vor_w, veg$vor_field) veg$vor_field = ifelse(veg$site=="DH", veg$vor_w, veg$vor_field) veg$vor_field = ifelse(veg$site=="DH" & veg$transect == "T" & veg$timing == "20", veg$vor_n, veg$vor_field) # T2 transect only will use vor_n veg$vor_field = ifelse(veg$site=="LM", veg$vor_w, veg$vor_field) veg$vor_field = ifelse(veg$site=="RH", veg$vor_w, veg$vor_field) # Make this column numeric veg$vor_field = as.numeric(veg$vor_field) # Rename columns names(veg)[names(veg) == 'grass.p.adj'] <- 'cc.grass.p' names(veg)[names(veg) == 'forb.p.adj'] <- 'cc.forb.p' names(veg)[names(veg) == 'dead.p.adj'] <- 'cc.dead.p' names(veg)[names(veg) == 'woody.p.adj'] <- 'cc.woody.p' names(veg)[names(veg) == 'other.p.adj'] <- 'cc.other.p' # Remove white space in column cc_comment veg$cc_comment = as.character(veg$cc_comment) veg$cc_comment = trimws(veg$cc_comment, which = c("both")) # Make cc_comment = NA and blank to "." veg$cc_comment[is.na(veg$cc_comment)] = "." veg$cc_comment[veg$cc_comment == ""] = "." # Subset "veg" to include select variables only veg.sub = veg %>% filter(!(timing == "P" & plot == "R")) %>% # exclude pre-spray R plots # keep only C and L plots from pre-spraying(start/end of insect collection in ethanol) dplyr::select(site, timing, transect, distance, plot, lit, mhl, mhd, sp_grass, sp_forb, sp_total, gc.lit.p, gc.bg.p, gc.oth.p, cc.grass.p, cc.forb.p, cc.dead.p, cc.woody.p, cc.other.p, vor_mean, vor_field) # Calculate means veg.mean = veg.sub %>% group_by(site, timing, transect, distance) %>% summarize(lit = mean(lit), mhl = mean(mhl), mhd = mean(mhd), sp_grass = mean(sp_grass), sp_forb = mean(sp_forb), sp_total = mean(sp_total), gc.lit.p = mean(gc.lit.p), gc.bg.p = mean(gc.bg.p), gc.oth.p = mean(gc.oth.p), cc.grass.p = mean(cc.grass.p), cc.forb.p = mean(cc.forb.p), cc.dead.p = mean(cc.dead.p), cc.woody.p = mean(cc.woody.p), cc.other.p = mean(cc.other.p), vor_mean = mean(vor_mean), vor_field = mean(vor_field)) ############################# Join veg and insect data ###################### # All to character veg.mean[] = lapply(veg.mean, as.character) ins[] = lapply(ins, as.character) # Combine ins.veg = ins %>% left_join(veg.mean, by = c("site", "timing", "transect", "distance")) # Create new column "year" ins.veg$year = ifelse((ins.veg$site == "DH" | ins.veg$site == "HL" | ins.veg$site == "RH" | ins.veg$site == "LM"), "2017", "2018") # Adjust data types ins.veg = ins.veg %>% mutate_at(c("site", "timing", "transect", "ID_order", "ID_family", "type", "year"), as.factor) %>% # columns to be factors mutate_at(c("distance", "bio", "ct", "lit", "mhl", "mhd", "sp_grass", "sp_forb", "sp_total", "gc.lit.p", "gc.bg.p", "gc.oth.p", "cc.grass.p", "cc.forb.p", "cc.dead.p", "cc.woody.p", "cc.other.p", "vor_mean", "vor_field"), as.numeric) # columns to be numeric # Exclude 1 observation with timing = "." ins.veg = ins.veg %>% filter(timing != ".") # Exclude 1 observation from HL P T 25 - did not collect on transect T pre-spraying ins.veg = ins.veg %>% filter(!(timing == "P" & transect == "T")) ################################################################################### ################################################################################### ##### Calculate total and bird food abundance/biomass and family richness ##### ### Calculate total abundance and total biomass dat.total = ins.veg %>% group_by(site, timing, transect, distance) %>% mutate(ct.sum.total = sum(ct), bio.sum.total = sum(bio)) %>% distinct(site, timing, transect, distance, .keep_all = TRUE) %>% dplyr::select(-ID_order, -ID_family, -bio, -ct) ### Calculate bird food abundance and biomass dat.bf = ins.veg %>% filter(ID_order == "Coleoptera" | ID_order == "Lepidoptera Larva" | ID_order == "Orthoptera" | ID_order == "Araneae") %>% group_by(site, timing, transect, distance) %>% mutate(ct.sum.bf = sum(ct), bio.sum.bf = sum(bio)) %>% distinct(site, timing, transect, distance, .keep_all = TRUE) %>% dplyr::select(site, timing, transect, distance, ct.sum.bf, bio.sum.bf) # There is 1 fewer observation in bf (296) than total (297): 1 vial didn't have any of the bf groups. # When joined to dat.total, ct.sum.bf and bio.sum.bf will be NA for this vial (DH 20 Z 25). ##### Calculate family richness ##### ### Araneae # Calculate family richness of Araneae by vial, exclude missing family ID dat.ara = ins.veg %>% filter((ID_order == "Araneae") & ID_family != "(Missing)") %>% group_by(site, timing, transect, distance) %>% mutate(fam.ara = length(unique(ID_family))) %>% distinct(site, timing, transect, distance, .keep_all = TRUE) %>% dplyr::select(site, transect, timing, distance, fam.ara) ### Coleoptera # Calculate family richness of Coleoptera by vial, exclude missing family ID dat.col = ins.veg %>% filter((ID_order == "Coleoptera") & ID_family != "(Missing)") %>% group_by(site, timing, transect, distance) %>% mutate(fam.col = length(unique(ID_family))) %>% distinct(site, timing, transect, distance, .keep_all = TRUE) %>% dplyr::select(site, transect, timing, distance, fam.col) ### Hemiptera # Calculate family richness of Hemiptera by vial, exclude missing family ID dat.hem = ins.veg %>% filter((ID_order == "Hemiptera") & ID_family != "(Missing)") %>% group_by(site, timing, transect, distance) %>% mutate(fam.hem = length(unique(ID_family))) %>% distinct(site, timing, transect, distance, .keep_all = TRUE) %>% dplyr::select(site, transect, timing, distance, fam.hem) ### Orthoptera # Calculate family richness of Orthoptera by vial, exclude missing family ID dat.ort = ins.veg %>% filter((ID_order == "Orthoptera") & ID_family != "(Missing)") %>% group_by(site, timing, transect, distance) %>% mutate(fam.ort = length(unique(ID_family))) %>% distinct(site, timing, transect, distance, .keep_all = TRUE) %>% dplyr::select(site, transect, timing, distance, fam.ort) ##### Join newly calculated columns to dat.total dat.join = dat.total %>% full_join(dat.bf, by = c("site", "timing", "transect", "distance")) %>% full_join(dat.ara, by = c("site", "timing", "transect", "distance")) %>% full_join(dat.col, by = c("site", "timing", "transect", "distance")) %>% full_join(dat.hem, by = c("site", "timing", "transect", "distance")) %>% full_join(dat.ort, by = c("site", "timing", "transect", "distance")) ##### Replace NA in bf and family richness columns with 0 dat.join <- dat.join %>% mutate(ct.sum.bf = ifelse(is.na(ct.sum.bf), 0, ct.sum.bf), bio.sum.bf = ifelse(is.na(bio.sum.bf), 0, bio.sum.bf), fam.ara = ifelse(is.na(fam.ara), 0, fam.ara), fam.col = ifelse(is.na(fam.col), 0, fam.col), fam.hem = ifelse(is.na(fam.hem), 0, fam.hem), fam.ort = ifelse(is.na(fam.ort), 0, fam.ort)) ################################################################################# ################################################################################# ######################### Sample sizes ################################## # New column transect.type: primary or supplementary transect dat.join = dat.join %>% mutate(transect.type = ifelse(transect == "T", "Supplementary", "Primary")) # Number of samples on primary and supplementary transects by site type & timing dat.summary = dat.join %>% group_by(type, timing, transect.type) %>% tally() ######################## Exploration histogram ########################## # Subset to primary transects only dat.sub.primary <- dat.join %>% filter(transect.type == 'Primary') # Family richness: Araneae p.fam.ara = ggplot(dat.sub.primary, aes(x=fam.ara)) + geom_histogram() + geom_vline(aes(xintercept=mean(fam.ara)), color="blue", linetype="dashed", linewidth=1) + ggtitle("Family richness: Araneae") p.fam.ara ########################## Model Step 1 ###################################### # Subset data: primary transects, pre-spraying dat.step1 <- dat.join %>% filter(transect.type == "Primary" & timing == "P") # Sample size samplesize.step1 = dat.step1 %>% group_by(type, timing) %>% tally() ##### Model mod.step1 = glmmPQL(fam.ara ~ cc.forb.p + mhl + sp_total + gc.lit.p + year, random = (~ 1 | site/transect), family = poisson, data = dat.step1) ##### Parameter confidence intervals ##### # Summary step1.sum = summary(mod.step1) # Table with global model covariate confidence intervals step1.int = intervals(mod.step1, which = "fixed") # default level = 0.95 step1.inttab = data.frame(step1.int$fixed) %>% rownames_to_column(var = "covariate") %>% mutate(incl0 = 0 >= lower & 0 <= upper) # Table with global model p-values step1.pvals = data.frame(step1.sum$tTable) %>% rownames_to_column(var = "covariate") %>% dplyr::select(covariate, p.value) # Join p-values to inttab: new table "ci.table" step1.ci.table = step1.inttab %>% left_join(step1.pvals, by = "covariate") %>% rename(est = est., ci.lower = lower, ci.upper = upper) %>% # rename columns (newname = oldname) dplyr::select(covariate, est, ci.lower, ci.upper, incl0, p.value) # reorder columns ### Add R2 values to ci.table # Notes: From the help page for this package (MuMIn): #"The delta method can be used with for # all distributions and link functions, while lognormal approximation and trigamma # function are limited to distributions with logarithmic link. Trigamma-estimate # is recommended whenever available." # "As of MuMIn version 1.41.0, # r.squaredGLMM returns a revised statistics based on Nakagawa et al. (2017) paper. # The returned value's format also has changed (it is a matrix rather than a # numeric vector as before). Pre-1.41.0 version of the function calculated the # “theoretical” R2GLMM for binomial models." The version used in this analysis is # 1.47.5, and the output included in the manuscript is the revised statistic. r.squaredGLMM(mod.step1) step1.ci.table = step1.ci.table %>% mutate(marg_r2_trigamma = r.squaredGLMM(mod.step1)[3], cond_r2_trigamma = r.squaredGLMM(mod.step1)[6]) ############################### Model Step 2 ################################# # Subset data: primary transects, 0 & 25 m dat.step2 = dat.join %>% filter(transect.type == "Primary") %>% filter (distance == 0 | distance == 25) %>% arrange(factor(timing, levels = c("P", "3", "20"))) # Re-level so pre-spray (timing = P) samples are first dat.step2 = within(dat.step2, timing <- relevel(timing, ref = "P")) # Sample size samplesize.step2 = dat.step2 %>% group_by(type, timing) %>% tally() ##### Model step 2 mod.step2 = glmmPQL(fam.ara ~ mhl + type*timing, random = (~ 1 | site/transect), family = poisson, data = dat.step2) ##### Parameter confidence intervals ##### # Summary step2.sum = summary(mod.step2) # Table with global model covariate confidence intervals step2.int = intervals(mod.step2, which = "fixed") # default level = 0.95 step2.inttab = data.frame(step2.int$fixed) %>% rownames_to_column(var = "covariate") %>% mutate(incl0 = 0 >= lower & 0 <= upper) # Table with global model p-values step2.pvals = data.frame(step2.sum$tTable) %>% rownames_to_column(var = "covariate") %>% dplyr::select(covariate, p.value) # Join p-values to inttab: new table "ci.table" step2.ci.table = step2.inttab %>% left_join(step2.pvals, by = "covariate") %>% rename(est = est., ci.lower = lower, ci.upper = upper) %>% # rename columns (newname = oldname) dplyr::select(covariate, est, ci.lower, ci.upper, incl0, p.value) # reorder columns # Add R2 values to ci.table # Note: From the help page for this package (MuMIn): "As of MuMIn version 1.41.0, # r.squaredGLMM returns a revised statistics based on Nakagawa et al. (2017) paper. # The returned value's format also has changed (it is a matrix rather than a # numeric vector as before). Pre-1.41.0 version of the function calculated the # “theoretical” R2GLMM for binomial models." The version used in this analysis is # 1.47.5, and the output included in the manuscript is the revised statistic. r.squaredGLMM(mod.step2) step2.ci.table = step2.ci.table %>% mutate(marg_r2_trigamma = r.squaredGLMM(mod.step2)[3], cond_r2_trigamma = r.squaredGLMM(mod.step2)[6]) # Reorder rows step2.ci.table = step2.ci.table %>% arrange(factor(covariate, levels = c('(Intercept)', 'mhl', 'typeTreatment', 'timing3', 'timing20', 'typeTreatment:timing3', 'typeTreatment:timing20'))) ####################### Diagnostic plots ##################################### ##### Step 1 ##### qqnorm(mod.step1, main = 'Model step 1') plot(mod.step1, main = 'Model step 1') ##### Step 2 ##### qqnorm(mod.step2, main = 'Model step 2') plot(mod.step2, main = 'Model step 2') ########################## Export csv ################################### # Datestamp datestamp=as.Date(Sys.Date()) datestamp=format(datestamp, format="%Y%m%d") # Step 1 table with covariate estimates, confidence intervals, p-values, and r2 values write.csv(step1.ci.table, file = paste0("output/", datestamp, "_Ch2_models_fam_ara_poisson_confint_step1.csv"),row.names = FALSE) # Step 2 table with covariate estimates, confidence intervals, p-values, and r2 values write.csv(step2.ci.table, file = paste0("output/", datestamp, "_Ch2_models_fam_ara_poisson_confint_step2.csv"),row.names = FALSE)