# INPUT: - Cleaned, combined arthropod ID and measuring data: 
#         combined_id_meas_manually_cleaned.csv

# OUTPUT: JWM manuscript Table S1 (Supporting Information): table of percentages 
#         of total arthropod abundance and dry consumable biomass by taxon.


# Load libraries
library(dplyr)
library(ggplot2)
library(forcats)

# Read in insect data
dat = read.csv("data/20230303_combined_id_meas_manually_cleaned.csv")

################# Calculate arthropod biomass ##################################

##### Functions: biomass by order - Straus & Aviles 2018)

# Coleoptera
bio.col <- function(x) {
  mg.col = 10^(-1.960 + 2.966*(log10(x)))
  return(mg.col)
}

# Orthoptera
bio.ort <- function(x) {
  mg.ort = 10^(-1.928 + 2.680*(log10(x)))
  return(mg.ort)
}

# Hymenoptera
bio.hym <- function(x) {
  mg.hym = 10^(-0.611 + 1.141*(log10(x)))
  return(mg.hym)
}

# Lepidoptera
bio.lep <- function(x) {
  mg.lep = 10^(-3.169 + 3.624*(log10(x)))
  return(mg.lep)
}

# Hemiptera
bio.hem <- function(x) {
  mg.hem = 10^(-1.749 + 2.529*(log10(x)))
  return(mg.hem)
}

# Diptera
bio.dip <- function(x) {
  mg.dip = 10^(-1.361 + 2.026*(log10(x)))
  return(mg.dip)
}

# Araneae
bio.ara <- function(x) {
  mg.ara = 10^(-2.643 + 4.011*(log10(x)))
  return(mg.ara)
}

# All groups
bio.all <- function(x) {
  mg.all = 10^(-1.412 + 2.085*(log10(x)))
  return(mg.all)
}

# Test functions
y = 20
bio.ort(y)
bio.col(y)
bio.hym(y)
bio.lep(y)
bio.hem(y)
bio.dip(y)
bio.ara(y)
bio.all(y)

# Make all columns character
dat[] = lapply(dat, as.character)

# Adjust data types
dat = dat %>%
  mutate_at(c("key",
              "code",
              "year",
              "site", 
              "timing",
              "transect",
              "distance",
              "ID",
              "ID_family",
              "ID_order"), 
            as.factor) %>% # columns to be factors
  mutate_at(c("mm_1",
              "mm_2",
              "mm_3",
              "mm_4",
              "mm_5",
              "mm_6",
              "mm_7",
              "mm_8",
              "mm_9",
              "mm_10",
              "ct_1",
              "ct_2",
              "ct_3",
              "ct_4",
              "ct_5",
              "ct_6",
              "ct_7",
              "ct_8",
              "ct_9",
              "ct_10"), 
            as.numeric) # Columns to be numeric. Warnings: "." changed to NA.

# Make family & order "." = NA
dat$ID_family[dat$ID_family == "."] = NA
dat$ID_order[dat$ID_order == "."] = NA

# Biomass Coleoptera
dat$bio_1= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_1), NA)
dat$bio_2= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_2), NA)
dat$bio_3= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_3), NA)
dat$bio_4= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_4), NA)
dat$bio_5= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_5), NA)
dat$bio_6= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_6), NA)
dat$bio_7= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_7), NA)
dat$bio_8= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_8), NA)
dat$bio_9= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_9), NA)
dat$bio_10= ifelse(dat$ID_order == "Coleoptera", bio.col(dat$mm_10), NA)

# Biomass Orthoptera
dat$bio_1= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_1), dat$bio_1)
dat$bio_2= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_2), dat$bio_2)
dat$bio_3= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_3), dat$bio_3)
dat$bio_4= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_4), dat$bio_4)
dat$bio_5= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_5), dat$bio_5)
dat$bio_6= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_6), dat$bio_6)
dat$bio_7= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_7), dat$bio_7)
dat$bio_8= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_8), dat$bio_8)
dat$bio_9= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_9), dat$bio_9)
dat$bio_10= ifelse(dat$ID_order == "Orthoptera", bio.ort(dat$mm_10), dat$bio_10)

# Biomass Hymenoptera
dat$bio_1= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_1), dat$bio_1)
dat$bio_2= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_2), dat$bio_2)
dat$bio_3= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_3), dat$bio_3)
dat$bio_4= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_4), dat$bio_4)
dat$bio_5= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_5), dat$bio_5)
dat$bio_6= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_6), dat$bio_6)
dat$bio_7= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_7), dat$bio_7)
dat$bio_8= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_8), dat$bio_8)
dat$bio_9= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_9), dat$bio_9)
dat$bio_10= ifelse(dat$ID_order == "Hymenoptera", bio.hym(dat$mm_10), dat$bio_10)

# Biomass Lepidoptera
dat$bio_1= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_1), dat$bio_1)
dat$bio_2= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_2), dat$bio_2)
dat$bio_3= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_3), dat$bio_3)
dat$bio_4= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_4), dat$bio_4)
dat$bio_5= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_5), dat$bio_5)
dat$bio_6= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_6), dat$bio_6)
dat$bio_7= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_7), dat$bio_7)
dat$bio_8= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_8), dat$bio_8)
dat$bio_9= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_9), dat$bio_9)
dat$bio_10= ifelse(dat$ID_order == "Lepidoptera", bio.lep(dat$mm_10), dat$bio_10)

# Biomass Hemiptera
dat$bio_1= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_1), dat$bio_1)
dat$bio_2= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_2), dat$bio_2)
dat$bio_3= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_3), dat$bio_3)
dat$bio_4= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_4), dat$bio_4)
dat$bio_5= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_5), dat$bio_5)
dat$bio_6= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_6), dat$bio_6)
dat$bio_7= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_7), dat$bio_7)
dat$bio_8= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_8), dat$bio_8)
dat$bio_9= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_9), dat$bio_9)
dat$bio_10= ifelse(dat$ID_order == "Hemiptera", bio.hem(dat$mm_10), dat$bio_10)

# Biomass Diptera
dat$bio_1= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_1), dat$bio_1)
dat$bio_2= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_2), dat$bio_2)
dat$bio_3= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_3), dat$bio_3)
dat$bio_4= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_4), dat$bio_4)
dat$bio_5= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_5), dat$bio_5)
dat$bio_6= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_6), dat$bio_6)
dat$bio_7= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_7), dat$bio_7)
dat$bio_8= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_8), dat$bio_8)
dat$bio_9= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_9), dat$bio_9)
dat$bio_10= ifelse(dat$ID_order == "Diptera", bio.dip(dat$mm_10), dat$bio_10)

# Biomass Araneae
dat$bio_1= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_1), dat$bio_1)
dat$bio_2= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_2), dat$bio_2)
dat$bio_3= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_3), dat$bio_3)
dat$bio_4= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_4), dat$bio_4)
dat$bio_5= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_5), dat$bio_5)
dat$bio_6= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_6), dat$bio_6)
dat$bio_7= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_7), dat$bio_7)
dat$bio_8= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_8), dat$bio_8)
dat$bio_9= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_9), dat$bio_9)
dat$bio_10= ifelse(dat$ID_order == "Araneae", bio.ara(dat$mm_10), dat$bio_10)

# Biomass other: for orders other than Coleoptera, Orthoptera, Hymenoptera,
# Lepidoptera, Hemiptera, Diptera, or Araneae
dat$bio_1= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_1), dat$bio_1)
dat$bio_2= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_2), dat$bio_2)
dat$bio_3= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_3), dat$bio_3)
dat$bio_4= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_4), dat$bio_4)
dat$bio_5= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_5), dat$bio_5)
dat$bio_6= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_6), dat$bio_6)
dat$bio_7= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_7), dat$bio_7)
dat$bio_8= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_8), dat$bio_8)
dat$bio_9= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_9), dat$bio_9)
dat$bio_10= ifelse(dat$ID_order != "Coleoptera" & dat$ID_order != "Orthoptera" & dat$ID_order != "Hymenoptera" & dat$ID_order != "Lepidoptera" & dat$ID_order != "Hemiptera" & dat$ID_order != "Diptera" & dat$ID_order != "Araneae", bio.all(dat$mm_10), dat$bio_10)

# Biomass other: for order = Unknown
dat$bio_1= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_1), dat$bio_1)
dat$bio_2= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_2), dat$bio_2)
dat$bio_3= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_3), dat$bio_3)
dat$bio_4= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_4), dat$bio_4)
dat$bio_5= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_5), dat$bio_5)
dat$bio_6= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_6), dat$bio_6)
dat$bio_7= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_7), dat$bio_7)
dat$bio_8= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_8), dat$bio_8)
dat$bio_9= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_9), dat$bio_9)
dat$bio_10= ifelse(dat$ID_order == "Unknown", bio.all(dat$mm_10), dat$bio_10)

# Biomass other: for order = NA
dat$bio_1= ifelse(is.na(dat$ID_order), bio.all(dat$mm_1), dat$bio_1)
dat$bio_2= ifelse(is.na(dat$ID_order), bio.all(dat$mm_2), dat$bio_2)
dat$bio_3= ifelse(is.na(dat$ID_order), bio.all(dat$mm_3), dat$bio_3)
dat$bio_4= ifelse(is.na(dat$ID_order), bio.all(dat$mm_4), dat$bio_4)
dat$bio_5= ifelse(is.na(dat$ID_order), bio.all(dat$mm_5), dat$bio_5)
dat$bio_6= ifelse(is.na(dat$ID_order), bio.all(dat$mm_6), dat$bio_6)
dat$bio_7= ifelse(is.na(dat$ID_order), bio.all(dat$mm_7), dat$bio_7)
dat$bio_8= ifelse(is.na(dat$ID_order), bio.all(dat$mm_8), dat$bio_8)
dat$bio_9= ifelse(is.na(dat$ID_order), bio.all(dat$mm_9), dat$bio_9)
dat$bio_10= ifelse(is.na(dat$ID_order), bio.all(dat$mm_10), dat$bio_10)

# Make columns numeric
dat = dat %>%
  mutate_at(c("bio_1",
              "bio_2",
              "bio_3",
              "bio_4",
              "bio_5",
              "bio_6",
              "bio_7",
              "bio_8",
              "bio_9",
              "bio_10"), 
            as.numeric)

# Multiply each biomass by count into new column mbio_#
dat = dat %>% 
  mutate(mbio_1 = bio_1*ct_1,
         mbio_2 = bio_2*ct_2,
         mbio_3 = bio_3*ct_3,
         mbio_4 = bio_4*ct_4,
         mbio_5 = bio_5*ct_5,
         mbio_6 = bio_6*ct_6,
         mbio_7 = bio_7*ct_7,
         mbio_8 = bio_8*ct_8,
         mbio_9 = bio_9*ct_9,
         mbio_10 = bio_10*ct_10)

# New column mbio_sum: sum of mbio_1-10
dat = dat %>% 
  mutate(mbio_sum = rowSums(dat[,c("mbio_1", "mbio_2", "mbio_3", "mbio_4", "mbio_5", "mbio_6", "mbio_7", "mbio_8", "mbio_9", "mbio_10")], 
                            na.rm = TRUE)) # exclude NAs

# New column count_sum: sum of ct_1-10
dat = dat %>% 
  mutate(count_sum = rowSums(dat[,c("ct_1", "ct_2", "ct_3", "ct_4", "ct_5", "ct_6", "ct_7", "ct_8", "ct_9", "ct_10")], 
                             na.rm = TRUE)) # exclude NAs
# Ideally, column "count_sum" should = column "count"

######################## Count number of vials in dataset #################################

dat %>% 
  distinct(site,timing,transect,distance,ID) %>% 
  tally() # 7640 vials

######################### Count vials with no calcs #########################################

# Count vials with no mbio_sum measurement
dat %>% 
  filter(mbio_sum == 0) %>% 
  tally() # 3 vials

# Count vials with no count_sum measurement
dat %>% 
  filter(count_sum == 0) %>% 
  tally () # 3 vials

##################### Summarize data by vials #############################################

# Each vial own row
vialsum = dat %>% 
  group_by(site, timing, transect, distance, ID, ID_family, ID_order, ID_notes) %>% 
  summarize(totalbio = sum(mbio_sum, na.rm = TRUE),
            totalct = sum(count_sum, na.rm = TRUE))

# Add column of site type (control/treatment)
vialsum$type = ifelse(vialsum$site == "HL", "Treatment", "Control")
vialsum$type = ifelse(vialsum$site == "LM", "Treatment", vialsum$type)
vialsum$type = ifelse(vialsum$site == "MS", "Treatment", vialsum$type)
vialsum$type = ifelse(vialsum$site == "SM", "Treatment", vialsum$type)
vialsum$type = ifelse(vialsum$site == "WW", "Treatment", vialsum$type)


############ Add Larvae/Adult to Lepidoptera ###############

# New column larva: "Y" if ID_notes contain words larva/larvae
vialsum = vialsum %>% 
  ungroup() %>% 
  mutate(larva = ifelse(grepl("larva|larvae", vialsum$ID_notes, ignore.case = TRUE), "Y", "N"))

# New column ID_order.new with new "Lepidoptera Larva" value
vialsum = vialsum %>% 
  mutate(ID_order.new = ifelse(ID_order == "Lepidoptera" & larva == "Y", "Lepidoptera Larva", as.character(ID_order)))

# Have vialsum only include new ID_order column
vialsum = vialsum %>% 
  dplyr::select(-ID_order, -ID_notes)

# Rename ID_order.new to ID_order
names(vialsum)[names(vialsum) == "ID_order.new"] <- "ID_order"

# Rename "Lepidoptera" to "Lepidoptera Adult"
vialsum$ID_order[vialsum$ID_order == 'Lepidoptera'] <- 'Lepidoptera Adult'


################################## Additional cleaning ###########################

# Exclude 1 observation with timing = "."
vialsum = vialsum %>% 
  filter(timing != ".")


# Exclude 1 observation from HL P T 25 - did not collect on transect T pre-spraying
vialsum = vialsum %>% 
  filter(!(timing == "P" & transect == "T"))


############################## Group ############################################

# Group data by site/timing/transect/distance/order/family/type - some arthropods with same family & order are split into separate vials
ins = vialsum %>% 
  group_by(site, timing, transect, distance, ID_order, ID_family, type) %>% 
  summarize(bio = sum(totalbio, na.rm = TRUE),
            ct = sum(totalct, na.rm = TRUE))


############ Correct for HL pre-spraying transects being longer than all other transects ######################

# HL pre-spraying transects were 60 m long while all other transects were 40 m long

# Multiply bio & ct by (2/3) for all HL pre-spraying samples
ins = ins %>% 
  mutate(bio = ifelse(site == "HL" & timing == "P", bio*(2/3), bio),
         ct = ifelse(site == "HL" & timing == "P", ct*(2/3), ct))


#################################################################################
########################### Summaries ###########################################

# Total count
total.ct = sum(ins$ct)

# Total biomass
total.bio = sum(ins$bio)

# Total orders
total.order = ins %>% 
  group_by(ID_order) %>%
  tally()

# Count families for Araneae, Coleoptera, Hemiptera, and Orthoptera 
count.4orders.fam = ins %>% 
  filter((ID_order == 'Araneae' | ID_order == 'Orthoptera' | ID_order == 'Hemiptera' | 
            ID_order == 'Coleoptera')) %>% 
  group_by(ID_family) %>%
  tally()

# Subset data
orders = ins %>% 
  filter((ID_order == 'Araneae' | ID_order == 'Orthoptera' | ID_order == 'Hemiptera' | 
            ID_order == 'Coleoptera'| ID_order == 'Hymenoptera'| ID_order == 'Lepidoptera Adult'|
            ID_order == 'Lepidoptera Larva'| ID_order == 'Diptera'))


other = setdiff(ins, orders)   # elements in a$x NOT in b$y

other.table = data.frame (ID_order = "Other",
                         p.abund = sum(other$ct)/total.ct,
                        p.bio = sum(other$bio)/total.bio)

order.table = orders %>% 
  group_by(ID_order) %>% 
  summarize(p.abund = sum(ct)/total.ct,
            p.bio = sum(bio)/total.bio) %>% 
  arrange(factor(ID_order, levels = c("Coleoptera", 
                                      "Lepidoptera Larva",
                                      "Orthoptera",
                                      "Araneae", 
                                      "Diptera",  
                                      "Hemiptera",
                                      "Hymenoptera",
                                      "Lepidoptera Adult")))

# Combine
all.order = union(order.table, other.table)

# Columns should sum to 1
sum(all.order$p.abund)
sum(all.order$p.bio)

#################################
######################### Summaries by family ##################

### Araneae
# Subset data
aran = ins %>% 
  filter(ID_order == 'Araneae') %>% 
  group_by(ID_family) %>% 
  summarize(p.abund = sum(ct)/total.ct,
            p.bio = sum(bio)/total.bio)

### Coleoptera

col = ins %>% 
  filter(ID_order == 'Coleoptera') %>% 
  group_by(ID_family) %>% 
  summarize(p.abund = sum(ct)/total.ct,
            p.bio = sum(bio)/total.bio)

### Hemiptera

hem = ins %>% 
  filter(ID_order == 'Hemiptera') %>% 
  group_by(ID_family) %>% 
  summarize(p.abund = sum(ct)/total.ct,
            p.bio = sum(bio)/total.bio)

### Orthoptera

ort = ins %>% 
  filter(ID_order == 'Orthoptera') %>% 
  group_by(ID_family) %>% 
  summarize(p.abund = sum(ct)/total.ct,
            p.bio = sum(bio)/total.bio)

# All tables to character
order.table[] = lapply(order.table, as.character)

# Combine tables
tab.complete = bind_rows(all.order[1,], 
                             col,
                             all.order[2:3,],
                             ort,
                             all.order[4,],
                             aran,
                             all.order[5:6,],
                             hem,
                             all.order[7:8,],
                             all.order[9,])

# Reorder columns
tab.complete = tab.complete %>% 
  select(c(1, 4,2,3))

# Create duplicate
tab.format = tab.complete

# As numeric
tab.format$p.abund = as.numeric(tab.format$p.abund)
tab.format$p.bio = as.numeric(tab.format$p.bio)

# Mutiply by 100
tab.format$p.abund = tab.format$p.abund*100
tab.format$p.bio = tab.format$p.bio*100

# Conditional format
tab.new = tab.format

tab.new$p.abund = ifelse(tab.new$p.abund<0.01, formatC(tab.format$p.abund, format = "e", digits = 1), signif(tab.format$p.abund,2))
tab.new$p.bio = ifelse(tab.new$p.bio<0.01, formatC(tab.format$p.bio, format = "e", digits = 1), signif(tab.format$p.bio,2))


############################### Export ###############################################
# Datestamp
datestamp=as.Date(Sys.Date())
datestamp=format(datestamp, format="%Y%m%d")

write.csv(tab.new, file = paste0("output/", datestamp, "_insect_sample_summary_percent_abund_bio.csv"),row.names = FALSE)