This script will pull in plant observation data from PI surveys, Secchi clarity, lake/watershed geodata, species statuses, and collaborator feedback. Then we’ll sync them into our dataset, creating a full macrophyte obs and env dataset for MN Lakes
# Outstanding Work:
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
strttime <- Sys.time()
getwd()
## [1] "E:/My Drive/Documents/UMN/Grad School/Larkin Lab/R_projects/MN_aquatic_plants_synthesis"
# load libraries ------------------------------------------------------------------
library(data.table)
# update_dev_pkg()# remotes::install_github("Rdatatable/data.table")
library(ggplot2)
library(stringr)
library(sf)
library(vegan)
library(gridExtra)
library(dplyr)
library(tidyr)
library(janitor)
# library(lme4)
# library(sjPlot)
# library(mediation)
library(ggpubr)
# library(EnvStats)
# library(lmerTest)
# library(merTools)
# library(rstanarm)
# library(ggsn)
library(ggpmisc)
library(cowplot)
# load in functions -------------------------------------------------------
f_dowle3natozeros = function(DT, x) {
# or by number (slightly faster than by name) :
for (j in x)
set(DT,which(is.na(DT[[j]])),j,"0")
}
# load in data -------------------------------------------------
The plants observations data and collaborator corrections are datasets that we have generated.
Secchi data have been aggregated from public sources by Kelsey Vitense for https://aslopubs.onlinelibrary.wiley.com/doi/full/10.1002/lol2.10323#lol210323-bib-0034
# #plants observation dataset:
# plants <- fread(input = "data&scripts/data/input/plant_surveys_mn.csv", drop = 1:2) #import, dropping the exported row numbers
# #collaborator corrections and feedback:
# coll_edits <- fread(input = "data&scripts/data/input/Edited_post_contrib_feedback.csv")
#secchi data:
secchi <- fread(input = "data&scripts/data/input/AllSecchi_plus_ShallowLakesSecchi.csv", drop = 1) #import, dropping the exported row numbers
#' The MN DNR does not allow publication of copies of their datasets, and thus the following datasets must be downloaded by a user in order to run this code.
#'
#' Hydrography (https://gisdata.mn.gov/dataset/water-dnr-hydrography; 5April2022) and
#' watershed (https://gisdata.mn.gov/dataset/geos-dnr-watersheds; 10Aug2022) data were
#' retrieved from the MN Geospatial commons.
#'
#' Species statuses were retrieved from the MN DNR website
#' (https://www.dnr.state.mn.us/eco/mcbs/plant_lists.html; 5April2022).
#'
#'Citations:
#'DNR Hydrography Dataset. (2012). Retrieved 5April2022, from https://resources.gisdata.mn.gov/pub/gdrs/data/pub/us_mn_state_dnr/water_dnr_hydrography/metadata/metadata.html
#'DNR Watersheds—DNR Level 04—HUC 08—Majors. (2023). Retrieved 10Aug2022, from https://resources.gisdata.mn.gov/pub/gdrs/data/pub/us_mn_state_dnr/geos_dnr_watersheds/metadata/dnr_watersheds_dnr_level_04_huc_08_majors.html
#'MNTaxa: The State of Minnesota Vascular Plant Checklist. (2013). Retrieved 5April2022, from https://www.dnr.state.mn.us/eco/mcbs/plant_lists.html
#'
## MN DNR Datasets
#hydrography & watersheds
pwi_l <- st_read(dsn = "data&scripts/data/input/shp_water_dnr_hydrography", layer = "dnr_hydro_features_all")
## Reading layer `dnr_hydro_features_all' from data source
## `E:\My Drive\Documents\UMN\Grad School\Larkin Lab\R_projects\MN_aquatic_plants_synthesis\data&scripts\data\input\shp_water_dnr_hydrography'
## using driver `ESRI Shapefile'
## Simple feature collection with 130913 features and 43 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 189729.8 ymin: 4793853 xmax: 1165764 ymax: 5514207
## Projected CRS: NAD83 / UTM zone 15N
watersheds_huc8 <- st_read(dsn = "data&scripts/data/input/shp_geos_dnr_watersheds", layer = "dnr_watersheds_dnr_level_04_huc_08_majors")
## Reading layer `dnr_watersheds_dnr_level_04_huc_08_majors' from data source
## `E:\My Drive\Documents\UMN\Grad School\Larkin Lab\R_projects\MN_aquatic_plants_synthesis\data&scripts\data\input\shp_geos_dnr_watersheds'
## using driver `ESRI Shapefile'
## Simple feature collection with 81 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 189775.3 ymin: 4816305 xmax: 761638.2 ymax: 5472428
## Projected CRS: NAD83 / UTM zone 15N
#species statuses
rte <- fread(input = "data&scripts/data/input/2013_dnr_plant_checklist_web.csv")
# Strip Rare Species ------------------------------------------------------
Due to legal protections on rare, threatened, and endangered species, the following section strips the identities of rare species in the dataset. The identities can be recovered by requesting either the full data imported above (currently commented out), or the key to the protected species names below (note that only the former will get you any location info that was associated with those data).
# de-identify rare species:
rte <- clean_names(rte)#tidy names on status dataset
# #Strip species ID from protected species
# namesstripkey <- plants[ TAXON %in% rte[rarity_status != "" , mn_dnr_scientific_name,], .N , TAXON]
# others <- plants[ !(TAXON %in% rte[, mn_dnr_scientific_name,]), .N , TAXON]
# rm(others) #none of these trigger me to need to deident them
#
# namesstripkey[ , new_name := paste("ProtectedSpecies", .I, sep = "_") , ,]
#
# plants[ namesstripkey , on = .(TAXON=TAXON) , new_name := new_name]
# plants[ TAXON %in% namesstripkey[,TAXON], new_name ]
#
# plants[ , .N , new_name]
#
# plants[ !is.na(new_name), TAXON := new_name ]
#
# plants[ , new_name := NULL]
# colnames(plants)
#
# #strip locs from all points with a protected species
#
# ps_points <- plants[str_detect(TAXON, "ProtectedSpecies" ), unique(POINT_ID) , ]#give me the point ids for all points with a protected species
#
# #any loc data there?
# plants[POINT_ID %in% ps_points, c("X","Y","NORTHING","EASTING","LATITUDE","LONGITUDE","UTMX","UTMY")]
#
# #delete that data
# plants[POINT_ID %in% ps_points, c("X","Y","NORTHING","EASTING","LATITUDE","LONGITUDE","UTMX","UTMY"):= NA, ]
#
# #export rte key:
# fwrite(namesstripkey, file = "data&scripts/data/output/rte_namestrip_key.csv" )
# #export rte stripped product:
# fwrite(plants, file = "data&scripts/data/output/plants_input_data_rtestrip.csv")
#import rte stripped product:
# plants <- fread(input = "data&scripts/data/output/plants_input_data_rtestrip.csv") #import, dropping the exported row numbers
# stripped out peoples personally identifiable information ------------------------------------------------
# #collaborator corrections and feedback:
coll_edits <- fread(input = "data&scripts/data/input/Edited_post_contrib_feedback_noPII.csv")
#import PLANTS INPUT product:
plants <- fread(input = "data&scripts/data/output/plants_input_data_rtestrip_noPII.csv") #import, dropping the exported row numbers
plants[TAXON == "", .N]
## [1] 689009
plants[TAXON == "", TAXON:= NA]
# *****DatasetUpdates***** ------------------------------------------------
# collaborator corrections ------------------------------------------------
This section uses the collaborator feedback to revise the dataset.
# check survey ID alignment
#sum(!coll_edits[, SURVEY_ID, ] %in% plants[ , SURVEY_ID]) #100% of collaborator input has a match in plants
names(coll_edits)[1] <- "feedback"
coll_edits[ , .N , feedback ]
## feedback N
## <char> <int>
## 1: curlyleaf pondweed survey delete 23
## 2: data available 9
## 3: delete erroneous entry 1
## 4: duplicate delete 6
## 5: errors from import 2
## 6: missing metadata 2
## 7: no data available 92
## 8: Point 69 has Lmin entered as 5 - change to 2 1
## 9: rake density data available 1
## 10: reimport required 2
## 11: unuseable delete 683
## 12: useable data reimport 149
## 13: 1853
# deletions
# coll_edits[ str_detect(feedback, "delete",), SURVEY_ID , ]#which are marked for deletion?
plants <- plants[ !SURVEY_ID %in%
coll_edits[ str_detect(feedback, "delete",), SURVEY_ID , ]#this drops about 10k observations from the dataset
, , ]
# tag for reimport
# these surveys need to be reimported and any current data deleted. We have these data in our files.
# coll_edits[str_detect(feedback, "import",) , SURVEY_ID , ]
# plants[ SURVEY_ID %in% coll_edits[str_detect(feedback, "import",) , SURVEY_ID , ], .N , SURVEY_ID]
sel <- plants[ SURVEY_ID %in% coll_edits[str_detect(feedback, "import",) , SURVEY_ID , ], , ] #peel off those reimport data
sel <- sel[!duplicated(sel[ , SURVEY_ID , ]),] #compress to one row where botched import produced some data
# plants[ SURVEY_ID %in% coll_edits[str_detect(feedback, "import",) , SURVEY_ID , ], .N , SURVEY_ID]
# drop or modify some cols to reflect bad import
sel[ , c("STA_NBR_DATASOURCE", "DEPTH_FT", "NO_VEG_FOUND", "REL_ABUND", "WHOLE_RAKE_REL_ABUND","SUBSTRATE", "SURVEYOR", "TAXON", "SAMPLE_NOTES", "SURFACE_GROWTH", "POINT_LVL_SECCHI", "X", "Y", "NORTHING", "EASTING", "LATITUDE", "LONGITUDE", "UTMX", "UTMY", "POINT_ID", "OBS_ID") := NA ,]#in sel, dump these columns
sel[ , INDATABASE := FALSE]#mark these as not in database
plants <- plants[ !SURVEY_ID %in% coll_edits[str_detect(feedback, "import",) , SURVEY_ID , ], , ] #drops ~900 obs
plants <- rbind(plants, sel)
plants[ SURVEY_ID %in% sel[, SURVEY_ID] , SURVEY_FEEDBACK := "reimport required" , ]
# data available from collaborator
# coll_edits[ feedback %in% c("data available", "missing metadata") , SURVEY_ID , ]
plants[ SURVEY_ID %in% coll_edits[ feedback %in% c("data available", "missing metadata") , SURVEY_ID , ] , SURVEY_FEEDBACK := "data available from collaborator" , ]
plants[ SURVEY_ID %in% coll_edits[ feedback %in% c("rake density data available") , SURVEY_ID , ] , SURVEY_FEEDBACK := "rake density data available from collaborator" , ]
# no data available
plants[ SURVEY_ID %in% coll_edits[ feedback %in% c("no data available") , SURVEY_ID , ] , SURVEY_FEEDBACK := "no data available" , ]
nrow(plants[SURVEY_FEEDBACK == "no data available" , .N , SURVEY_ID] )# how many cases with data unavailable/ not known where raw data are?
## [1] 92
# one-offs
# coll_edits[feedback == "Point 69 has Lmin entered as 5 - change to 2", SURVEY_ID]
plants[SURVEY_ID == coll_edits[feedback == "Point 69 has Lmin entered as 5 - change to 2", SURVEY_ID] &
STA_NBR_DATASOURCE == 69 &
REL_ABUND == 5,
REL_ABUND := 2]
# Taxa naming problem:
plants[TAXON == "Mitellopsis", TAXON := "Nitellopsis"]
# preferred datasource name
# coll_edits[ , .N , EDIT_DATASOURCE]
plants[ , SURVEY_DATASOURCE := coll_edits[match(plants$DATASOURCE, coll_edits$DATASOURCE), EDIT_DATASOURCE ] , ]
# plants[ , .N , SURVEY_DATASOURCE]
# plants[SURVEY_DATASOURCE == "", .N , DATASOURCE ]
plants[DATASOURCE == "DNR Lakes and Rivers", SURVEY_DATASOURCE := "DNR Lakes and Rivers"]
plants[DATASOURCE == "DNR Fisheries", SURVEY_DATASOURCE := "DNR Fisheries"]
plants[DATASOURCE == "Rantala TIP", SURVEY_DATASOURCE := "DNR Fisheries"]
plants[DATASOURCE == "Muthukrishnan Et al", SURVEY_DATASOURCE := "DNR Shallow Lakes" , ]
plants[SURVEY_DATASOURCE == "DNR Fisheries Research" , SURVEY_DATASOURCE := "DNR Fisheries"]
# check contribution # of surveys by new named datasources
plants[ , length(unique(SURVEY_ID)) , SURVEY_DATASOURCE ]
## SURVEY_DATASOURCE V1
## <char> <int>
## 1: DNR Shallow Lakes 1666
## 2: Freshwater Scientific Services 200
## 3: Newman Lab University of Minnesota 115
## 4: Minnehaha Creek Watershed District 111
## 5: 495
## 6: DNR Invasive Species Program 352
## 7: DNR Fisheries 37
## 8: Blue Water Science 110
## 9: Minneapolis Park & Recreation Board 10
## 10: Endangered Resource Services, LLC 7
## 11: Barr Engineering 85
## 12: Three Rivers Park District 63
## 13: AIS Consulting Services 13
## 14: Ramsey County 91
## 15: Ramsey-Washington Metro Watershed District 30
## 16: Capitol Region Watershed District 60
## 17: Emmons & Olivier Resources, Inc. 8
# lake name corrections
plants[ , NEW_LAKE_NAME := coll_edits[match(plants$SURVEY_ID, coll_edits$SURVEY_ID), EDIT_LAKE_NAME ] , ]
plants[NEW_LAKE_NAME %in% c("lake of the isles", "clear", "bde maka ska"), LAKE_NAME := NEW_LAKE_NAME ]
plants[ , NEW_LAKE_NAME := NULL ,]
# surveyor corrections
plants[ , NEW_SURVEYOR := coll_edits[match(plants$SURVEY_ID, coll_edits$SURVEY_ID), EDIT_SURVEYOR ] , ]
# plants[, .N, NEW_SURVEYOR ]
plants[!NEW_SURVEYOR == "" & !is.na(NEW_SURVEYOR), SURVEYOR := NEW_SURVEYOR , ]
plants[ , NEW_SURVEYOR := NULL ,]
# a <- plants[ , length(unique(SURVEY_ID)) , SURVEYOR]
# date corrections
plants[ , NEW_DATE := coll_edits[match(plants$SURVEY_ID, coll_edits$SURVEY_ID), EDIT_DATE ] , ]
# plants[, .N, NEW_DATE ]
# plants[!NEW_DATE == "" & !is.na(NEW_DATE), .N , NEW_DATE ]
plants[!NEW_DATE == "" & !is.na(NEW_DATE) , SURVEY_DATE := as.Date(NEW_DATE, format = "%d%b%Y") ,]
plants[ , NEW_DATE := NULL ,]
# input rake density scales
#overwrite any bad rake scales:
coll_edits[!is.na(`EDITED_SCALE_RAKE_DENS (0-X)`) , `SCALE_RAKE_DENS (0-X)` := `EDITED_SCALE_RAKE_DENS (0-X)` ]
# coll_edits[ ,.N , `SCALE_RAKE_DENS (0-X)` ]
coll_edits[ `SCALE_RAKE_DENS (0-X)` %in% c(1,2) , `SCALE_RAKE_DENS (0-X)` := NA ]# these aren't real abundance scales--they should be marked as NA, to indicate only pres-abs data are useable.
#push over to plants DB:
plants[ , RAKE_SCALE_USED := coll_edits[match(plants$SURVEY_ID, coll_edits$SURVEY_ID), `SCALE_RAKE_DENS (0-X)` ] , ]
# for these surveys, we can see that our collaborators inputs on rake scale was not correct:
plants[REL_ABUND>RAKE_SCALE_USED, .N , .(SURVEY_ID) ]
## SURVEY_ID N
## <int> <int>
## 1: 1418 48
## 2: 3128 1
#two more left now to manually change to max scale observed rather than reported:
plants[SURVEY_ID == 1418 , RAKE_SCALE_USED := 5]
plants[SURVEY_ID == 3128 , RAKE_SCALE_USED := 5]
# check process:
plants[ , .("max_observed_in_data" = max(REL_ABUND, na.rm = T)) , RAKE_SCALE_USED]
## RAKE_SCALE_USED max_observed_in_data
## <int> <int>
## 1: NA 2
## 2: 4 4
## 3: 5 5
## 4: 3 3
#there are 29 unique surveys where no rake scale data are provided, and no easy inference exists -- swap these to presence absence
plants[SURVEY_ID %in% plants[is.na(RAKE_SCALE_USED) & REL_ABUND >1, SURVEY_ID], .N , RAKE_SCALE_USED]
## RAKE_SCALE_USED N
## <int> <int>
## 1: NA 3443
plants[is.na(RAKE_SCALE_USED), .N , REL_ABUND ]
## REL_ABUND N
## <int> <int>
## 1: NA 984135
## 2: 1 17188
## 3: 2 135
#drop the abundance data from these
plants[is.na(RAKE_SCALE_USED), REL_ABUND := NA ]
# clean up WS
rm(coll_edits, sel)
# summarize plants dataset ---------------------------------------------------
Review current data status and outline changes needed for
str(plants) #what data formats?
## Classes 'data.table' and 'data.frame': 1278192 obs. of 48 variables:
## $ SURVEY_ID : int 1 1 1 1 1 1 1 1 1 1 ...
## $ LAKE_NAME : chr "little prairie" "little prairie" "little prairie" "little prairie" ...
## $ DATASOURCE : chr "Muthukrishnan Et al" "Muthukrishnan Et al" "Muthukrishnan Et al" "Muthukrishnan Et al" ...
## $ SURVEY_DATE : IDate, format: "2011-08-16" "2011-08-16" ...
## $ STA_NBR_DATASOURCE : chr "1" "1" "10" "11" ...
## $ DEPTH_FT : num 3.8 3.8 2.5 5.8 6 6.8 7 6.8 7 6.2 ...
## $ NO_VEG_FOUND : logi FALSE FALSE FALSE TRUE TRUE FALSE ...
## $ REL_ABUND : int NA NA NA NA NA NA NA NA NA NA ...
## $ WHOLE_RAKE_REL_ABUND: chr "" "" "" "" ...
## $ SUBSTRATE : chr "" "" "" "" ...
## $ SURVEYOR : chr "surveyors_1" "surveyors_1" "surveyors_1" "surveyors_1" ...
## $ TAXON : chr "Ceratophyllum demersum" "Vallisneria americana" "Drepanocladus" NA ...
## $ SURVEY_ID_DATASOURCE: chr "4664" "4664" "4664" "4664" ...
## $ SAMPLE_NOTES : chr "" "" "" "" ...
## $ SURFACE_GROWTH : chr "" "" "" "" ...
## $ POINT_LVL_SECCHI : num 2 2 2.25 2.25 2.25 2.25 2 2 2 2 ...
## $ X : num NA NA NA NA NA NA NA NA NA NA ...
## $ Y : num NA NA NA NA NA NA NA NA NA NA ...
## $ NORTHING : int NA NA NA NA NA NA NA NA NA NA ...
## $ EASTING : num NA NA NA NA NA NA NA NA NA NA ...
## $ LATITUDE : num NA NA NA NA NA NA NA NA NA NA ...
## $ LONGITUDE : num NA NA NA NA NA NA NA NA NA NA ...
## $ UTMX : num NA NA NA NA NA NA NA NA NA NA ...
## $ UTMY : int NA NA NA NA NA NA NA NA NA NA ...
## $ POINT_ID : int 1 1 2 3 4 5 6 7 8 9 ...
## $ OBS_ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ OLD_SURVEY_ID : int 1 1 1 1 1 1 1 1 1 1 ...
## $ DATESURVEYSTART : chr "8/16/2011" "8/16/2011" "8/16/2011" "8/16/2011" ...
## $ DOW : int 1001600 1001600 1001600 1001600 1001600 1001600 1001600 1001600 1001600 1001600 ...
## $ COHORT : int NA NA NA NA NA NA NA NA NA NA ...
## $ DATEINFO : chr "" "" "" "" ...
## $ MONTH : int 8 8 8 8 8 8 8 8 8 8 ...
## $ DAY : int 16 16 16 16 16 16 16 16 16 16 ...
## $ YEAR : int 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
## $ SUBBASIN : chr "" "" "" "" ...
## $ INVENTORY_STAFF : chr "" "" "" "" ...
## $ INVENTORY_STAFFDATE : chr "" "" "" "" ...
## $ USEABLE : chr "" "" "" "" ...
## $ CLEANED : chr "" "" "" "" ...
## $ INDATABASE : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ INVENTORY_NOTES : chr "" "" "" "" ...
## $ SUBMISSION_STAFF : chr "staff_1" "staff_1" "staff_1" "staff_1" ...
## $ SUBMISSION_STAFFDATE: chr "" "" "" "" ...
## $ SUBMISSION_NOTES : chr "" "" "" "" ...
## $ MULTIPARTSURVEY : num NA NA NA NA NA NA NA NA NA NA ...
## $ SURVEY_FEEDBACK : chr NA NA NA NA ...
## $ SURVEY_DATASOURCE : chr "DNR Shallow Lakes" "DNR Shallow Lakes" "DNR Shallow Lakes" "DNR Shallow Lakes" ...
## $ RAKE_SCALE_USED : int NA NA NA NA NA NA NA NA NA NA ...
## - attr(*, ".internal.selfref")=<externalptr>
## - attr(*, "index")= int(0)
## ..- attr(*, "__SURVEY_ID")= int [1:1278192] 1 2 3 4 5 6 7 8 9 10 ...
## ..- attr(*, "__SURVEY_FEEDBACK")= int [1:1278192] 1 2 3 4 5 6 7 8 9 10 ...
## ..- attr(*, "__DATASOURCE")= int [1:1278192] 1 2 3 4 5 6 7 8 9 10 ...
names(plants) #field names
## [1] "SURVEY_ID" "LAKE_NAME" "DATASOURCE"
## [4] "SURVEY_DATE" "STA_NBR_DATASOURCE" "DEPTH_FT"
## [7] "NO_VEG_FOUND" "REL_ABUND" "WHOLE_RAKE_REL_ABUND"
## [10] "SUBSTRATE" "SURVEYOR" "TAXON"
## [13] "SURVEY_ID_DATASOURCE" "SAMPLE_NOTES" "SURFACE_GROWTH"
## [16] "POINT_LVL_SECCHI" "X" "Y"
## [19] "NORTHING" "EASTING" "LATITUDE"
## [22] "LONGITUDE" "UTMX" "UTMY"
## [25] "POINT_ID" "OBS_ID" "OLD_SURVEY_ID"
## [28] "DATESURVEYSTART" "DOW" "COHORT"
## [31] "DATEINFO" "MONTH" "DAY"
## [34] "YEAR" "SUBBASIN" "INVENTORY_STAFF"
## [37] "INVENTORY_STAFFDATE" "USEABLE" "CLEANED"
## [40] "INDATABASE" "INVENTORY_NOTES" "SUBMISSION_STAFF"
## [43] "SUBMISSION_STAFFDATE" "SUBMISSION_NOTES" "MULTIPARTSURVEY"
## [46] "SURVEY_FEEDBACK" "SURVEY_DATASOURCE" "RAKE_SCALE_USED"
plants[ , length(unique(SURVEY_ID)) , ] #how many surveys in all?
## [1] 3453
plants[ INDATABASE == T , length(unique(SURVEY_ID))] #how many surveys do we have the data in our db for?
## [1] 3196
plants[ , length((unique(DOW))) , ] #how many lake in all?
## [1] 1553
plants[ , length(unique(YEAR)) , ] #how many years of data?
## [1] 22
plants[ , length(unique(POINT_ID)),] #how samples pulled from the lake?
## [1] 372827
plants[!is.na(TAXON) , length(unique(OBS_ID))] # how many times was a plant identified in these data?
## [1] 594127
#' Lets see how many surveys (then number of points) we have been given by each contributor:
plants[ , unique(SURVEY_DATASOURCE) ,]
## [1] "DNR Shallow Lakes"
## [2] "Freshwater Scientific Services"
## [3] "Newman Lab University of Minnesota"
## [4] "Minnehaha Creek Watershed District"
## [5] ""
## [6] "DNR Invasive Species Program"
## [7] "DNR Fisheries"
## [8] "Blue Water Science"
## [9] "Minneapolis Park & Recreation Board"
## [10] "Endangered Resource Services, LLC"
## [11] "Barr Engineering"
## [12] "Three Rivers Park District"
## [13] "AIS Consulting Services"
## [14] "Ramsey County"
## [15] "Ramsey-Washington Metro Watershed District"
## [16] "Capitol Region Watershed District"
## [17] "Emmons & Olivier Resources, Inc."
# survey contribution viz
ggplot(plants[ , .N, .(SURVEY_ID, SURVEY_DATASOURCE, INDATABASE)], aes(SURVEY_DATASOURCE, fill = INDATABASE))+
geom_bar(stat = "count", position = "stack" )+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
ggtitle(label = "n surveys by contributor")+
scale_y_log10()
# point contributions
ggplot(plants[INDATABASE==T , .N, .(POINT_ID, SURVEY_DATASOURCE, INDATABASE)], aes(SURVEY_DATASOURCE))+
geom_bar(stat = "count", position = "stack" )+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
ggtitle(label = "n points by contributor")+
scale_y_log10()
The database has all the surveys we know exist for MN in it, including those for which we do not have the data. It is often useful to snip those no-data ones off right away to avoid running any calcs using all those rows w/o any species data.
missing_data_surveys <- plants[ INDATABASE == F]
plants <- plants[INDATABASE == T]
# drop zeros --------------------------------------------------------------
We dropped surveys with no depth data in an early cleaning step. This happened before we merged datasets from the MN DNR into the database, meaning that we’ve still got to do a purge of 0 and NA depths to be sure we’ve handled DNR and other collaborator data consistently:
#any remaining points with depth == NA or 0? They need to be dropped to be consistent in the handling of all no depth sampled points (currently only MNDNR ):
plants[is.na(DEPTH_FT)|DEPTH_FT == 0 , ][ , .N , .(SURVEY_ID, DATASOURCE)]
## SURVEY_ID DATASOURCE N
## <int> <char> <int>
## 1: 16 Muthukrishnan Et al 8
## 2: 128 Muthukrishnan Et al 2
## 3: 134 Muthukrishnan Et al 1
## 4: 197 Muthukrishnan Et al 1
## 5: 227 Muthukrishnan Et al 11
## ---
## 191: 3305 Muthukrishnan Et al 2
## 192: 3307 Muthukrishnan Et al 5
## 193: 827 source_29 23
## 194: 832 source_29 13
## 195: 4335 source_29 15
plants[is.na(DEPTH_FT)|DEPTH_FT == 0 , .N, .(SURVEY_ID, DATASOURCE)][, unique(DATASOURCE)] #these only still remain in the DNR data--thats because we did the DNR data merge after cleaning up the other data
## [1] "Muthukrishnan Et al" "source_2" "source_29"
## [4] "source_28" "source_38" "source_27"
## [7] "source_34"
sum(plants[ SURVEY_ID %in% plants[is.na(DEPTH_FT)|DEPTH_FT == 0 , .N, .(SURVEY_ID, DATASOURCE)][,SURVEY_ID], .N , .(SURVEY_ID, DATASOURCE)
][ , N]) #counts all points in those surveys
## [1] 113259
#drop them:
plants <- plants[!is.na(DEPTH_FT)|DEPTH_FT == 0 ]
# duplicated entries ------------------------------------------------------
It’s become aparrent to me that when we casted the data to long format in the survey collation project, we ended up with many cases of multiple “observations” of the same thing from within a single point. Here we clean up this issue. I found the cause of it by opening up the surveycollation project
#drop duplicated entries:
names(plants)
## [1] "SURVEY_ID" "LAKE_NAME" "DATASOURCE"
## [4] "SURVEY_DATE" "STA_NBR_DATASOURCE" "DEPTH_FT"
## [7] "NO_VEG_FOUND" "REL_ABUND" "WHOLE_RAKE_REL_ABUND"
## [10] "SUBSTRATE" "SURVEYOR" "TAXON"
## [13] "SURVEY_ID_DATASOURCE" "SAMPLE_NOTES" "SURFACE_GROWTH"
## [16] "POINT_LVL_SECCHI" "X" "Y"
## [19] "NORTHING" "EASTING" "LATITUDE"
## [22] "LONGITUDE" "UTMX" "UTMY"
## [25] "POINT_ID" "OBS_ID" "OLD_SURVEY_ID"
## [28] "DATESURVEYSTART" "DOW" "COHORT"
## [31] "DATEINFO" "MONTH" "DAY"
## [34] "YEAR" "SUBBASIN" "INVENTORY_STAFF"
## [37] "INVENTORY_STAFFDATE" "USEABLE" "CLEANED"
## [40] "INDATABASE" "INVENTORY_NOTES" "SUBMISSION_STAFF"
## [43] "SUBMISSION_STAFFDATE" "SUBMISSION_NOTES" "MULTIPARTSURVEY"
## [46] "SURVEY_FEEDBACK" "SURVEY_DATASOURCE" "RAKE_SCALE_USED"
plants[ , .N , .(SURVEY_ID,
POINT_ID ,
NO_VEG_FOUND ,
# proplight ,
DEPTH_FT ,
SUBSTRATE ,
SURVEYOR, TAXON) ][N>1 , hist(N) , ]
## $breaks
## [1] 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
##
## $counts
## [1] 2397 15 5 0 0 0 0 0 0 0 0 0
## [13] 0 36151
##
## $density
## [1] 0.0621499689 0.0003889235 0.0001296412 0.0000000000 0.0000000000
## [6] 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## [11] 0.0000000000 0.0000000000 0.0000000000 0.9373314665
##
## $mids
## [1] 2.5 3.5 4.5 5.5 6.5 7.5 8.5 9.5 10.5 11.5 12.5 13.5 14.5 15.5
##
## $xname
## [1] "N"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
sum(duplicated(plants$OBS_ID)) #obs ID is unique, but it was generated on row value, not on a unique key set
## [1] 0
names(plants[ , .SD , .SDcols = !c("OBS_ID") ])
## [1] "SURVEY_ID" "LAKE_NAME" "DATASOURCE"
## [4] "SURVEY_DATE" "STA_NBR_DATASOURCE" "DEPTH_FT"
## [7] "NO_VEG_FOUND" "REL_ABUND" "WHOLE_RAKE_REL_ABUND"
## [10] "SUBSTRATE" "SURVEYOR" "TAXON"
## [13] "SURVEY_ID_DATASOURCE" "SAMPLE_NOTES" "SURFACE_GROWTH"
## [16] "POINT_LVL_SECCHI" "X" "Y"
## [19] "NORTHING" "EASTING" "LATITUDE"
## [22] "LONGITUDE" "UTMX" "UTMY"
## [25] "POINT_ID" "OLD_SURVEY_ID" "DATESURVEYSTART"
## [28] "DOW" "COHORT" "DATEINFO"
## [31] "MONTH" "DAY" "YEAR"
## [34] "SUBBASIN" "INVENTORY_STAFF" "INVENTORY_STAFFDATE"
## [37] "USEABLE" "CLEANED" "INDATABASE"
## [40] "INVENTORY_NOTES" "SUBMISSION_STAFF" "SUBMISSION_STAFFDATE"
## [43] "SUBMISSION_NOTES" "MULTIPARTSURVEY" "SURVEY_FEEDBACK"
## [46] "SURVEY_DATASOURCE" "RAKE_SCALE_USED"
sum(duplicated(plants[ , .SD , .SDcols = !c("OBS_ID") ])) #here we see the unique key set (everything BUT obs ID) tells us to drop 500k observations!
## [1] 543494
plants <- plants[!duplicated(plants[ , .SD , .SDcols = !c("OBS_ID") ]) , , ]
# still a bunch of dups leftover where we've got:
# two abunds for one species or two of something for one species...
# with a little sleuthing, I can see that these are a whole mix of things. For
# example, James Johnson submitted one survey with two samples for point 213...
# the solution I'll use is to allow these obs to stay (assuming that both obs
# are real, and the data entry resulted in a bad point ID for one of them).
# because of this, when we agg to the point level, we'll have to choose an obs
# to use that taxon. You'll see this play out in the species matrix
# construction below:
plants[ , .N , .(SURVEY_ID,
POINT_ID ,
NO_VEG_FOUND ,
#proplight ,
DEPTH_FT ,
SUBSTRATE ,
SURVEYOR, TAXON) ][N>1 , .N , ]
## [1] 1292
plants[ , .N , .(SURVEY_ID,
POINT_ID ,
NO_VEG_FOUND ,
#proplight ,
DEPTH_FT ,
SUBSTRATE ,
SURVEYOR, TAXON) ][N>1 , unique(POINT_ID) , ]
## [1] 96591 96844 138942 138943 138944 138945 138948 138949 138950 138951
## [11] 138952 138953 138954 138955 138956 138957 138958 138959 138960 138961
## [21] 138962 138963 138964 138966 138967 138968 138969 138970 138971 138973
## [31] 138974 138975 138976 138977 138979 138980 138981 138982 138983 138984
## [41] 138987 138988 138989 138990 138991 138992 138993 138994 138995 138996
## [51] 138997 138998 138999 139000 139001 139002 139003 139004 139005 139006
## [61] 139007 139008 139009 139010 139012 139013 139014 139015 139016 139017
## [71] 139018 139019 139020 139021 139022 139023 139024 139025 139026 139027
## [81] 139028 139029 139030 139031 139032 139033 139034 139035 139036 139037
## [91] 139038 139039 139040 139043 139044 139045 139046 139047 139050 139052
## [101] 139053 139054 139055 139056 139059 139063 139065 139066 139067 139068
## [111] 139070 139071 139072 139073 139074 139075 139076 139077 139080 139081
## [121] 139082 139083 139084 139085 139087 139088 139089 139090 139096 139097
## [131] 139098 139099 139101 139102 139103 139104 139105 139108 139109 139110
## [141] 139111 139112 139113 139114 139115 139116 139117 139118 139119 139121
## [151] 139122 139123 139124 139125 139126 139129 139130 139131 139132 139133
## [161] 139134 139140 139142 139145 139146 139147 139148 139151 139152 139153
## [171] 139154 139156 139158 139159 139161 139162 139164 139165 139166 139167
## [181] 139168 139169 139170 139171 139172 139173 139175 139176 139177 139180
## [191] 139181 139182 139185 139186 139188 139189 139190 139191 139192 139193
## [201] 139194 139197 139198 139199 139200 139201 139203 139204 139205 139206
## [211] 139208 139209 139210 139211 139212 139213 139214 139215 139216 139217
## [221] 139219 139220 139222 139223 139224 139225 139228 139229 139231 139232
## [231] 139233 139234 139235 139236 139237 139238 139239 139240 139241 139242
## [241] 139243 139244 139246 139247 139250 139251 139252 139254 139256 139258
## [251] 139259 139260 139261 139262 139263 139264 139265 139266 139267 139268
## [261] 139269 139270 139271 139272 139273 139274 139275 139276 139277 139278
## [271] 139279 139280 139281 139282 139283 139284 139285 139287 139289 139290
## [281] 139291 139292 139293 139294 139295 139296 139297 139298 139299 139300
## [291] 139301 139302 139303 139304 139305 139306 139307 139308 139309 139310
## [301] 139311 139312 139313 139315 139316 139317 139318 139319 139320 139321
## [311] 139322 139325 139326 139330 139332 139333 139334 139335 139336 139337
## [321] 139338 139339 139340 139341 139342 139343 139345 139346 139347 139348
## [331] 139350 139352 139353 139354 139357 139358 139359 139360 139361 139364
## [341] 139365 139368 139369 139370 139371 139372 139373 139374 139375 139376
## [351] 139377 139378 139379 139380 139381 139385 139386 139388 139391 139392
## [361] 139395 139396 139397 139398 139402 139403 139405 139406 139407 139408
## [371] 139411 139412 139413 139415 139416 139417 139418 139420 139423 139424
## [381] 139427 139428 139429 139430 139432 139434 139435 139436 139437 139438
## [391] 139441 139442 139443 139444 139447 139448 139449 139451 139455 139456
## [401] 139457 139459 139460 139461 139462 139463 139465 139466 139467 139470
## [411] 139472 139473 139474 139476 139477 139478 139479 139480 139483 139487
## [421] 139489 139490 139494 139495 139496 139497 139499 139501 139502 139503
## [431] 139505 139507 139508 139509 139511 139513 139514 139515 139516 139517
## [441] 139518 139519 139522 139523 139524 139525 139527 139530 139531 139532
## [451] 139533 139535 139536 139537 139540 139542 139543 139544 139555 139557
## [461] 139558 139560 139561 139562 139564 139567 139571 139576 139583 139589
## [471] 139596 139597 139598 139600 139605 139609 139610 139611 139612 139616
## [481] 139618 139619 139620 139621 139622 139623 139624 139625 139626 139627
## [491] 139629 139630 139631 139635 139637 139638 139639 139643 139645 139646
## [501] 139647 139650 139651 139652 139655 139656 139657 139659 139660 139661
## [511] 139662 139663 139665 139667 139668 139678 139682 139683 139692 139701
## [521] 139708 139710 139722 139723 139728 139729 139730 139733 139734 139735
## [531] 139736 139741 139742 139743 139744 139746 139747 139748 139750 139753
## [541] 139754 139755 139756 139759 139761 139763 139765 139767 139768 139769
## [551] 139771 139772 139775 139777 139780 139781 139782 139783 139784 139786
## [561] 139787 139788 139790 139792 139793 139799 139803 139807 139809 139816
## [571] 139817 139818 139819 139822 139826 139831 139833 139834 139835 139836
## [581] 139846 139848 139854 139855 139856 139857 139859 139860 139861 139862
## [591] 139866 139867 139870 139871 139872 139873 139875 139876 139877 139879
## [601] 139880 139881 139883 139884 139885 139887 139889 139891 139893 139894
## [611] 139895 139898 139899 139900 139901 139903 139906 139907 139908 139910
## [621] 139916 139918 139926 139933 139942 139943 139946 139947 139952 139959
## [631] 139965 139973 139981 139986 139992 139994 139995 139996 139997 139998
## [641] 139999 140001 140002 140007 140011 140013 140014 140015 140019 140020
## [651] 140021 140023 140027 140965 140983 253279 255349 255353 359306
Now “plants” is only those surveys for which we were able to gather and collate the data. Below we organize these data 3 ways:
#how many surveys and how many points were sampled in each?
hist(plants[ ,length(unique(POINT_ID)) , SURVEY_ID]$V1, breaks= 100, main = "N points per survey", xlab = "Npoints")
#how many unique TAXA?
unique(plants$TAXON)
## [1] "Ceratophyllum demersum" "Vallisneria americana"
## [3] "Drepanocladus" NA
## [5] "Potamogeton robbinsii" "Carex"
## [7] "Equisetum" "Nuphar variegata"
## [9] "Potentilla palustris" "Sagittaria"
## [11] "Sparganium (floating)" "Nymphaea odorata"
## [13] "Typha angustifolia" "Potamogeton zosteriformis"
## [15] "Zizania palustris" "Schoenoplectus acutus"
## [17] "Potamogeton praelongus" "Brasenia schreberi"
## [19] "Utricularia vulgaris" "Potamogeton natans"
## [21] "Myriophyllum sibiricum" "Pontederia cordata"
## [23] "Schoenoplectus subterminalis" "Potamogeton amplifolius"
## [25] "Eleocharis" "Nitella"
## [27] "Najas flexilis" "Potamogeton illinoensis"
## [29] "Potamogeton gramineus" "Chara"
## [31] "Utricularia intermedia" "Sparganium (emergent)"
## [33] "Nymphaeaceae" "Calla palustris"
## [35] "Sagittaria cuneata" "Bidens beckii"
## [37] "Potamogeton richardsonii" "Elodea canadensis"
## [39] "Potamogeton (narrow)" "Eleocharis acicularis"
## [41] "Potamogeton friesii" "Potamogeton epihydrus"
## [43] "ProtectedSpecies_1" "Utricularia minor"
## [45] "Utricularia" "Iris"
## [47] "Schoenoplectus pungens" "Eleocharis smallii"
## [49] "Salix" "Schoenoplectus"
## [51] "Sparganium" "Stuckenia pectinata"
## [53] "Sparganium fluctuans" "Heteranthera dubia"
## [55] "Elodea" "Najas"
## [57] "Isoetes" "Ranunculus flammula"
## [59] "Potamogeton crispus" "Phragmites australis"
## [61] "Ranunculus" "Dulichium arundinaceum"
## [63] "Bolboschoenus fluviatilis" "Spirodela polyrhiza"
## [65] "Potamogeton alpinus" "Myriophyllum tenellum"
## [67] "Schoenoplectus tabernaemontani" "Eriocaulon aquaticum"
## [69] "Hypericum" "Acorus americanus"
## [71] "Persicaria" "Impatiens"
## [73] "Lemna trisulca" "ProtectedSpecies_2"
## [75] "Potamogeton (broad)" "Lemna minor"
## [77] "Myriophyllum" "Hippuris vulgaris"
## [79] "Typha latifolia" "ProtectedSpecies_3"
## [81] "Zannichellia palustris" "ProtectedSpecies_4"
## [83] "Poaceae" "Potamogeton pusillus"
## [85] "Potamogeton foliosus" "Potamogeton hillii"
## [87] "Typha" "Typha glauca"
## [89] "Stuckenia" "Glyceria borealis"
## [91] "Stuckenia filiformis" "Sium suave"
## [93] "Carex comosa" "Scirpus atrovirens"
## [95] "Myriophyllum spicatum" "Sagittaria latifolia"
## [97] "Andromeda polifolia" "Eupatorium perfoliatum"
## [99] "Scirpus cyperinus" "Eragrostis"
## [101] "Phalaris arundinacea" "Sagittaria graminea"
## [103] "Scutellaria" "Sagittaria rigida"
## [105] "Sparganium eurycarpum" "Asclepias incarnata"
## [107] "Alisma triviale" "Calamagrostis canadensis"
## [109] "Eupatorium dubium" "Wolffia columbiana"
## [111] "Najas guadalupensis" "Potamogeton nodosus"
## [113] "Nelumbo lutea" "Scirpus validus"
## [115] "Nuphar advena" "Scirpus"
## [117] "Nuphar" "Nymphaea"
## [119] "Ranunculus longirostris" "Juncus"
## [121] "Fontinalis antipyretica" "Lemna"
## [123] "Ranunculus aquatilis" "Potamogeton"
## [125] "Wolffia" "Zizania"
## [127] "Equisetum fluviatile" "Eutrochium"
## [129] "Sparganium emersum" "Persicaria amphibia"
## [131] "Chara canescens" "Myriophyllum farwellii"
## [133] "Sparganium angustifolium" "Utricularia gibba"
## [135] "Potamogeton vaseyi" "Drepanocladus aduncus"
## [137] "Nitellopsis" "Eleocharis palustris"
## [139] "ProtectedSpecies_5" "Potamogeton spirillus"
## [141] "Juncus pelocarpus" "Elatine"
## [143] "Riccia fluitans" "Tolypella intricata"
## [145] "Potamogeton strictifolius" "Sparganium natans"
## [147] "Bidens" "Carex lacustris"
## [149] "Iris versicolor" "Myriophyllum verticillatum"
## [151] "Lemna turionifera" "Zosterella"
## [153] "Nuphar microphylla" "Lysimachia terrestris"
## [155] "Menyanthes trifoliata" "Carex aquatilis"
## [157] "Lobelia dortmanna" "Characeae"
## [159] "ProtectedSpecies_6" "Lamiaceae"
## [161] "Leersia oryzoides" "Sphagnum"
## [163] "Chamaedaphne calyculata" "Alnus"
## [165] "Betula pumila" "Sparganium americanum"
## [167] "Elodea nuttallii" "Lythrum salicaria"
## [169] "Impatiens capensis" "Verbena"
## [171] "Persicaria lapathifolia" "Polygonum amphibium"
## [173] "Najas minor" "Ranunculus flabellaris"
## [175] "Lychnothamnus barbatus" "Wolffia borealis"
## [177] "Caltha palustris" "Cyperaceae"
## [179] "Hypericum ellipticum" "ProtectedSpecies_7"
## [181] "Butomus umbellatus" "Scorpidium scorpioides"
## [183] "Fontinalis sullivantii" "Eupatorium maculatum"
## [185] "Ledum groenlandicum" "ProtectedSpecies_8"
## [187] "Sagittaria cristata" "Boltonia asteroides"
## [189] "Scolochloa festucacea" "ProtectedSpecies_9"
## [191] "Myrica gale" "Potamogeton obtusifolius"
## [193] "Schoenoplectus x oblongus" "Solidago"
## [195] "Utricularia macrorhiza" "Chara globularis"
## [197] "Alisma" "Asteraceae"
## [199] "Lycopus americanus" "Triadenum fraseri"
## [201] "Isoetes echinospora" "Callitriche"
## [203] "Nymphaea tuberosa" "ProtectedSpecies_10"
## [205] "Acorus" "Lysimachia"
## [207] "Myriophyllum alterniflorum" "Littorella uniflora"
## [209] "Andromeda glaucophylla" "Sphagnum magellanicum"
## [211] "Elatine minima" "Ricciocarpos natans"
## [213] "Carex pellita" "Carex scoparia"
## [215] "Iris virginica" "Juncus effusus"
## [217] "Eleocharis erythropoda" "Juncus arcticus"
## [219] "Juncus canadensis" "Ceratophyllum"
## [221] "Cicuta" "ProtectedSpecies_11"
## [223] "Nasturtium officinale" "Rumex orbiculatus"
## [225] "ProtectedSpecies_12" "Cicuta maculata"
## [227] "Veronica americana" "Bolboschoenus maritimus"
## [229] "Scutellaria lateriflora" "Myriophyllum exalbescens"
## [231] "Schoenoplectus americanus" "Riccia"
# N taxa per survey:
plants[ , .("Ntaxa" = length(unique(TAXON))) , SURVEY_ID] #if you want to name cols on the fly you need to wrap in .() which makes list from them
## SURVEY_ID Ntaxa
## <int> <int>
## 1: 1 13
## 2: 2 16
## 3: 3 25
## 4: 4 14
## 5: 5 16
## ---
## 3190: 4338 9
## 3191: 4339 8
## 3192: 4340 8
## 3193: 4341 26
## 3194: 4333 6
hist(plants[ , length(unique(TAXON)) , SURVEY_ID][ , V1], main = "N taxa per survey", xlab = "N taxa")
hist(plants[ , length(unique(TAXON)) , POINT_ID][ , V1], main = "N taxa per point", xlab = "N taxa")
# rake scale normalization -------------------------------------------
This code will clean the relative rake density data from the whole PI dataset, shifting all to a 0,1,2,3 scale. This code was developed in the surveycollation project, but is implemented here (post-collaborator feedback) to allow the collabs to specify what the rake scale they used was.
#drop surveys with max vals of 1s and 1-2s
rakes1 <- plants[RAKE_SCALE_USED %in% c(3,4,5), ]
# rakes1[ , .N , REL_ABUND]
#how many surveys in these categories?
# rakes1[ , .N , SURVEY_ID] #982
#Now shift/ realign data per discussion above
#1-4 survey shifted to 1-3
rakes1[RAKE_SCALE_USED == 4 & REL_ABUND == 3 ,
REL_ABUND := 2 ]
rakes1[RAKE_SCALE_USED == 4 & REL_ABUND == 4 ,
REL_ABUND := 3 ]
#1-5 surveys shifted to 1-3
rakes1[RAKE_SCALE_USED == 5 & (REL_ABUND == 3 |REL_ABUND == 4) ,
REL_ABUND := 2 ]
rakes1[RAKE_SCALE_USED == 5 & REL_ABUND == 5 ,
REL_ABUND := 3 ]
# #check that the max vals are all 3's
# hist(rakes1[ !is.na(REL_ABUND) , max(REL_ABUND) , SURVEY_ID ][,V1])
#
# #and all data are distributed in 1-3 rake density framework
# hist(rakes1[ !is.na(REL_ABUND) , REL_ABUND , ])
#
# # count the number of surveys we've got
# rakes1[ , .N , SURVEY_ID ] # N points per survey (includes NA's--points where no species were observed)
# put the corrected rake scales back into the plants db
# plants[ , , ]
# rakes1[ , .N , OBS_ID][N>1]
# rakes1[is.na(OBS_ID) , ,]
#remove any cases where people told us the rake scale but data to-date are not in db:
rakes1 <- rakes1[!is.na(OBS_ID)]
#pop these corrected rake scale data into the plants dataset
plants[OBS_ID %in% rakes1$OBS_ID , REL_ABUND_CORRECTED := rakes1$REL_ABUND , ]
#clean out intermediates
rm(rakes1)
#which surveys used rakabunds
plants[ , .N , RAKE_SCALE_USED]
## RAKE_SCALE_USED N
## <int> <int>
## 1: NA 485133
## 2: 4 115191
## 3: 5 98010
## 4: 3 33965
#why are there NA abunds for species in these surveys?
plants[!is.na(RAKE_SCALE_USED)& !is.na(TAXON), .N , REL_ABUND_CORRECTED ]
## REL_ABUND_CORRECTED N
## <int> <int>
## 1: 2 67019
## 2: 1 118929
## 3: 3 16762
## 4: NA 3412
#expect some NAs to come through on these. May be worth looking back at the
# DNR data and evaluating how these plant obs were recorded and how they
# report then carry through our workflow. Seems to me that these surveys
# oughtta have numbers assigned to them -- like how coudl a record show a
# species present but not have a numeric indicator on that? Were they 0/1?
plants[!is.na(RAKE_SCALE_USED) & is.na(REL_ABUND) & !is.na(TAXON), .N , SURVEY_ID ]
## SURVEY_ID N
## <int> <int>
## 1: 430 272
## 2: 480 1121
## 3: 515 58
## 4: 592 11
## 5: 718 23
## 6: 738 22
## 7: 855 29
## 8: 1531 1857
## 9: 3172 18
## 10: 2822 1
bad_abund_surveys <- plants[!is.na(RAKE_SCALE_USED) & is.na(REL_ABUND) & !is.na(TAXON), unique(SURVEY_ID), ]
#force pres/abs on those
plants[SURVEY_ID %in% bad_abund_surveys, c("RAKE_SCALE_USED", "REL_ABUND", "REL_ABUND_CORRECTED"):= NA , ]
# georeference data -------------------------------------------------------
This section uses MN hydrography geodata to add direct geodata into the dataset. After run, pwi_l and plants can be linked on the shared “order_ID” column, and HUC-8 level watersheds are included in the dataset
# merge geospatial files
#change sf data.frame to a data.table
setDT(pwi_l)
# linking plants db to spatial reference:
#shapefile dows need to be made numeric (drops leading zeros)
# pwi_l[ , dowlknum , ]
pwi_l[ , dow_main := round(as.numeric(dowlknum)/100,0)*100 , ]
#there's a lot of junk in there, work towards a 1:1 of plants dows and pwi_l dows
pwi_l <- pwi_l[!is.na(dowlknum)]# drops many polygons that aren't lakes (islands, rivers, etc)
pwi_l[ , order_ID:= .I , ]#adds a key
#drop non-mn shapes
pwi_l <- pwi_l[!outside_mn == "Y"]
#which dows are duplicated in the shapes?
pwi_l[pwi_l[, duplicated(dowlknum),], dowlknum]
## [1] "32005700" "66001400" "34002800" "43011500" "86017800" "70001600"
## [7] "27000300" "02000500" "86025202" "86025202" "73008200" "75002400"
## [13] "78002400" "56097900" "56078100" "18039400" "18014500" "09003900"
## [19] "18026900" "18031100" "03007700" "03011200" "56164900" "03030400"
## [25] "03037402" "56078600" "60021700" "60017800" "60021700" "04029700"
## [31] "31090300" "31090300" "69058000" "69000300" "16063300" "69034500"
#lets review those data and see if we can devise any cleaning ideas
#pwi_l[dowlknum %in% pwi_l[pwi_l[, duplicated(dowlknum),], dowlknum],]
# we can just use the first instance of these duplicated waterbodies:
# we'll do that by dropping the subsequent duplicates!
pwi_l <- pwi_l[!pwi_l[, duplicated(dowlknum),], , ]
# missing matches in the plants data to shapefile dows
sum(is.na(match(plants[ , unique(DOW) ,], unique(pwi_l[ , dowlknum , ]))))
## [1] 217
# missing matches in the plants data to shapefile mainlake dows
sum(is.na(match(plants[ , unique(DOW) ,], unique(pwi_l[ , dow_main , ]))))
## [1] 42
#append a polygon value to the plants data (here we'll use our order_ID from above)
plants[ , order_ID := pwi_l[ match( plants[ , DOW ,], pwi_l[ , as.numeric(dow_main) , ]) , order_ID , ] ]
#and any that didn't match on that, try the basin specific
plants[ is.na(order_ID) , order_ID := pwi_l[ match(plants[ is.na(order_ID) , DOW ,], pwi_l[ , as.numeric(dowlknum) , ]), order_ID , ] ]
# now to navigate these last non-compliant ones...
plants[is.na(order_ID) & !is.na(DOW), .N , .(LAKE_NAME, DOW, DATASOURCE)]
## LAKE_NAME DOW DATASOURCE N
## <char> <int> <char> <int>
## 1: sakatah bay 40000201 source_17 934
## 2: rrwma - pool 1 - east 68000501 Muthukrishnan Et al 74
## 3: rrwma - pool 1 - west 68000502 Muthukrishnan Et al 94
## 4: katherine abbott pond 82009999 source_25 36
pwi_l[ dowlknum == "40000200", order_ID]
## [1] 1267
plants[ DOW == 40000201, order_ID := pwi_l[ dowlknum == "40000200", order_ID] ] #Upper Sakatah polygon
pwi_l[ dowlknum == "68000500", order_ID]
## [1] 26678
plants[ DOW %in% c(68000501,68000502), order_ID := pwi_l[ dowlknum == "68000500", order_ID] ] #Roseau River WMA
# pwi_l[ dowlknum == "70005000", order_ID]
# plants[ DOW == 70050000, ] #Carls Lake
#
pwi_l[ dowlknum == "82011800", order_ID]
## [1] 7235
plants[ DOW == 82009999, order_ID := pwi_l[ dowlknum == "82011800", order_ID] ] #Katherine Abbott Pond
# the plants datset lakes with no geodata in the hydrography layer we used:
plants[is.na(order_ID) , .N , .(LAKE_NAME, DOW, DATASOURCE)]
## LAKE_NAME DOW DATASOURCE N
## <char> <int> <char> <int>
## 1: pool 2 NA Muthukrishnan Et al 97
## 2: reynen pond NA Muthukrishnan Et al 46
## 3: unnamed delong wpa NA Muthukrishnan Et al 22
## 4: big sob NA source_26 64
## 5: ivanhoe wma east basin NA Muthukrishnan Et al 26
## 6: olson pool NA Muthukrishnan Et al 104
## 7: goldmine slough section - vermilion river NA Muthukrishnan Et al 215
## 8: vermilion falls section - vermilion river NA Muthukrishnan Et al 88
## 9: gull river NA Muthukrishnan Et al 335
## 10: mississippi river NA Muthukrishnan Et al 1769
## 11: little elk wma NA Muthukrishnan Et al 98
## 12: unnamed NA Muthukrishnan Et al 20
## 13: sand prairie wma NA Muthukrishnan Et al 75
## 14: loerch wma impoundment NA Muthukrishnan Et al 31
## 15: daggett brook wma impoundment NA Muthukrishnan Et al 107
## 16: sterle pool NA Muthukrishnan Et al 175
## 17: trettle pool NA Muthukrishnan Et al 323
## 18: dundee marsh NA Muthukrishnan Et al 8
# in total, this is 18 surveys and 3611 observations without ANY geolocation
plants[, summary(order_ID)]
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 13 5269 9954 11269 17166 26778 3603
# fix up local geospatial info
#check for weird X,Y vals in th UTM-looking columns
plants[!is.na("X"), summary(X) ,]
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 45.1 341123.0 495722.0 464619.0 564792.0 598930.0 728703
plants[!is.na("Y"), summary(Y) ,]
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -93 166689 4909613 2728113 5084488 5184300 725818
#some are clearly lat/longs
plants[X < 4600, X ,]
## [1] 45.09664 45.09664 45.09664 45.09664 45.09664 45.09664 45.09664 45.09664
## [9] 45.09664 45.09664 45.09664 45.09727 45.09727 45.09727 45.09727 45.09727
plants[X < 4600, LATITUDE := X ,]
plants[X < 4600, X := NA ,]
plants[Y<10000, summary(Y) ,]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -93.07 44.90 44.91 43.39 44.98 45.00
plants[Y<10000 & Y>0, LATITUDE := Y ,]
plants[Y<10000 & Y>0, Y := NA ,]
plants[Y<10000, LONGITUDE := Y ,]
plants[Y<10000, Y := NA ,]
#whatever the heck is leftover here is weeeeeird and muddled.
plants[!is.na("X"), summary(X) ,]
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 237606 341171 495725 466695 564792 598930 728719
plants[!is.na("Y"), summary(Y) ,]
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 140723 210010 5003604 3499077 5086243 5184300 727246
# we need to delete these non-UTM vals form X & Y
# plants[Y<4800000, .N, DATASOURCE ]
plants[ Y < 4800000, c("X","Y") := NA, ]
#looks clean, now move into the UTM slots?
plants[!is.na(UTMY) , summary(UTMY) ,]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4828777 5066644 5161078 5134746 5204415 5339053
plants[!is.na(UTMX) , summary(UTMX) ,]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 229942 380655 408661 417766 453041 712374
#any conflicts with UTM loc data?
plants[!is.na(UTMX) & !is.na(X)]
## Empty data.table (0 rows and 50 cols): SURVEY_ID,LAKE_NAME,DATASOURCE,SURVEY_DATE,STA_NBR_DATASOURCE,DEPTH_FT...
plants[!is.na(UTMY) & !is.na(Y)]
## Empty data.table (0 rows and 50 cols): SURVEY_ID,LAKE_NAME,DATASOURCE,SURVEY_DATE,STA_NBR_DATASOURCE,DEPTH_FT...
#move X, Y to UTMs
plants[!is.na(X) , UTMX := X , ]
plants[!is.na(Y) , UTMY := Y , ]
plants[ , c("X", "Y") := NULL , ]
#now Northing Easting, which happen to look like clean UTM data
plants[!is.na(NORTHING), summary(NORTHING) ,]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4860888 5003560 5136308 5089221 5203396 5208310
plants[!is.na(EASTING), summary(EASTING) ,]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 315321 339882 401439 413157 455109 515302
#overlap/ conflict?
plants[!is.na(UTMX) & !is.na(NORTHING)]
## Empty data.table (0 rows and 48 cols): SURVEY_ID,LAKE_NAME,DATASOURCE,SURVEY_DATE,STA_NBR_DATASOURCE,DEPTH_FT...
plants[!is.na(UTMY) & !is.na(EASTING)]
## Empty data.table (0 rows and 48 cols): SURVEY_ID,LAKE_NAME,DATASOURCE,SURVEY_DATE,STA_NBR_DATASOURCE,DEPTH_FT...
#move Northing and easting to UTMs
plants[!is.na(NORTHING), UTMY := NORTHING ,]
plants[!is.na(EASTING), UTMX := EASTING ,]
plants[ , c("NORTHING", "EASTING") := NULL , ]
#now get all into same CRS:
#conflicts?
plants[!is.na(UTMX) & !is.na(LONGITUDE)]
## Empty data.table (0 rows and 46 cols): SURVEY_ID,LAKE_NAME,DATASOURCE,SURVEY_DATE,STA_NBR_DATASOURCE,DEPTH_FT...
plants[!is.na(UTMY) & !is.na(LATITUDE)]
## Empty data.table (0 rows and 46 cols): SURVEY_ID,LAKE_NAME,DATASOURCE,SURVEY_DATE,STA_NBR_DATASOURCE,DEPTH_FT...
plants[!is.na(LONGITUDE), UTMX:=NA]
plants[!is.na(LATITUDE), UTMY:=NA]
#here we'll split into a non, UTM, and LL georef set, then convert ref'd to sf objects, then merge all back together
# plants complete x,y in one CRS or another? NOPE... Oh well. moving on.
# plants[!is.na(UTMX) & is.na(UTMY)]
# # plants[]
#
# plants[!is.na(LATITUDE) & is.na(LONGITUDE)]
# plants[is.na(LATITUDE) & !is.na(LONGITUDE)]
#Conversion of data frame to sf object (note we've assumed NAD1983, Z15N for UTMs)
plants_UTMS <- st_as_sf(x = plants[!is.na(UTMX)],
coords = c("UTMX", "UTMY"),
crs = "+proj=utm +zone=15")
#Projection transformation
plants_U_LL = st_transform(plants_UTMS, crs = "+proj=longlat +datum=WGS84")
setDT(plants_U_LL)
#Conversion of data frame to sf object
plants_LLS <- st_as_sf(x = plants[!is.na(LONGITUDE)],
coords = c("LONGITUDE", "LATITUDE"),
crs = "+proj=longlat +datum=WGS84")
setDT(plants_LLS)
#drop unusedCRS cols from each:
plants_U_LL[ , c("LATITUDE", "LONGITUDE") := NULL , ]
plants_LLS[ , c("UTMX","UTMY") := NULL, ]
plants2 <- rbindlist(list(plants_LLS, plants_U_LL))
plants2 <- cbind(plants2, st_coordinates(st_as_sf(plants2)))
plants2[ , geometry := NULL ,]
names(plants2)[names(plants2)%in% c("X","Y")] <- c("Longitude","Latitude")
#merge back to plants (check dims to ensure no duplications or overlaps):
dim(plants)
## [1] 732299 46
plants[is.na(UTMX) & is.na(LONGITUDE) , .N , ]+
plants2[ , .N ,]
## [1] 732299
plants1 <- plants[is.na(UTMX) & is.na(LONGITUDE), ]
plants1[ , c("UTMX", "UTMY", "LATITUDE", "LONGITUDE") := NULL ,]
plants1[ , c("Longitude", "Latitude") := NA, ]
nrow(plants1)+nrow(plants2)
## [1] 732299
plants <- rbindlist(list(plants1, plants2))
rm(plants_LLS,plants_U_LL, plants1, plants2)
# label all pwi_l with watershed names
# st_join(pwi_l, watersheds_huc8)
#
# st_crs(plants_UTMS) <- st_crs(watersheds_huc8)
#
# plantsUTMS <- st_join(plants_UTMS, left = TRUE, watersheds_huc8)
pwi_l <- st_sf(pwi_l)
st_crs(pwi_l) <- st_crs(watersheds_huc8) #ignore warning, no re-projection needed in this case, we do this because I lost the crs in some of my data manipulation
pwi_l <- st_join(pwi_l, left = TRUE, watersheds_huc8)
setDT(pwi_l)
rm( plants_UTMS)
# add in secchi data ------------------------------------------------------
This code will conduct an eval of the fuzzy join of Secchi to plants data, calculate Secchi metrics based on the chosen fuzzy join, then excute the join. The code includes a solution adapted from a script written by Dan Larkin for the niches project (https://conservancy.umn.edu/handle/11299/218009).
Assign a Secchi to each observationuse the closest Secchi temporally. For each plant observation, we’ll append the Secchi observation from that DOW that was closest in time to the plant obs.
# number of observations
hist(secchi[,year(Date)])
hist(secchi[,month(Date)])
secchi[,.N,Source]
## Source N
## <char> <int>
## 1: Shallow Lakes 1124
## 2: PCA 576270
## 3: DNRFish 10165
## 4: DNR Historical Files 4013
## 5: ll_biologist_files 2320
# and for the plants data?
plants[ is.na(INDATABASE) , YEAR:= year(as.IDate(DATESURVEYSTART)) , ] #new data imports need to move date in from the chr strings
plants[ is.na(INDATABASE) , SURVEY_DATE:= as.IDate(DATESURVEYSTART) , ] #new data imports need to move date in from the chr strings
hist(plants[ ,.N , .(SURVEY_ID,YEAR) ][ , YEAR,])
#clean some data in prep for join
secchi[, YEAR := year(Date)]
secchi[, MONTH := month(Date) ]
secchi[ , old_DOW := DOW]
secchi[, DOW := as.integer(DOW)]
secchi[ is.na(DOW) , old_DOW ]
## [1] "03IMP002" "24IMP001" "26IMP001" "R001-46G" "R001-46V" "R1-96-1" "W0127601"
## [8] "W0655001"
#how many survey DOW's have a secchi for the lake (ever)?
summary(plants[ , unique(DOW) , ]%in%secchi[ ,DOW ,])
## Mode FALSE TRUE
## logical 156 1364
#how many surveys have a secchi for that year?
summary(plants[ , .N ,.(DOW,YEAR) ][,paste(DOW,YEAR, sep = "_"),] %in% secchi[ ,paste(DOW,YEAR, sep = "_") ,])
## Mode FALSE TRUE
## logical 555 2129
#how many surveys have a secchi for that month?
summary(plants[ , .N ,.(DOW,YEAR,MONTH) ][,paste(paste(DOW,YEAR, sep = "_"),MONTH, sep = "_"),] %in%
secchi[ ,paste(paste(DOW,YEAR, sep = "_"),MONTH, sep = "_") ,])
## Mode FALSE TRUE
## logical 729 2373
plants[ , date := SURVEY_DATE]
#consolidate to the DOW-Date level
secchi <- secchi[ , .("Secchi_m" = mean(Secchi_m)) , .(DOW, Date) ]
secchi[ , SECCHI_DATE := Date]
secchi <- secchi[!is.na(DOW)]
plants <- secchi[plants, , on = .(DOW, Date = date), roll='nearest' ]
#drop the Date field (now a dup of SURVEY_DATE)
plants[ , Date := NULL]
#how far apart are plant and secchi obs?
hist(plants[,SURVEY_DATE-SECCHI_DATE,])
#keep only Secchi obs within a month (date+/-30d)
hist(plants[ , abs(yday(SECCHI_DATE) - yday(SURVEY_DATE)), ])
plants[abs(yday(SECCHI_DATE) - yday(SURVEY_DATE))<30 &
abs(year(SECCHI_DATE) - year(SURVEY_DATE))<1, SECCHI_m_ACCEPTED := Secchi_m ]
#cleanup:
rm(secchi)
# calculate point level light avail
plants[ , proplight := exp(-(log(10)/SECCHI_m_ACCEPTED)*(DEPTH_FT/3.2804)) ]
nrow(plants[!is.na(proplight) , .N , POINT_ID])/ #how many points can we do this for?
nrow(plants[, .N , POINT_ID])#out of total n points
## [1] 0.7834643
plants[ ,hist(proplight, breaks = 100, main = "78% coverage for light availability")]
## $breaks
## [1] 0.00 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.10 0.11 0.12 0.13 0.14
## [16] 0.15 0.16 0.17 0.18 0.19 0.20 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29
## [31] 0.30 0.31 0.32 0.33 0.34 0.35 0.36 0.37 0.38 0.39 0.40 0.41 0.42 0.43 0.44
## [46] 0.45 0.46 0.47 0.48 0.49 0.50 0.51 0.52 0.53 0.54 0.55 0.56 0.57 0.58 0.59
## [61] 0.60 0.61 0.62 0.63 0.64 0.65 0.66 0.67 0.68 0.69 0.70 0.71 0.72 0.73 0.74
## [76] 0.75 0.76 0.77 0.78 0.79 0.80 0.81 0.82 0.83 0.84 0.85 0.86 0.87 0.88 0.89
## [91] 0.90 0.91 0.92 0.93 0.94 0.95 0.96 0.97 0.98 0.99 1.00
##
## $counts
## [1] 87425 21541 20051 14027 13790 12125 12384 11809 10560 16318 8779 10295
## [13] 11928 10512 9710 10318 7846 11072 7909 9283 5139 11261 5899 7674
## [25] 6808 7357 8058 6522 7034 5416 4022 9332 4087 6153 4514 7329
## [37] 4885 5087 3903 5843 3253 5504 4414 5296 4431 3439 5605 2951
## [49] 3747 3006 3890 5037 2405 3224 4298 1994 3505 3115 2712 3075
## [61] 2234 3197 1338 3328 2724 2579 2934 1052 2193 1870 1651 2135
## [73] 1066 1485 1283 1893 660 1695 906 1243 1033 1312 768 909
## [85] 674 523 664 361 419 566 409 291 168 120 107 122
## [97] 55 41 37 4525
##
## $density
## [1] 15.351832211 3.782600145 3.520956107 2.463141555 2.421524349
## [6] 2.129150307 2.174630713 2.073660699 1.854336267 2.865441213
## [11] 1.541592622 1.807802260 2.094557102 1.845907466 1.705076246
## [16] 1.811841061 1.377757798 1.944243480 1.388820600 1.630095035
## [21] 0.902408530 1.977431885 1.035864549 1.347554594 1.195484972
## [26] 1.291889386 1.414985004 1.145263365 1.235170578 0.951049737
## [31] 0.706263302 1.638699436 0.717677303 1.080466956 0.792658514
## [36] 1.286972585 0.857806124 0.893277329 0.685366899 1.026030948
## [41] 0.571226882 0.966502539 0.775098512 0.929977734 0.778083712
## [46] 0.603888487 0.984238142 0.518195675 0.657973295 0.527853676
## [51] 0.683084098 0.884497327 0.422318061 0.566134482 0.754728909
## [56] 0.350146450 0.615478089 0.546994079 0.476227269 0.539970078
## [61] 0.392290456 0.561393281 0.234952834 0.584396884 0.478334469
## [66] 0.452872465 0.515210474 0.184731227 0.385090855 0.328372047
## [71] 0.289915642 0.374906054 0.187189627 0.260766038 0.225294832
## [76] 0.332410848 0.115896017 0.297642043 0.159093623 0.218270831
## [81] 0.181394826 0.230387233 0.134860819 0.159620423 0.118354417
## [86] 0.091838813 0.116598417 0.063391609 0.073576411 0.099389614
## [91] 0.071820410 0.051099607 0.029500804 0.021072003 0.018789203
## [96] 0.021423203 0.009658001 0.007199601 0.006497201 0.794590114
##
## $mids
## [1] 0.005 0.015 0.025 0.035 0.045 0.055 0.065 0.075 0.085 0.095 0.105 0.115
## [13] 0.125 0.135 0.145 0.155 0.165 0.175 0.185 0.195 0.205 0.215 0.225 0.235
## [25] 0.245 0.255 0.265 0.275 0.285 0.295 0.305 0.315 0.325 0.335 0.345 0.355
## [37] 0.365 0.375 0.385 0.395 0.405 0.415 0.425 0.435 0.445 0.455 0.465 0.475
## [49] 0.485 0.495 0.505 0.515 0.525 0.535 0.545 0.555 0.565 0.575 0.585 0.595
## [61] 0.605 0.615 0.625 0.635 0.645 0.655 0.665 0.675 0.685 0.695 0.705 0.715
## [73] 0.725 0.735 0.745 0.755 0.765 0.775 0.785 0.795 0.805 0.815 0.825 0.835
## [85] 0.845 0.855 0.865 0.875 0.885 0.895 0.905 0.915 0.925 0.935 0.945 0.955
## [97] 0.965 0.975 0.985 0.995
##
## $xname
## [1] "proplight"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# clean taxa names here ---------------------------------------------------
#these names ought to be pretty close already.
plants[ , .N , TAXON]
## TAXON N
## <char> <int>
## 1: Ceratophyllum demersum 91658
## 2: Vallisneria americana 13764
## 3: Drepanocladus 1772
## 4: <NA> 140457
## 5: Potamogeton robbinsii 10131
## ---
## 228: Littorella uniflora 3
## 229: Andromeda glaucophylla 2
## 230: Sphagnum magellanicum 1
## 231: Elatine minima 3
## 232: Cicuta maculata 1
plants[ , sort(unique(tolower(gsub("_", " ", TAXON)))) , ]
## [1] "acorus" "acorus americanus"
## [3] "alisma" "alisma triviale"
## [5] "alnus" "andromeda glaucophylla"
## [7] "andromeda polifolia" "asclepias incarnata"
## [9] "asteraceae" "betula pumila"
## [11] "bidens" "bidens beckii"
## [13] "bolboschoenus fluviatilis" "bolboschoenus maritimus"
## [15] "boltonia asteroides" "brasenia schreberi"
## [17] "butomus umbellatus" "calamagrostis canadensis"
## [19] "calla palustris" "callitriche"
## [21] "caltha palustris" "carex"
## [23] "carex aquatilis" "carex comosa"
## [25] "carex lacustris" "carex pellita"
## [27] "carex scoparia" "ceratophyllum"
## [29] "ceratophyllum demersum" "chamaedaphne calyculata"
## [31] "chara" "chara canescens"
## [33] "chara globularis" "characeae"
## [35] "cicuta" "cicuta maculata"
## [37] "cyperaceae" "drepanocladus"
## [39] "drepanocladus aduncus" "dulichium arundinaceum"
## [41] "elatine" "elatine minima"
## [43] "eleocharis" "eleocharis acicularis"
## [45] "eleocharis erythropoda" "eleocharis palustris"
## [47] "eleocharis smallii" "elodea"
## [49] "elodea canadensis" "elodea nuttallii"
## [51] "equisetum" "equisetum fluviatile"
## [53] "eragrostis" "eriocaulon aquaticum"
## [55] "eupatorium dubium" "eupatorium maculatum"
## [57] "eupatorium perfoliatum" "eutrochium"
## [59] "fontinalis antipyretica" "fontinalis sullivantii"
## [61] "glyceria borealis" "heteranthera dubia"
## [63] "hippuris vulgaris" "hypericum"
## [65] "hypericum ellipticum" "impatiens"
## [67] "impatiens capensis" "iris"
## [69] "iris versicolor" "iris virginica"
## [71] "isoetes" "isoetes echinospora"
## [73] "juncus" "juncus arcticus"
## [75] "juncus canadensis" "juncus effusus"
## [77] "juncus pelocarpus" "lamiaceae"
## [79] "ledum groenlandicum" "leersia oryzoides"
## [81] "lemna" "lemna minor"
## [83] "lemna trisulca" "lemna turionifera"
## [85] "littorella uniflora" "lobelia dortmanna"
## [87] "lychnothamnus barbatus" "lycopus americanus"
## [89] "lysimachia" "lysimachia terrestris"
## [91] "lythrum salicaria" "menyanthes trifoliata"
## [93] "myrica gale" "myriophyllum"
## [95] "myriophyllum alterniflorum" "myriophyllum exalbescens"
## [97] "myriophyllum farwellii" "myriophyllum sibiricum"
## [99] "myriophyllum spicatum" "myriophyllum tenellum"
## [101] "myriophyllum verticillatum" "najas"
## [103] "najas flexilis" "najas guadalupensis"
## [105] "najas minor" "nasturtium officinale"
## [107] "nelumbo lutea" "nitella"
## [109] "nitellopsis" "nuphar"
## [111] "nuphar advena" "nuphar microphylla"
## [113] "nuphar variegata" "nymphaea"
## [115] "nymphaea odorata" "nymphaea tuberosa"
## [117] "nymphaeaceae" "persicaria"
## [119] "persicaria amphibia" "persicaria lapathifolia"
## [121] "phalaris arundinacea" "phragmites australis"
## [123] "poaceae" "polygonum amphibium"
## [125] "pontederia cordata" "potamogeton"
## [127] "potamogeton (broad)" "potamogeton (narrow)"
## [129] "potamogeton alpinus" "potamogeton amplifolius"
## [131] "potamogeton crispus" "potamogeton epihydrus"
## [133] "potamogeton foliosus" "potamogeton friesii"
## [135] "potamogeton gramineus" "potamogeton hillii"
## [137] "potamogeton illinoensis" "potamogeton natans"
## [139] "potamogeton nodosus" "potamogeton obtusifolius"
## [141] "potamogeton praelongus" "potamogeton pusillus"
## [143] "potamogeton richardsonii" "potamogeton robbinsii"
## [145] "potamogeton spirillus" "potamogeton strictifolius"
## [147] "potamogeton vaseyi" "potamogeton zosteriformis"
## [149] "potentilla palustris" "protectedspecies 1"
## [151] "protectedspecies 10" "protectedspecies 11"
## [153] "protectedspecies 12" "protectedspecies 2"
## [155] "protectedspecies 3" "protectedspecies 4"
## [157] "protectedspecies 5" "protectedspecies 6"
## [159] "protectedspecies 7" "protectedspecies 8"
## [161] "protectedspecies 9" "ranunculus"
## [163] "ranunculus aquatilis" "ranunculus flabellaris"
## [165] "ranunculus flammula" "ranunculus longirostris"
## [167] "riccia" "riccia fluitans"
## [169] "ricciocarpos natans" "rumex orbiculatus"
## [171] "sagittaria" "sagittaria cristata"
## [173] "sagittaria cuneata" "sagittaria graminea"
## [175] "sagittaria latifolia" "sagittaria rigida"
## [177] "salix" "schoenoplectus"
## [179] "schoenoplectus acutus" "schoenoplectus americanus"
## [181] "schoenoplectus pungens" "schoenoplectus subterminalis"
## [183] "schoenoplectus tabernaemontani" "schoenoplectus x oblongus"
## [185] "scirpus" "scirpus atrovirens"
## [187] "scirpus cyperinus" "scirpus validus"
## [189] "scolochloa festucacea" "scorpidium scorpioides"
## [191] "scutellaria" "scutellaria lateriflora"
## [193] "sium suave" "solidago"
## [195] "sparganium" "sparganium (emergent)"
## [197] "sparganium (floating)" "sparganium americanum"
## [199] "sparganium angustifolium" "sparganium emersum"
## [201] "sparganium eurycarpum" "sparganium fluctuans"
## [203] "sparganium natans" "sphagnum"
## [205] "sphagnum magellanicum" "spirodela polyrhiza"
## [207] "stuckenia" "stuckenia filiformis"
## [209] "stuckenia pectinata" "tolypella intricata"
## [211] "triadenum fraseri" "typha"
## [213] "typha angustifolia" "typha glauca"
## [215] "typha latifolia" "utricularia"
## [217] "utricularia gibba" "utricularia intermedia"
## [219] "utricularia macrorhiza" "utricularia minor"
## [221] "utricularia vulgaris" "vallisneria americana"
## [223] "verbena" "veronica americana"
## [225] "wolffia" "wolffia borealis"
## [227] "wolffia columbiana" "zannichellia palustris"
## [229] "zizania" "zizania palustris"
## [231] "zosterella"
plants[ , TAXON := tolower(gsub("_", " ", TAXON)) , ]
# clean up taxonomy using macroniche (see paper or git repo) taxonomy ------------------------------------------------------
Verhoeven, M. R., Glisson, W. J., & Larkin, D. J. (2020). Niche models
#' differentiate potential impacts of two aquatic invasive plant species on
#' native macrophytes. Diversity, 12, 162. https://doi.org/10.3390/d12040162
# pull in taxonomy corrections:
tnrs <- fread( file = "data&scripts/data/input/tnrs.final.csv", drop = 1)
tnrs[ , submittedname := tolower(gsub("\\.", "", submittedname)) ,]#make format match
sum(plants$TAXON %in% tnrs$submittedname)
## [1] 484941
tnrs[match(plants$TAXON, tnrs$submittedname), "species"]
## species
## <char>
## 1: Ceratophyllum demersum
## 2: Vallisneria americana
## 3: <NA>
## 4: <NA>
## 5: <NA>
## ---
## 732295: Myriophyllum sibiricum
## 732296: <NA>
## 732297: Heteranthera dubia
## 732298: Lemna trisulca
## 732299: Myriophyllum sibiricum
# implement name changes --------------------------------------------------------
# Add in a Taxon corrected column
plants[, TAXONC := tnrs[match(plants$TAXON, tnrs$submittedname), "species"], ]
#these are all looking good to me.
print(plants[ , .N , TAXONC], max.levels = 200)
## TAXONC N
## <char> <int>
## 1: Ceratophyllum demersum 91658
## 2: Vallisneria americana 13764
## 3: <NA> 247358
## 4: Potamogeton robbinsii 10131
## 5: Nuphar variegata 10020
## ---
## 152: Littorella uniflora 3
## 153: Andromeda glaucophylla 2
## 154: Sphagnum magellanicum 1
## 155: Elatine minima 3
## 156: Cicuta maculata 1
#tidy up names
plants[is.na(TAXONC), .N , TAXON ] #review unmatched
## TAXON N
## <char> <int>
## 1: drepanocladus 1772
## 2: <NA> 140457
## 3: carex 680
## 4: equisetum 294
## 5: sagittaria 1895
## 6: sparganium (floating) 865
## 7: eleocharis 1128
## 8: nitella 1481
## 9: chara 51713
## 10: sparganium (emergent) 90
## 11: nymphaeaceae 5
## 12: potamogeton (narrow) 7426
## 13: protectedspecies 1 243
## 14: utricularia 1314
## 15: iris 13
## 16: eleocharis smallii 67
## 17: salix 58
## 18: schoenoplectus 5094
## 19: sparganium 721
## 20: elodea 2519
## 21: najas 7351
## 22: isoetes 166
## 23: ranunculus 2091
## 24: hypericum 20
## 25: persicaria 174
## 26: impatiens 14
## 27: protectedspecies 2 884
## 28: potamogeton (broad) 61
## 29: myriophyllum 2078
## 30: protectedspecies 3 277
## 31: protectedspecies 4 114
## 32: poaceae 43
## 33: typha 1529
## 34: typha glauca 28
## 35: stuckenia 104
## 36: andromeda polifolia 2
## 37: eragrostis 4
## 38: scutellaria 7
## 39: alisma triviale 4
## 40: scirpus 639
## 41: juncus 45
## 42: lemna 1112
## 43: potamogeton 5876
## 44: wolffia 1205
## 45: zizania 502
## 46: protectedspecies 5 162
## 47: elatine 5
## 48: bidens 24
## 49: characeae 1479
## 50: nymphaea 1945
## 51: protectedspecies 6 3
## 52: lamiaceae 6
## 53: sphagnum 4
## 54: alnus 14
## 55: verbena 8
## 56: nuphar 1100
## 57: protectedspecies 7 14
## 58: protectedspecies 8 9
## 59: protectedspecies 9 58
## 60: zosterella 20
## 61: callitriche 2
## 62: protectedspecies 10 4
## 63: cyperaceae 6
## 64: alisma 29
## 65: ceratophyllum 4
## 66: cicuta 1
## 67: protectedspecies 11 1
## 68: nasturtium officinale 1
## 69: protectedspecies 12 11
## 70: bolboschoenus maritimus 2
## 71: ricciocarpos natans 2
## 72: riccia 12
## 73: eutrochium 3
## 74: nitellopsis 259
## 75: solidago 2
## 76: acorus 1
## 77: lysimachia 2
## TAXON N
plants[is.na(TAXONC) & str_detect(TAXON, "\\(", negate = T ) & sapply(strsplit(TAXON, " "), length) == 2,
.N ,
sub("\\b(\\w)(\\w*)\\b", "\\U\\1\\L\\2", TAXON, perl = TRUE) ]
## sub N
## <char> <int>
## 1: Protectedspecies 1 243
## 2: Eleocharis smallii 67
## 3: Protectedspecies 2 884
## 4: Protectedspecies 3 277
## 5: Protectedspecies 4 114
## 6: Typha glauca 28
## 7: Andromeda polifolia 2
## 8: Alisma triviale 4
## 9: Protectedspecies 5 162
## 10: Protectedspecies 6 3
## 11: Protectedspecies 7 14
## 12: Protectedspecies 8 9
## 13: Protectedspecies 9 58
## 14: Protectedspecies 10 4
## 15: Protectedspecies 11 1
## 16: Nasturtium officinale 1
## 17: Protectedspecies 12 11
## 18: Bolboschoenus maritimus 2
## 19: Ricciocarpos natans 2
#fix first cap in binomials:
plants[is.na(TAXONC) & str_detect(TAXON, "\\(", negate = T ) & sapply(strsplit(TAXON, " "), length) == 2,
TAXON := sub("\\b(\\w)(\\w*)\\b", "\\U\\1\\L\\2", TAXON, perl = TRUE) ]
# overwrite TAXON with corrected names
plants[!is.na(TAXONC), TAXON := TAXONC]
#delete correction col:
plants[ , TAXONC := NULL ,]
#review taxa
plants[ , sort(unique(TAXON)) ,]
## [1] "acorus" "Acorus americanus"
## [3] "alisma" "Alisma triviale"
## [5] "alnus" "Andromeda glaucophylla"
## [7] "Andromeda polifolia" "Asclepias incarnata"
## [9] "Asteraceae" "Betula pumila"
## [11] "bidens" "Bidens beckii"
## [13] "Bolboschoenus fluviatilis" "Bolboschoenus maritimus"
## [15] "Boltonia asteroides" "Brasenia schreberi"
## [17] "Butomus umbellatus" "Calamagrostis canadensis"
## [19] "Calla palustris" "callitriche"
## [21] "Caltha palustris" "carex"
## [23] "Carex aquatilis" "Carex comosa"
## [25] "Carex lacustris" "Carex pellita"
## [27] "Carex scoparia" "ceratophyllum"
## [29] "Ceratophyllum demersum" "Chamaedaphne calyculata"
## [31] "chara" "Chara canescens"
## [33] "Chara globularis" "characeae"
## [35] "cicuta" "Cicuta maculata"
## [37] "cyperaceae" "drepanocladus"
## [39] "Drepanocladus aduncus" "Dulichium arundinaceum"
## [41] "elatine" "Elatine minima"
## [43] "eleocharis" "Eleocharis acicularis"
## [45] "Eleocharis erythropoda" "Eleocharis palustris"
## [47] "Eleocharis smallii" "elodea"
## [49] "Elodea canadensis" "Elodea nuttallii"
## [51] "equisetum" "Equisetum fluviatile"
## [53] "eragrostis" "Eriocaulon aquaticum"
## [55] "Eupatorium dubium" "Eupatorium maculatum"
## [57] "Eupatorium perfoliatum" "eutrochium"
## [59] "Fontinalis antipyretica" "Fontinalis sullivantii"
## [61] "Glyceria borealis" "Heteranthera dubia"
## [63] "Hippuris vulgaris" "hypericum"
## [65] "Hypericum ellipticum" "impatiens"
## [67] "Impatiens capensis" "iris"
## [69] "Iris versicolor" "Iris virginica"
## [71] "isoetes" "Isoetes echinospora"
## [73] "juncus" "Juncus arcticus"
## [75] "Juncus canadensis" "Juncus effusus"
## [77] "Juncus pelocarpus" "lamiaceae"
## [79] "Ledum groenlandicum" "Leersia oryzoides"
## [81] "lemna" "Lemna minor"
## [83] "Lemna trisulca" "Lemna turionifera"
## [85] "Littorella uniflora" "Lobelia dortmanna"
## [87] "Lychnothamnus barbatus" "Lycopus americanus"
## [89] "lysimachia" "Lysimachia terrestris"
## [91] "Lythrum salicaria" "Menyanthes trifoliata"
## [93] "Myrica gale" "myriophyllum"
## [95] "Myriophyllum alterniflorum" "Myriophyllum exalbescens"
## [97] "Myriophyllum farwellii" "Myriophyllum sibiricum"
## [99] "Myriophyllum spicatum" "Myriophyllum tenellum"
## [101] "Myriophyllum verticillatum" "najas"
## [103] "Najas flexilis" "Najas guadalupensis"
## [105] "Najas minor" "Nasturtium officinale"
## [107] "Nelumbo lutea" "nitella"
## [109] "nitellopsis" "nuphar"
## [111] "Nuphar advena" "Nuphar microphylla"
## [113] "Nuphar variegata" "nymphaea"
## [115] "Nymphaea odorata" "Nymphaea tuberosa"
## [117] "nymphaeaceae" "persicaria"
## [119] "Persicaria amphibia" "Persicaria lapathifolia"
## [121] "Phalaris arundinacea" "Phragmites australis"
## [123] "poaceae" "Polygonum amphibium"
## [125] "Pontederia cordata" "potamogeton"
## [127] "potamogeton (broad)" "potamogeton (narrow)"
## [129] "Potamogeton alpinus" "Potamogeton amplifolius"
## [131] "Potamogeton crispus" "Potamogeton epihydrus"
## [133] "Potamogeton foliosus" "Potamogeton friesii"
## [135] "Potamogeton gramineus" "Potamogeton hillii"
## [137] "Potamogeton illinoensis" "Potamogeton natans"
## [139] "Potamogeton nodosus" "Potamogeton obtusifolius"
## [141] "Potamogeton praelongus" "Potamogeton pusillus"
## [143] "Potamogeton richardsonii" "Potamogeton robbinsii"
## [145] "Potamogeton spirillus" "Potamogeton strictifolius"
## [147] "Potamogeton vaseyi" "Potamogeton zosteriformis"
## [149] "Potentilla palustris" "Protectedspecies 1"
## [151] "Protectedspecies 10" "Protectedspecies 11"
## [153] "Protectedspecies 12" "Protectedspecies 2"
## [155] "Protectedspecies 3" "Protectedspecies 4"
## [157] "Protectedspecies 5" "Protectedspecies 6"
## [159] "Protectedspecies 7" "Protectedspecies 8"
## [161] "Protectedspecies 9" "ranunculus"
## [163] "Ranunculus aquatilis" "Ranunculus flabellaris"
## [165] "Ranunculus flammula" "Ranunculus longirostris"
## [167] "riccia" "Riccia fluitans"
## [169] "Ricciocarpos natans" "Rumex orbiculatus"
## [171] "sagittaria" "Sagittaria cristata"
## [173] "Sagittaria cuneata" "Sagittaria graminea"
## [175] "Sagittaria latifolia" "Sagittaria rigida"
## [177] "salix" "schoenoplectus"
## [179] "Schoenoplectus acutus" "Schoenoplectus americanus"
## [181] "Schoenoplectus pungens" "Schoenoplectus subterminalis"
## [183] "Schoenoplectus tabernaemontani" "Schoenoplectus x oblongus"
## [185] "scirpus" "Scirpus atrovirens"
## [187] "Scirpus cyperinus" "Scirpus validus"
## [189] "Scolochloa festucacea" "Scorpidium scorpioides"
## [191] "scutellaria" "Scutellaria lateriflora"
## [193] "Sium suave" "solidago"
## [195] "sparganium" "sparganium (emergent)"
## [197] "sparganium (floating)" "Sparganium americanum"
## [199] "Sparganium angustifolium" "Sparganium emersum"
## [201] "Sparganium eurycarpum" "Sparganium fluctuans"
## [203] "Sparganium natans" "sphagnum"
## [205] "Sphagnum magellanicum" "Spirodela polyrhiza"
## [207] "stuckenia" "Stuckenia filiformis"
## [209] "Stuckenia pectinata" "Tolypella intricata"
## [211] "Triadenum fraseri" "typha"
## [213] "Typha angustifolia" "Typha glauca"
## [215] "Typha latifolia" "utricularia"
## [217] "Utricularia gibba" "Utricularia intermedia"
## [219] "Utricularia macrorhiza" "Utricularia minor"
## [221] "Utricularia vulgaris" "Vallisneria americana"
## [223] "verbena" "Veronica americana"
## [225] "wolffia" "Wolffia borealis"
## [227] "Wolffia columbiana" "Zannichellia palustris"
## [229] "zizania" "Zizania palustris"
## [231] "zosterella"
#only one species of nitellopsis-
plants[TAXON == "nitellopsis", TAXON := "Nitellopsis obtusa" , ]
# Make sure they aren't NA'd taxa that should be marked no-veg-found (are all NA taxa marked for NO_VEG_FOUND?) Yes, looks good
plants[ is.na(TAXON) , .N, NO_VEG_FOUND , ]
## NO_VEG_FOUND N
## <lgcl> <int>
## 1: TRUE 140457
# #now we need to ensure we retain rows where no useable taxa were found
# plants[ NO_VEG_FOUND == TRUE, , ]
#
# #now delete all rows where TAXON == DELETE & NO_VEG_FOUND == F
# # plants[ TAXON == "DELETE" & NO_VEG_FOUND == F, ]
# plants <- plants[ !(TAXON == "DELETE" & NO_VEG_FOUND == F), ]
# samples per taxon
plants[ , .N , TAXON][order(-N), ]
## TAXON N
## <char> <int>
## 1: <NA> 140457
## 2: Ceratophyllum demersum 91658
## 3: chara 51713
## 4: Potamogeton crispus 36032
## 5: Potamogeton zosteriformis 35772
## ---
## 228: Scorpidium scorpioides 1
## 229: Fontinalis sullivantii 1
## 230: acorus 1
## 231: Sphagnum magellanicum 1
## 232: Cicuta maculata 1
#and finally a new unique ID for each observation in the dataset
plants[, OBS_ID := .I]
#update SURVEY_ID
plants[ , length(unique(DATASOURCE)) , .(SURVEY_ID)][V1>2] #not needed unless this call is non-empty
## Empty data.table (0 rows and 2 cols): SURVEY_ID,V1
# plants[ , SURVEY_ID_NEW:= .GRP , .(SURVEY_ID, DATASOURCE) ]
# plant status information ------------------------------------------------
# native diversity must exclude invasives/introduced species, so generate a df that can be use to select cols in these categories
plants[ , sort(unique(TAXON)) , ][!(plants[ , unique(TAXON) , ] %in% c(rte[native_status == "I", mn_dnr_scientific_name],
"Nitellopsis obtusa", "Typha glauca"))]
## [1] "acorus" "Acorus americanus"
## [3] "alisma" "Alisma triviale"
## [5] "alnus" "Andromeda glaucophylla"
## [7] "Andromeda polifolia" "Asclepias incarnata"
## [9] "Asteraceae" "Betula pumila"
## [11] "bidens" "Bidens beckii"
## [13] "Bolboschoenus maritimus" "Boltonia asteroides"
## [15] "Brasenia schreberi" "Butomus umbellatus"
## [17] "Calamagrostis canadensis" "Calla palustris"
## [19] "callitriche" "Caltha palustris"
## [21] "carex" "Carex aquatilis"
## [23] "Carex comosa" "Carex lacustris"
## [25] "Carex pellita" "Carex scoparia"
## [27] "ceratophyllum" "Ceratophyllum demersum"
## [29] "Chamaedaphne calyculata" "chara"
## [31] "Chara canescens" "Chara globularis"
## [33] "characeae" "cicuta"
## [35] "Cicuta maculata" "cyperaceae"
## [37] "drepanocladus" "Drepanocladus aduncus"
## [39] "Dulichium arundinaceum" "elatine"
## [41] "Elatine minima" "eleocharis"
## [43] "Eleocharis acicularis" "Eleocharis erythropoda"
## [45] "Eleocharis palustris" "Eleocharis smallii"
## [47] "elodea" "Elodea canadensis"
## [49] "Elodea nuttallii" "equisetum"
## [51] "Equisetum fluviatile" "eragrostis"
## [53] "Eriocaulon aquaticum" "Eupatorium dubium"
## [55] "Eupatorium maculatum" "Eupatorium perfoliatum"
## [57] "eutrochium" "Fontinalis sullivantii"
## [59] "Glyceria borealis" "Heteranthera dubia"
## [61] "Hippuris vulgaris" "hypericum"
## [63] "Hypericum ellipticum" "impatiens"
## [65] "Impatiens capensis" "iris"
## [67] "Iris versicolor" "Iris virginica"
## [69] "isoetes" "Isoetes echinospora"
## [71] "juncus" "Juncus arcticus"
## [73] "Juncus canadensis" "Juncus effusus"
## [75] "Juncus pelocarpus" "lamiaceae"
## [77] "Ledum groenlandicum" "Leersia oryzoides"
## [79] "lemna" "Lemna minor"
## [81] "Lemna trisulca" "Lemna turionifera"
## [83] "Littorella uniflora" "Lobelia dortmanna"
## [85] "Lychnothamnus barbatus" "lysimachia"
## [87] "Lysimachia terrestris" "Lythrum salicaria"
## [89] "Menyanthes trifoliata" "Myrica gale"
## [91] "myriophyllum" "Myriophyllum exalbescens"
## [93] "Myriophyllum farwellii" "Myriophyllum sibiricum"
## [95] "Myriophyllum spicatum" "Myriophyllum tenellum"
## [97] "najas" "Najas flexilis"
## [99] "Najas guadalupensis" "Najas minor"
## [101] "Nasturtium officinale" "Nelumbo lutea"
## [103] "nitella" "Nitellopsis obtusa"
## [105] "nuphar" "Nuphar advena"
## [107] "Nuphar microphylla" "Nuphar variegata"
## [109] "nymphaea" "Nymphaea odorata"
## [111] "Nymphaea tuberosa" "nymphaeaceae"
## [113] "persicaria" "Persicaria amphibia"
## [115] "Persicaria lapathifolia" "Phalaris arundinacea"
## [117] "Phragmites australis" "poaceae"
## [119] "Polygonum amphibium" "Pontederia cordata"
## [121] "potamogeton" "potamogeton (broad)"
## [123] "potamogeton (narrow)" "Potamogeton alpinus"
## [125] "Potamogeton amplifolius" "Potamogeton crispus"
## [127] "Potamogeton epihydrus" "Potamogeton foliosus"
## [129] "Potamogeton friesii" "Potamogeton gramineus"
## [131] "Potamogeton hillii" "Potamogeton illinoensis"
## [133] "Potamogeton natans" "Potamogeton nodosus"
## [135] "Potamogeton obtusifolius" "Potamogeton praelongus"
## [137] "Potamogeton pusillus" "Potamogeton richardsonii"
## [139] "Potamogeton robbinsii" "Potamogeton spirillus"
## [141] "Potamogeton strictifolius" "Potamogeton vaseyi"
## [143] "Potamogeton zosteriformis" "Protectedspecies 1"
## [145] "Protectedspecies 10" "Protectedspecies 11"
## [147] "Protectedspecies 12" "Protectedspecies 2"
## [149] "Protectedspecies 4" "Protectedspecies 5"
## [151] "Protectedspecies 6" "Protectedspecies 7"
## [153] "Protectedspecies 8" "Protectedspecies 9"
## [155] "ranunculus" "Ranunculus aquatilis"
## [157] "Ranunculus flabellaris" "Ranunculus flammula"
## [159] "Ranunculus longirostris" "riccia"
## [161] "Riccia fluitans" "Ricciocarpos natans"
## [163] "Rumex orbiculatus" "sagittaria"
## [165] "Sagittaria cristata" "Sagittaria cuneata"
## [167] "Sagittaria graminea" "Sagittaria latifolia"
## [169] "Sagittaria rigida" "salix"
## [171] "schoenoplectus" "Schoenoplectus acutus"
## [173] "Schoenoplectus americanus" "Schoenoplectus pungens"
## [175] "Schoenoplectus subterminalis" "Schoenoplectus tabernaemontani"
## [177] "Schoenoplectus x oblongus" "scirpus"
## [179] "Scirpus atrovirens" "Scirpus cyperinus"
## [181] "Scirpus validus" "Scolochloa festucacea"
## [183] "Scorpidium scorpioides" "Scutellaria lateriflora"
## [185] "Sium suave" "solidago"
## [187] "sparganium" "sparganium (emergent)"
## [189] "sparganium (floating)" "Sparganium americanum"
## [191] "Sparganium angustifolium" "Sparganium emersum"
## [193] "Sparganium eurycarpum" "Sparganium fluctuans"
## [195] "Sparganium natans" "sphagnum"
## [197] "Sphagnum magellanicum" "Spirodela polyrhiza"
## [199] "stuckenia" "Stuckenia filiformis"
## [201] "Stuckenia pectinata" "Tolypella intricata"
## [203] "typha" "Typha angustifolia"
## [205] "Typha glauca" "Typha latifolia"
## [207] "utricularia" "Utricularia intermedia"
## [209] "Utricularia macrorhiza" "Utricularia minor"
## [211] "Utricularia vulgaris" "Vallisneria americana"
## [213] "verbena" "Veronica americana"
## [215] "wolffia" "Wolffia borealis"
## [217] "Wolffia columbiana" "Zannichellia palustris"
## [219] "zizania" "Zizania palustris"
## [221] "zosterella" NA
#we'll use this to select columns as we calculate diversty metrics!
natcols <- plants[ !is.na(TAXON), unique(TAXON) , ][!(plants[ !is.na(TAXON), unique(TAXON) , ] %in% c(rte[native_status == "I", mn_dnr_scientific_name],
"Nitellopsis obtusa", "Typha glauca"))]
taxacols <- plants[!is.na(TAXON) , unique(TAXON) , ]
# Prep Data Products ------------------------------------------------------
# **plants_db -------------------------------------------------------------
names(plants)
## [1] "DOW" "Secchi_m" "SECCHI_DATE"
## [4] "SURVEY_ID" "LAKE_NAME" "DATASOURCE"
## [7] "SURVEY_DATE" "STA_NBR_DATASOURCE" "DEPTH_FT"
## [10] "NO_VEG_FOUND" "REL_ABUND" "WHOLE_RAKE_REL_ABUND"
## [13] "SUBSTRATE" "SURVEYOR" "TAXON"
## [16] "SURVEY_ID_DATASOURCE" "SAMPLE_NOTES" "SURFACE_GROWTH"
## [19] "POINT_LVL_SECCHI" "POINT_ID" "OBS_ID"
## [22] "OLD_SURVEY_ID" "DATESURVEYSTART" "COHORT"
## [25] "DATEINFO" "MONTH" "DAY"
## [28] "YEAR" "SUBBASIN" "INVENTORY_STAFF"
## [31] "INVENTORY_STAFFDATE" "USEABLE" "CLEANED"
## [34] "INDATABASE" "INVENTORY_NOTES" "SUBMISSION_STAFF"
## [37] "SUBMISSION_STAFFDATE" "SUBMISSION_NOTES" "MULTIPARTSURVEY"
## [40] "SURVEY_FEEDBACK" "SURVEY_DATASOURCE" "RAKE_SCALE_USED"
## [43] "REL_ABUND_CORRECTED" "order_ID" "Longitude"
## [46] "Latitude" "SECCHI_m_ACCEPTED" "proplight"
plants[ , .N , month(SURVEY_DATE) ]
## month N
## <int> <int>
## 1: 8 224560
## 2: 7 237049
## 3: 6 187167
## 4: 9 27043
## 5: 5 43651
## 6: 10 1641
## 7: 4 10214
## 8: 3 974
plants[ , unique(DATESURVEYSTART), , ][1:100]
## [1] "8/16/2011" "8/10/2011" "7/17/2014" "6/25/2014" "7/15/2009" "7/29/2011"
## [7] "7/21/2011" "6/28/2011" "8/3/2011" "6/29/2010" "7/1/2009" "6/10/2002"
## [13] "7/12/2006" "6/30/2009" "6/18/2014" "7/31/2009" "7/10/2009" "7/11/2012"
## [19] "6/18/2012" "6/8/2011" "6/29/2011" "8/5/2011" "8/20/2012" "8/26/2010"
## [25] "6/16/2011" "8/6/2009" "7/7/2014" "6/16/2005" "6/3/2009" "6/21/2012"
## [31] "6/21/2007" "6/12/2007" "7/9/2010" "7/23/2012" "8/17/2011" "7/3/2014"
## [37] "6/23/2011" "6/15/2007" "7/31/2014" "8/3/2009" "7/2/2010" "6/28/2010"
## [43] "6/17/2013" "7/7/2003" "6/6/2006" "7/19/2012" "8/15/2012" "7/12/2012"
## [49] "8/16/2012" "7/15/2003" "8/6/2013" "6/13/2006" "8/18/2005" "8/18/2014"
## [55] "6/14/2006" "8/22/2002" "7/1/2005" "7/23/2008" "7/22/2011" "7/31/2003"
## [61] "7/22/2003" "6/29/2004" "6/12/2012" "7/24/2012" "7/23/2003" "7/25/2003"
## [67] "9/23/2014" "8/21/2002" "9/22/2014" "7/29/2013" "8/1/2013" "7/25/2013"
## [73] "6/20/2011" "8/11/2011" "7/20/2004" "7/30/2008" "7/21/2010" "8/15/2003"
## [79] "8/12/2003" "9/9/2003" "6/24/2003" "6/19/2013" "9/17/2002" "6/24/2013"
## [85] "7/29/2003" "8/19/2003" "9/18/2002" "6/25/2003" "8/6/2012" "8/14/2003"
## [91] "8/2/2012" "9/19/2003" "8/8/2011" "8/15/2011" "9/24/2014" "8/7/2012"
## [97] "7/9/2013" "6/16/2003" "7/28/2011" "7/28/2010"
plants[ POINT_ID == 151207 , ] #one of these has a substrate, one not
## DOW Secchi_m SECCHI_DATE SURVEY_ID LAKE_NAME DATASOURCE SURVEY_DATE
## <int> <num> <IDat> <int> <char> <char> <IDat>
## 1: 29014600 4.8768 2016-08-22 1440 belle taine source_2 2016-08-22
## 2: 29014600 4.8768 2016-08-22 1440 belle taine source_2 2016-08-22
## STA_NBR_DATASOURCE DEPTH_FT NO_VEG_FOUND REL_ABUND WHOLE_RAKE_REL_ABUND
## <char> <num> <lgcl> <int> <char>
## 1: 27 3.5 TRUE NA
## 2: 27 3.5 TRUE NA
## SUBSTRATE SURVEYOR TAXON SURVEY_ID_DATASOURCE SAMPLE_NOTES
## <char> <char> <char> <char> <char>
## 1: surveyors_6 <NA> 11981608342433000
## 2: sand surveyors_6 <NA> 11981608342433000
## SURFACE_GROWTH POINT_LVL_SECCHI POINT_ID OBS_ID OLD_SURVEY_ID
## <char> <num> <int> <int> <int>
## 1: NA 151207 577513 1440
## 2: NA 151207 577514 1440
## DATESURVEYSTART COHORT DATEINFO MONTH DAY YEAR SUBBASIN INVENTORY_STAFF
## <char> <int> <char> <int> <int> <int> <char> <char>
## 1: 8/22/2016 NA 8 22 2016
## 2: 8/22/2016 NA 8 22 2016
## INVENTORY_STAFFDATE USEABLE CLEANED INDATABASE INVENTORY_NOTES
## <char> <char> <char> <lgcl> <char>
## 1: TRUE
## 2: TRUE
## SUBMISSION_STAFF SUBMISSION_STAFFDATE SUBMISSION_NOTES MULTIPARTSURVEY
## <char> <char> <char> <num>
## 1: staff_1 NA
## 2: staff_1 NA
## SURVEY_FEEDBACK SURVEY_DATASOURCE RAKE_SCALE_USED REL_ABUND_CORRECTED
## <char> <char> <int> <int>
## 1: <NA> NA NA
## 2: <NA> NA NA
## order_ID Longitude Latitude SECCHI_m_ACCEPTED proplight
## <int> <num> <num> <num> <num>
## 1: 18230 -94.91323 46.93566 4.8768 0.6042556
## 2: 18230 -94.91323 46.93566 4.8768 0.6042556
any(duplicated(plants[,.SD, .SDcols = !c("SUBSTRATE","OBS_ID")]))
## [1] TRUE
plants[duplicated(plants[,.SD, .SDcols = !c("SUBSTRATE","OBS_ID")]), , ]
## DOW Secchi_m SECCHI_DATE SURVEY_ID LAKE_NAME DATASOURCE SURVEY_DATE
## <int> <num> <IDat> <int> <char> <char> <IDat>
## 1: 29014600 4.8768 2016-08-22 1440 belle taine source_2 2016-08-22
## STA_NBR_DATASOURCE DEPTH_FT NO_VEG_FOUND REL_ABUND WHOLE_RAKE_REL_ABUND
## <char> <num> <lgcl> <int> <char>
## 1: 27 3.5 TRUE NA
## SUBSTRATE SURVEYOR TAXON SURVEY_ID_DATASOURCE SAMPLE_NOTES
## <char> <char> <char> <char> <char>
## 1: sand surveyors_6 <NA> 11981608342433000
## SURFACE_GROWTH POINT_LVL_SECCHI POINT_ID OBS_ID OLD_SURVEY_ID
## <char> <num> <int> <int> <int>
## 1: NA 151207 577514 1440
## DATESURVEYSTART COHORT DATEINFO MONTH DAY YEAR SUBBASIN INVENTORY_STAFF
## <char> <int> <char> <int> <int> <int> <char> <char>
## 1: 8/22/2016 NA 8 22 2016
## INVENTORY_STAFFDATE USEABLE CLEANED INDATABASE INVENTORY_NOTES
## <char> <char> <char> <lgcl> <char>
## 1: TRUE
## SUBMISSION_STAFF SUBMISSION_STAFFDATE SUBMISSION_NOTES MULTIPARTSURVEY
## <char> <char> <char> <num>
## 1: staff_1 NA
## SURVEY_FEEDBACK SURVEY_DATASOURCE RAKE_SCALE_USED REL_ABUND_CORRECTED
## <char> <char> <int> <int>
## 1: <NA> NA NA
## order_ID Longitude Latitude SECCHI_m_ACCEPTED proplight
## <int> <num> <num> <num> <num>
## 1: 18230 -94.91323 46.93566 4.8768 0.6042556
plants <- plants[!duplicated(plants[,.SD, .SDcols = !c("SUBSTRATE","OBS_ID")]), , ]
#check has orderID field
plants[ , .N , is.na(order_ID) ]
## is.na N
## <lgcl> <int>
## 1: FALSE 728695
## 2: TRUE 3603
plants[is.na(order_ID) , .N , LAKE_NAME ] #I've not been able to resolve the location on these WBs, so we'll leave them un-georeferenced
## LAKE_NAME N
## <char> <int>
## 1: pool 2 97
## 2: reynen pond 46
## 3: unnamed delong wpa 22
## 4: big sob 64
## 5: ivanhoe wma east basin 26
## 6: olson pool 104
## 7: goldmine slough section - vermilion river 215
## 8: vermilion falls section - vermilion river 88
## 9: gull river 335
## 10: mississippi river 1769
## 11: little elk wma 98
## 12: unnamed 20
## 13: sand prairie wma 75
## 14: loerch wma impoundment 31
## 15: daggett brook wma impoundment 107
## 16: sterle pool 175
## 17: trettle pool 323
## 18: dundee marsh 8
names(plants)
## [1] "DOW" "Secchi_m" "SECCHI_DATE"
## [4] "SURVEY_ID" "LAKE_NAME" "DATASOURCE"
## [7] "SURVEY_DATE" "STA_NBR_DATASOURCE" "DEPTH_FT"
## [10] "NO_VEG_FOUND" "REL_ABUND" "WHOLE_RAKE_REL_ABUND"
## [13] "SUBSTRATE" "SURVEYOR" "TAXON"
## [16] "SURVEY_ID_DATASOURCE" "SAMPLE_NOTES" "SURFACE_GROWTH"
## [19] "POINT_LVL_SECCHI" "POINT_ID" "OBS_ID"
## [22] "OLD_SURVEY_ID" "DATESURVEYSTART" "COHORT"
## [25] "DATEINFO" "MONTH" "DAY"
## [28] "YEAR" "SUBBASIN" "INVENTORY_STAFF"
## [31] "INVENTORY_STAFFDATE" "USEABLE" "CLEANED"
## [34] "INDATABASE" "INVENTORY_NOTES" "SUBMISSION_STAFF"
## [37] "SUBMISSION_STAFFDATE" "SUBMISSION_NOTES" "MULTIPARTSURVEY"
## [40] "SURVEY_FEEDBACK" "SURVEY_DATASOURCE" "RAKE_SCALE_USED"
## [43] "REL_ABUND_CORRECTED" "order_ID" "Longitude"
## [46] "Latitude" "SECCHI_m_ACCEPTED" "proplight"
# **point level p/a -------------------------------------------------
plants[ ,.N , REL_ABUND]
## REL_ABUND N
## <int> <int>
## 1: NA 535525
## 2: 1 116421
## 3: 2 39231
## 4: 3 21579
## 5: 4 13274
## 6: 5 6268
plants[ , .N , INDATABASE]
## INDATABASE N
## <lgcl> <int>
## 1: TRUE 732298
# In case of desired fill 1/0 rather than T/F
# plants[ , FILLFIELD := as.numeric(INDATABASE) ,]
# plants[ , .N , FILLFIELD ]
plants_occurrence_wide <- dcast(plants,
SURVEY_ID+
POINT_ID +
NO_VEG_FOUND +
proplight +
DEPTH_FT +
SUBSTRATE +
SURVEYOR ~ TAXON, fun.aggregate = last, value.var = "INDATABASE", fill = FALSE) #Specify a logical var for all included data (INDATABASE) so that this species matrix is all T/F; see previous lines for a 0/1 fill
#diversity metrics (only have richness with p/a, no "evenness", no "diversity"):
# point_natcols <- names(plants_occurrence_wide)[names(plants_occurrence_wide)%in%natcols]
# names(plants_occurrence_wide)
plants_occurrence_wide[ , richness := rowSums(.SD > 0), .SDcols = taxacols ]
plants_occurrence_wide[ , nat_richness := rowSums(.SD > 0), .SDcols = natcols ]
#bring all survey level variables back into the dataset
#check join
# nrow(plants[plants_occurrence_wide, on = .(POINT_ID, SURVEY_ID, NO_VEG_FOUND, proplight,DEPTH_FT,SUBSTRATE,SURVEYOR), mult = "last" , ])
# names(plants[plants_occurrence_wide, on = .(POINT_ID, SURVEY_ID, NO_VEG_FOUND, proplight,DEPTH_FT,SUBSTRATE,SURVEYOR), mult = "last" , ])
#
plants_occurrence_wide <- plants[plants_occurrence_wide, on = .(POINT_ID, SURVEY_ID, NO_VEG_FOUND, proplight, DEPTH_FT, SUBSTRATE, SURVEYOR), mult = "last" , ]
#and drop unneeded cols & those with loss of meaning through munging:
names(plants_occurrence_wide)
## [1] "DOW" "Secchi_m"
## [3] "SECCHI_DATE" "SURVEY_ID"
## [5] "LAKE_NAME" "DATASOURCE"
## [7] "SURVEY_DATE" "STA_NBR_DATASOURCE"
## [9] "DEPTH_FT" "NO_VEG_FOUND"
## [11] "REL_ABUND" "WHOLE_RAKE_REL_ABUND"
## [13] "SUBSTRATE" "SURVEYOR"
## [15] "TAXON" "SURVEY_ID_DATASOURCE"
## [17] "SAMPLE_NOTES" "SURFACE_GROWTH"
## [19] "POINT_LVL_SECCHI" "POINT_ID"
## [21] "OBS_ID" "OLD_SURVEY_ID"
## [23] "DATESURVEYSTART" "COHORT"
## [25] "DATEINFO" "MONTH"
## [27] "DAY" "YEAR"
## [29] "SUBBASIN" "INVENTORY_STAFF"
## [31] "INVENTORY_STAFFDATE" "USEABLE"
## [33] "CLEANED" "INDATABASE"
## [35] "INVENTORY_NOTES" "SUBMISSION_STAFF"
## [37] "SUBMISSION_STAFFDATE" "SUBMISSION_NOTES"
## [39] "MULTIPARTSURVEY" "SURVEY_FEEDBACK"
## [41] "SURVEY_DATASOURCE" "RAKE_SCALE_USED"
## [43] "REL_ABUND_CORRECTED" "order_ID"
## [45] "Longitude" "Latitude"
## [47] "SECCHI_m_ACCEPTED" "proplight"
## [49] "NA" "Acorus americanus"
## [51] "Alisma triviale" "Andromeda glaucophylla"
## [53] "Andromeda polifolia" "Asclepias incarnata"
## [55] "Asteraceae" "Betula pumila"
## [57] "Bidens beckii" "Bolboschoenus fluviatilis"
## [59] "Bolboschoenus maritimus" "Boltonia asteroides"
## [61] "Brasenia schreberi" "Butomus umbellatus"
## [63] "Calamagrostis canadensis" "Calla palustris"
## [65] "Caltha palustris" "Carex aquatilis"
## [67] "Carex comosa" "Carex lacustris"
## [69] "Carex pellita" "Carex scoparia"
## [71] "Ceratophyllum demersum" "Chamaedaphne calyculata"
## [73] "Chara canescens" "Chara globularis"
## [75] "Cicuta maculata" "Drepanocladus aduncus"
## [77] "Dulichium arundinaceum" "Elatine minima"
## [79] "Eleocharis acicularis" "Eleocharis erythropoda"
## [81] "Eleocharis palustris" "Eleocharis smallii"
## [83] "Elodea canadensis" "Elodea nuttallii"
## [85] "Equisetum fluviatile" "Eriocaulon aquaticum"
## [87] "Eupatorium dubium" "Eupatorium maculatum"
## [89] "Eupatorium perfoliatum" "Fontinalis antipyretica"
## [91] "Fontinalis sullivantii" "Glyceria borealis"
## [93] "Heteranthera dubia" "Hippuris vulgaris"
## [95] "Hypericum ellipticum" "Impatiens capensis"
## [97] "Iris versicolor" "Iris virginica"
## [99] "Isoetes echinospora" "Juncus arcticus"
## [101] "Juncus canadensis" "Juncus effusus"
## [103] "Juncus pelocarpus" "Ledum groenlandicum"
## [105] "Leersia oryzoides" "Lemna minor"
## [107] "Lemna trisulca" "Lemna turionifera"
## [109] "Littorella uniflora" "Lobelia dortmanna"
## [111] "Lychnothamnus barbatus" "Lycopus americanus"
## [113] "Lysimachia terrestris" "Lythrum salicaria"
## [115] "Menyanthes trifoliata" "Myrica gale"
## [117] "Myriophyllum alterniflorum" "Myriophyllum exalbescens"
## [119] "Myriophyllum farwellii" "Myriophyllum sibiricum"
## [121] "Myriophyllum spicatum" "Myriophyllum tenellum"
## [123] "Myriophyllum verticillatum" "Najas flexilis"
## [125] "Najas guadalupensis" "Najas minor"
## [127] "Nasturtium officinale" "Nelumbo lutea"
## [129] "Nitellopsis obtusa" "Nuphar advena"
## [131] "Nuphar microphylla" "Nuphar variegata"
## [133] "Nymphaea odorata" "Nymphaea tuberosa"
## [135] "Persicaria amphibia" "Persicaria lapathifolia"
## [137] "Phalaris arundinacea" "Phragmites australis"
## [139] "Polygonum amphibium" "Pontederia cordata"
## [141] "Potamogeton alpinus" "Potamogeton amplifolius"
## [143] "Potamogeton crispus" "Potamogeton epihydrus"
## [145] "Potamogeton foliosus" "Potamogeton friesii"
## [147] "Potamogeton gramineus" "Potamogeton hillii"
## [149] "Potamogeton illinoensis" "Potamogeton natans"
## [151] "Potamogeton nodosus" "Potamogeton obtusifolius"
## [153] "Potamogeton praelongus" "Potamogeton pusillus"
## [155] "Potamogeton richardsonii" "Potamogeton robbinsii"
## [157] "Potamogeton spirillus" "Potamogeton strictifolius"
## [159] "Potamogeton vaseyi" "Potamogeton zosteriformis"
## [161] "Potentilla palustris" "Protectedspecies 1"
## [163] "Protectedspecies 10" "Protectedspecies 11"
## [165] "Protectedspecies 12" "Protectedspecies 2"
## [167] "Protectedspecies 3" "Protectedspecies 4"
## [169] "Protectedspecies 5" "Protectedspecies 6"
## [171] "Protectedspecies 7" "Protectedspecies 8"
## [173] "Protectedspecies 9" "Ranunculus aquatilis"
## [175] "Ranunculus flabellaris" "Ranunculus flammula"
## [177] "Ranunculus longirostris" "Riccia fluitans"
## [179] "Ricciocarpos natans" "Rumex orbiculatus"
## [181] "Sagittaria cristata" "Sagittaria cuneata"
## [183] "Sagittaria graminea" "Sagittaria latifolia"
## [185] "Sagittaria rigida" "Schoenoplectus acutus"
## [187] "Schoenoplectus americanus" "Schoenoplectus pungens"
## [189] "Schoenoplectus subterminalis" "Schoenoplectus tabernaemontani"
## [191] "Schoenoplectus x oblongus" "Scirpus atrovirens"
## [193] "Scirpus cyperinus" "Scirpus validus"
## [195] "Scolochloa festucacea" "Scorpidium scorpioides"
## [197] "Scutellaria lateriflora" "Sium suave"
## [199] "Sparganium americanum" "Sparganium angustifolium"
## [201] "Sparganium emersum" "Sparganium eurycarpum"
## [203] "Sparganium fluctuans" "Sparganium natans"
## [205] "Sphagnum magellanicum" "Spirodela polyrhiza"
## [207] "Stuckenia filiformis" "Stuckenia pectinata"
## [209] "Tolypella intricata" "Triadenum fraseri"
## [211] "Typha angustifolia" "Typha glauca"
## [213] "Typha latifolia" "Utricularia gibba"
## [215] "Utricularia intermedia" "Utricularia macrorhiza"
## [217] "Utricularia minor" "Utricularia vulgaris"
## [219] "Vallisneria americana" "Veronica americana"
## [221] "Wolffia borealis" "Wolffia columbiana"
## [223] "Zannichellia palustris" "Zizania palustris"
## [225] "acorus" "alisma"
## [227] "alnus" "bidens"
## [229] "callitriche" "carex"
## [231] "ceratophyllum" "chara"
## [233] "characeae" "cicuta"
## [235] "cyperaceae" "drepanocladus"
## [237] "elatine" "eleocharis"
## [239] "elodea" "equisetum"
## [241] "eragrostis" "eutrochium"
## [243] "hypericum" "impatiens"
## [245] "iris" "isoetes"
## [247] "juncus" "lamiaceae"
## [249] "lemna" "lysimachia"
## [251] "myriophyllum" "najas"
## [253] "nitella" "nuphar"
## [255] "nymphaea" "nymphaeaceae"
## [257] "persicaria" "poaceae"
## [259] "potamogeton" "potamogeton (broad)"
## [261] "potamogeton (narrow)" "ranunculus"
## [263] "riccia" "sagittaria"
## [265] "salix" "schoenoplectus"
## [267] "scirpus" "scutellaria"
## [269] "solidago" "sparganium"
## [271] "sparganium (emergent)" "sparganium (floating)"
## [273] "sphagnum" "stuckenia"
## [275] "typha" "utricularia"
## [277] "verbena" "wolffia"
## [279] "zizania" "zosterella"
## [281] "richness" "nat_richness"
plants_occurrence_wide[ , c("STA_NBR_DATASOURCE","SURVEY_ID_DATASOURCE",
"REL_ABUND", "REL_ABUND_CORRECTED", "WHOLE_RAKE_REL_ABUND",
"SAMPLE_NOTES",
"SURFACE_GROWTH",
"POINT_LVL_SECCHI",
"OLD_SURVEY_ID",
"COHORT",
"DATEINFO", "MONTH", "DAY", "YEAR", "DATESURVEYSTART",
"INVENTORY_STAFF", "INVENTORY_STAFFDATE", "INVENTORY_NOTES",
"USEABLE", "CLEANED", "INDATABASE",
"SUBMISSION_STAFF", "SUBMISSION_STAFFDATE", "SUBMISSION_NOTES",
"SURVEY_FEEDBACK", "DATASOURCE", "RAKE_SCALE_USED",
"NA", "TAXON", "SUBSTRATE", "OBS_ID", "NO_VEG_FOUND") := NULL , ]
setcolorder(plants_occurrence_wide, c("DOW", "LAKE_NAME", "order_ID", "SUBBASIN",
"SURVEY_ID", "SURVEY_DATASOURCE", "SURVEY_DATE", "MULTIPARTSURVEY", "SURVEYOR",
"Secchi_m", "SECCHI_DATE", "SECCHI_m_ACCEPTED",
"POINT_ID" ,"DEPTH_FT", "proplight", "Longitude", "Latitude"))
#check to make sure I didn't dump something critical for an ident:
plants_occurrence_wide[duplicated(plants_occurrence_wide)]
## Empty data.table (0 rows and 250 cols): DOW,LAKE_NAME,order_ID,SUBBASIN,SURVEY_ID,SURVEY_DATASOURCE...
# **point level rake abund --------------------------------------------------
plants_rakeabund_wide <- dcast(plants[!is.na(RAKE_SCALE_USED)], SURVEY_ID+
POINT_ID +
NO_VEG_FOUND +
proplight +
DEPTH_FT +
SUBSTRATE +
SURVEYOR ~ TAXON, value.var = "REL_ABUND_CORRECTED", fun.aggregate = last, fill = 0)
plants_rakeabund_wide[ ,.N , NO_VEG_FOUND]
## NO_VEG_FOUND N
## <lgcl> <int>
## 1: FALSE 79660
## 2: TRUE 39208
#calculate diversity metrics for each rake throw
rake_taxacols <- names(plants_rakeabund_wide)[names(plants_rakeabund_wide)%in%taxacols]
rake_natcols <- names(plants_rakeabund_wide)[names(plants_rakeabund_wide)%in%natcols]
# names(plants_rakeabund_wide)
# diversity metrics
plants_rakeabund_wide[ , shannon_div := diversity(plants_rakeabund_wide[,.SD, .SDcols = rake_taxacols],index = "shannon") ]
plants_rakeabund_wide[ , simpsons_div := diversity(plants_rakeabund_wide[,.SD, .SDcols = rake_taxacols],index = "invsimpson") ]
plants_rakeabund_wide[ , shannon_div_nat := diversity(plants_rakeabund_wide[,.SD, .SDcols = rake_natcols],index = "shannon") ]
plants_rakeabund_wide[ , simpsons_div_nat := diversity(plants_rakeabund_wide[,.SD, .SDcols = rake_natcols],index = "invsimpson") ]
# richness
plants_rakeabund_wide[ , richness := rowSums(.SD > 0), .SDcols = rake_taxacols ]
plants_rakeabund_wide[ , nat_richness := rowSums(.SD > 0), .SDcols = rake_natcols ]
summary(plants_rakeabund_wide$`Potamogeton crispus`)#check that max rakeabund is 3
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.2689 0.0000 3.0000
#bring all survey level variables back into the dataset
#check join
# nrow(plants[plants_rakeabund_wide, on = .(POINT_ID, SURVEY_ID, NO_VEG_FOUND, proplight,DEPTH_FT,SUBSTRATE,SURVEYOR), mult = "last" , ])
# names(plants[plants_rakeabund_wide, on = .(POINT_ID, SURVEY_ID, NO_VEG_FOUND, proplight,DEPTH_FT,SUBSTRATE,SURVEYOR), mult = "last" , ])
#
plants_rakeabund_wide <- plants[plants_rakeabund_wide, on = .(POINT_ID, SURVEY_ID, NO_VEG_FOUND, proplight,DEPTH_FT,SURVEYOR, SUBSTRATE), mult = "last" , ]
#and drop unneeded cols & those with loss of meaning through munging:
plants_rakeabund_wide[ , c("STA_NBR_DATASOURCE", "REL_ABUND", "REL_ABUND_CORRECTED",
"WHOLE_RAKE_REL_ABUND", "SURVEY_ID_DATASOURCE", "SAMPLE_NOTES",
"SURFACE_GROWTH", "POINT_LVL_SECCHI", "OLD_SURVEY_ID", "DATESURVEYSTART",
"COHORT", "DATEINFO", "MONTH", "DAY", "YEAR", "INVENTORY_STAFF", "INVENTORY_STAFFDATE",
"INVENTORY_NOTES", "USEABLE", "CLEANED", "INDATABASE", "SUBMISSION_STAFF",
"SUBMISSION_STAFFDATE", "SUBMISSION_NOTES", "SURVEY_FEEDBACK", "DATASOURCE",
"RAKE_SCALE_USED") := NULL , ]
names(plants_rakeabund_wide)
## [1] "DOW" "Secchi_m"
## [3] "SECCHI_DATE" "SURVEY_ID"
## [5] "LAKE_NAME" "SURVEY_DATE"
## [7] "DEPTH_FT" "NO_VEG_FOUND"
## [9] "SUBSTRATE" "SURVEYOR"
## [11] "TAXON" "POINT_ID"
## [13] "OBS_ID" "SUBBASIN"
## [15] "MULTIPARTSURVEY" "SURVEY_DATASOURCE"
## [17] "order_ID" "Longitude"
## [19] "Latitude" "SECCHI_m_ACCEPTED"
## [21] "proplight" "NA"
## [23] "Bidens beckii" "Bolboschoenus fluviatilis"
## [25] "Brasenia schreberi" "Caltha palustris"
## [27] "Carex comosa" "Carex pellita"
## [29] "Carex scoparia" "Ceratophyllum demersum"
## [31] "Chara globularis" "Eleocharis acicularis"
## [33] "Eleocharis erythropoda" "Eleocharis palustris"
## [35] "Elodea canadensis" "Elodea nuttallii"
## [37] "Equisetum fluviatile" "Fontinalis antipyretica"
## [39] "Glyceria borealis" "Heteranthera dubia"
## [41] "Hippuris vulgaris" "Iris virginica"
## [43] "Isoetes echinospora" "Juncus arcticus"
## [45] "Juncus canadensis" "Juncus effusus"
## [47] "Juncus pelocarpus" "Lemna minor"
## [49] "Lemna trisulca" "Lychnothamnus barbatus"
## [51] "Lythrum salicaria" "Myriophyllum exalbescens"
## [53] "Myriophyllum farwellii" "Myriophyllum sibiricum"
## [55] "Myriophyllum spicatum" "Myriophyllum verticillatum"
## [57] "Najas flexilis" "Najas guadalupensis"
## [59] "Najas minor" "Nelumbo lutea"
## [61] "Nitellopsis obtusa" "Nuphar advena"
## [63] "Nuphar variegata" "Nymphaea odorata"
## [65] "Nymphaea tuberosa" "Persicaria amphibia"
## [67] "Phalaris arundinacea" "Phragmites australis"
## [69] "Polygonum amphibium" "Pontederia cordata"
## [71] "Potamogeton amplifolius" "Potamogeton crispus"
## [73] "Potamogeton epihydrus" "Potamogeton foliosus"
## [75] "Potamogeton friesii" "Potamogeton gramineus"
## [77] "Potamogeton illinoensis" "Potamogeton natans"
## [79] "Potamogeton nodosus" "Potamogeton obtusifolius"
## [81] "Potamogeton praelongus" "Potamogeton pusillus"
## [83] "Potamogeton richardsonii" "Potamogeton robbinsii"
## [85] "Potamogeton spirillus" "Potamogeton strictifolius"
## [87] "Potamogeton zosteriformis" "Protectedspecies 1"
## [89] "Protectedspecies 10" "Protectedspecies 12"
## [91] "Protectedspecies 2" "Protectedspecies 3"
## [93] "Protectedspecies 7" "Protectedspecies 8"
## [95] "Ranunculus aquatilis" "Ranunculus flabellaris"
## [97] "Ranunculus longirostris" "Riccia fluitans"
## [99] "Ricciocarpos natans" "Sagittaria cristata"
## [101] "Sagittaria graminea" "Sagittaria latifolia"
## [103] "Sagittaria rigida" "Schoenoplectus acutus"
## [105] "Schoenoplectus americanus" "Schoenoplectus pungens"
## [107] "Schoenoplectus subterminalis" "Schoenoplectus tabernaemontani"
## [109] "Scirpus cyperinus" "Scirpus validus"
## [111] "Sium suave" "Sparganium eurycarpum"
## [113] "Spirodela polyrhiza" "Stuckenia filiformis"
## [115] "Stuckenia pectinata" "Tolypella intricata"
## [117] "Typha angustifolia" "Typha glauca"
## [119] "Typha latifolia" "Utricularia gibba"
## [121] "Utricularia macrorhiza" "Utricularia minor"
## [123] "Utricularia vulgaris" "Vallisneria americana"
## [125] "Wolffia borealis" "Wolffia columbiana"
## [127] "Zannichellia palustris" "carex"
## [129] "ceratophyllum" "chara"
## [131] "characeae" "cyperaceae"
## [133] "drepanocladus" "eleocharis"
## [135] "elodea" "juncus"
## [137] "lemna" "myriophyllum"
## [139] "najas" "nitella"
## [141] "nuphar" "nymphaea"
## [143] "poaceae" "potamogeton"
## [145] "potamogeton (broad)" "potamogeton (narrow)"
## [147] "ranunculus" "riccia"
## [149] "sagittaria" "salix"
## [151] "schoenoplectus" "scirpus"
## [153] "sparganium" "typha"
## [155] "utricularia" "wolffia"
## [157] "zizania" "shannon_div"
## [159] "simpsons_div" "shannon_div_nat"
## [161] "simpsons_div_nat" "richness"
## [163] "nat_richness"
plants_rakeabund_wide[ , c("STA_NBR_DATASOURCE","SURVEY_ID_DATASOURCE",
"REL_ABUND", "REL_ABUND_CORRECTED", "WHOLE_RAKE_REL_ABUND",
"SAMPLE_NOTES",
"SURFACE_GROWTH",
"POINT_LVL_SECCHI",
"OLD_SURVEY_ID",
"COHORT",
"DATEINFO", "MONTH", "DAY", "YEAR", "DATESURVEYSTART",
"INVENTORY_STAFF", "INVENTORY_STAFFDATE", "INVENTORY_NOTES",
"USEABLE", "CLEANED", "INDATABASE",
"SUBMISSION_STAFF", "SUBMISSION_STAFFDATE", "SUBMISSION_NOTES",
"SURVEY_FEEDBACK", "DATASOURCE", "RAKE_SCALE_USED",
"NA","TAXON", "SUBSTRATE", "OBS_ID", "NO_VEG_FOUND") := NULL , ]
setcolorder(plants_rakeabund_wide, c("DOW", "LAKE_NAME", "order_ID", "SUBBASIN",
"SURVEY_ID", "SURVEY_DATASOURCE", "SURVEY_DATE", "MULTIPARTSURVEY", "SURVEYOR",
"Secchi_m", "SECCHI_DATE", "SECCHI_m_ACCEPTED",
"POINT_ID" ,"DEPTH_FT", "proplight", "Longitude", "Latitude"))
#cleanup:
rm(rake_taxacols,rake_natcols)
# **survey level stats -------------------------------------------
surveys <- plants[ , .(tot_n_samp = length(unique(POINT_ID))) , SURVEY_ID ]
#add richness to the surveys dataset
surveys[ , taxa_richness := #take the "taxon count" and subtract one if the survey includes NAs (see next two lines)
plants[ , length(unique(TAXON)) , SURVEY_ID ][ , V1]-# ("total richness", but counts NAs as a taxon) minus
plants[ , ifelse(sum(is.na(TAXON))== 0, 0, 1), SURVEY_ID][,V1],]# (each survey get a 0 if no NAs or a 1 if contains NA's)
# extent of vegetation in survey (proportion vegetated)
surveys <- merge(surveys,plants[!is.na(TAXON), .(n_points_vegetated=length(unique(POINT_ID))) , SURVEY_ID ],
by = "SURVEY_ID", all.x = TRUE)[is.na(n_points_vegetated), n_points_vegetated := 0 ]
surveys[ , prop_veg := n_points_vegetated/tot_n_samp ,]
#create a plant observation matrix (species abund by survey)
survey_species_matrix <- dcast(plants[!is.na(TAXON) , .("count" = length(unique(POINT_ID))) , .(SURVEY_ID,TAXON)], SURVEY_ID ~ TAXON, value.var = "count", fill = 0) #note that this line creates the matrix ONLY for surveys that had species observations (~70 surveys had no species observed)
#diversity indicies:
# species names:
natcols <- names(survey_species_matrix)[names(survey_species_matrix) %in% natcols]
# total diversity
survey_species_matrix[ , shannon_div := diversity(.SD,index = "shannon"), .SDcols = taxacols ]
survey_species_matrix[ , simpsons_div := diversity(.SD,index = "invsimpson"), .SDcols = taxacols ]
# native diversity
survey_species_matrix[ , shannon_div_nat := diversity(.SD,index = "shannon"), .SDcols = natcols ]
survey_species_matrix[ , simpsons_div_nat := diversity(.SD,index = "invsimpson"), .SDcols = natcols ]
survey_species_matrix[simpsons_div_nat == Inf, simpsons_div_nat := 0]
# native richness
survey_species_matrix[ , nat_richness := rowSums(survey_species_matrix[ , .SD, .SDcols = natcols] > 0), ]
# depth stats
# depth surveyed stats:
surveys <- surveys[plants[ !is.na(DEPTH_FT), .("max_depth_surveyed" = max(DEPTH_FT)) , SURVEY_ID], on = "SURVEY_ID" , ]
surveys <- surveys[plants[ !is.na(DEPTH_FT), .("min_depth_surveyed" = min(DEPTH_FT)) , SURVEY_ID], on = "SURVEY_ID" , ]
surveys <- surveys[plants[ !is.na(DEPTH_FT), .("mean_depth_surveyed" = mean(DEPTH_FT)) , SURVEY_ID], on = "SURVEY_ID" , ]
surveys <- surveys[plants[ !is.na(DEPTH_FT), .("median_depth_surveyed" = median(DEPTH_FT)) , SURVEY_ID], on = "SURVEY_ID" , ]
surveys <- surveys[plants[ !is.na(DEPTH_FT), .("IQR_depth_surveyed" = IQR(DEPTH_FT)) , SURVEY_ID], on = "SURVEY_ID" , ]
#vegetated depths data
#max depth vegetated within survey:
#some of these might warrant removal, depending on whats being done with the data
plants[NO_VEG_FOUND == F & DEPTH_FT>50, length(POINT_ID) , .(SURVEY_DATASOURCE, DOW, SUBBASIN, SURVEY_DATE, LAKE_NAME)]
## SURVEY_DATASOURCE DOW SUBBASIN SURVEY_DATE
## <char> <int> <char> <IDat>
## 1: Newman Lab University of Minnesota 10000200 2011-06-29
## 2: DNR Invasive Species Program 40005600 2012-06-15
## 3: DNR Invasive Species Program 82010600 2016-04-30
## 4: Three Rivers Park District 27019101 west basin 2014-06-20
## 5: Freshwater Scientific Services 27013300 Grays Bay 2017-08-28
## 6: 21005700 2009-08-10
## 7: 21005700 2011-08-17
## 8: DNR Fisheries 21005700 2008-06-02
## 9: 34003200 2011-06-23
## LAKE_NAME V1
## <char> <int>
## 1: riley 1
## 2: rays 2
## 3: elmo 6
## 4: sarah 2
## 5: minnetonka 4
## 6: carlos 2
## 7: carlos 1
## 8: carlos 1
## 9: carrie 2
plants[ NO_VEG_FOUND == FALSE & DEPTH_FT<50 , .("max_depth_vegetated" = max(DEPTH_FT)) , SURVEY_ID]
## SURVEY_ID max_depth_vegetated
## <int> <num>
## 1: 1 7.0
## 2: 2 11.0
## 3: 3 6.0
## 4: 4 4.0
## 5: 5 6.2
## ---
## 3122: 2041 10.0
## 3123: 2042 13.0
## 3124: 2043 8.0
## 3125: 2822 19.0
## 3126: 4336 9.0
surveys <- merge( surveys , plants[ NO_VEG_FOUND == FALSE& DEPTH_FT<50 , .("max_depth_vegetated" = max(DEPTH_FT, na.rm = T)) , SURVEY_ID] , by = "SURVEY_ID" , all.x =TRUE )
#other depth vegetated stats:
surveys <- merge( surveys , plants[ NO_VEG_FOUND == FALSE& DEPTH_FT<50 , .("min_depth_vegetated" = min(DEPTH_FT, na.rm = T)) , SURVEY_ID], by = "SURVEY_ID" , all.x =TRUE )
surveys <- merge( surveys , plants[ NO_VEG_FOUND == FALSE& DEPTH_FT<50 , .("mean_depth_vegetated" = mean(DEPTH_FT, na.rm = T)) , SURVEY_ID], by = "SURVEY_ID" , all.x =TRUE )
surveys <- merge( surveys , plants[ NO_VEG_FOUND == FALSE& DEPTH_FT<50 , .("median_depth_vegetated" = median(DEPTH_FT, na.rm = T)) , SURVEY_ID], by = "SURVEY_ID" , all.x =TRUE )
surveys <- merge( surveys , plants[ NO_VEG_FOUND == FALSE& DEPTH_FT>50 , .("IQR_depth_vegetated" = IQR(DEPTH_FT, na.rm = T)) , SURVEY_ID], by = "SURVEY_ID" , all.x =TRUE )
# species matrix into survey data
#species matrix for surveys
surveys <- merge(surveys, survey_species_matrix, by = "SURVEY_ID", all.x = T)
f_dowle3natozeros(surveys, names(survey_species_matrix)) #the merge incorrectly assigns NAs for non obs... here we replace those with 0s
# check work:
# summary(surveys[,1:17])
#append survey data (basic data from plants db) to these
# names(plants)
surveys <- merge(plants[order(SURVEY_DATE) , .("nobs" = .N, "SURVEY_DATE" = first(SURVEY_DATE)) , .(SURVEY_ID, SURVEY_DATASOURCE,
LAKE_NAME, DOW, SUBBASIN,
MULTIPARTSURVEY, order_ID) ],surveys, by = "SURVEY_ID")
# summary(surveys)
# names(surveys) <- gsub(" ", "_", gsub( "\\(", "_", gsub( "\\)", "_", names(surveys))))
# secchi data metrics
# rescue the secchi data from the plants db for these surveys
surveys[plants, Secchi_m := Secchi_m, on = "SURVEY_ID"]
surveys[plants, Secchi_m_date := SECCHI_DATE, on = "SURVEY_ID"]
#OPTIONAL: merge in the geodata + lake data to the survey work
# surveys <- pwi_l[surveys, on = .(order_ID), mult = "first" ]
#have to strip off the geometry to prevent failure in write to csv
# surveys[ ,geometry := NULL ,]
# n samples within historical max depth
# surveys <- merge(surveys,plants[!is.na(TAXON), .(alltime_maxvegdep = max(DEPTH_FT)) , .(DOW, SUBBASIN) ], by = c("DOW", "SUBBASIN"), all.x = TRUE) [is.na(alltime_maxvegdep), alltime_maxvegdep := 0 ]
# summary(surveys$alltime_maxvegdep)
# surveys[ , hist(alltime_maxvegdep) , ]
#for plants records with plants, whats the max depth by lake?
#remove these cols if they exist (added this in troubleshooting, should kick warning, but have no effect on product)
plants[ ,alltime_maxvegdep := NULL]
plants[ ,survey_maxvegdep := NULL]
# Calculate max depth for non-NA TAXON records
max_depth <- plants[!is.na(TAXON) & DEPTH_FT < 50, .(alltime_maxvegdep = max(DEPTH_FT, na.rm = T)), by = .(DOW, SUBBASIN)]
# Merge the result back into the original data
plants <- merge(plants, max_depth, by = c("DOW", "SUBBASIN"), all.x = TRUE)
# Calculate max depth for non-NA TAXON records
max_depth <- plants[!is.na(TAXON) & DEPTH_FT < 50, .(survey_maxvegdep = max(DEPTH_FT, na.rm = T)), by = .(SURVEY_ID)]
# Merge the result back into the original data
plants <- merge(plants, max_depth, by = "SURVEY_ID", all.x = TRUE)
plants[ , .N , .(alltime_maxvegdep, survey_maxvegdep, SURVEY_ID) , ]
## alltime_maxvegdep survey_maxvegdep SURVEY_ID N
## <num> <num> <int> <int>
## 1: 7.0 7.0 1 63
## 2: 11.0 11.0 2 126
## 3: 6.0 6.0 3 160
## 4: 4.0 4.0 4 147
## 5: 6.2 6.2 5 118
## ---
## 3190: 20.6 14.5 4337 270
## 3191: 7.0 5.2 4338 57
## 3192: 7.0 7.0 4339 68
## 3193: 12.0 12.0 4340 114
## 3194: 26.9 23.0 4341 744
summary(plants[ , .N , .(alltime_maxvegdep, survey_maxvegdep, SURVEY_ID) , ])
## alltime_maxvegdep survey_maxvegdep SURVEY_ID N
## Min. : 0.00 Min. : 0.000 Min. : 1.0 Min. : 3.0
## 1st Qu.: 5.20 1st Qu.: 4.800 1st Qu.: 823.2 1st Qu.: 66.0
## Median :10.00 Median : 8.000 Median :1649.5 Median : 131.5
## Mean :11.84 Mean : 9.597 Mean :1655.2 Mean : 229.3
## 3rd Qu.:16.40 3rd Qu.:13.100 3rd Qu.:2483.8 3rd Qu.: 270.0
## Max. :45.60 Max. :45.600 Max. :4341.0 Max. :3836.0
## NA's :39 NA's :68
plants[is.na(survey_maxvegdep), .N , NO_VEG_FOUND ]
## NO_VEG_FOUND N
## <lgcl> <int>
## 1: TRUE 3741
plot(data = plants[, .N , .(alltime_maxvegdep, survey_maxvegdep, SURVEY_ID) , ],alltime_maxvegdep~survey_maxvegdep )
# n_points within all time max vegetated depth
plants[,
.(alltime_maxvegdep_n_samp = fifelse(is.na(first(alltime_maxvegdep)) , NA ,
length(POINT_ID))
) , SURVEY_ID ]
## Key: <SURVEY_ID>
## SURVEY_ID alltime_maxvegdep_n_samp
## <int> <int>
## 1: 1 63
## 2: 2 126
## 3: 3 160
## 4: 4 147
## 5: 5 118
## ---
## 3190: 4337 270
## 3191: 4338 57
## 3192: 4339 68
## 3193: 4340 114
## 3194: 4341 744
surveys <- merge(surveys,
plants[,
.(alltime_maxvegdep_n_samp = fifelse(is.na(first(alltime_maxvegdep)) , NA ,
length(
unique(ifelse(DEPTH_FT <= alltime_maxvegdep,POINT_ID,
NA),
na.rm =T)
)
),
alltime_maxvegdep = first(alltime_maxvegdep)) ,
SURVEY_ID ],
by = "SURVEY_ID", all.x = TRUE
)
# n_points within survey specific max vegetated depth
surveys <- merge(surveys,
plants[,
.(survey_maxvegdep_n_samp =
fifelse(
is.na(
first(survey_maxvegdep)) , NA ,
length(
unique(
ifelse(DEPTH_FT <= survey_maxvegdep,
POINT_ID,NA
),
na.rm =T
)
)
), survey_maxvegdep = first(survey_maxvegdep
)
)
, SURVEY_ID ],
by = "SURVEY_ID", all.x = TRUE
)
plot( data = surveys, alltime_maxvegdep_n_samp ~ n_points_vegetated, xlab = "n points vegetated in this survey", ylab = "n points within all time max vegetated depth")
plot( data = surveys[ , .("propsamples_in_histmaxveg_depth" = (alltime_maxvegdep_n_samp/tot_n_samp),
"prop_samples_vegetated" = (n_points_vegetated/tot_n_samp)
) , ], propsamples_in_histmaxveg_depth ~ prop_samples_vegetated)
plot(data = surveys, alltime_maxvegdep_n_samp ~ survey_maxvegdep_n_samp, xlab = "n points within survey specific vegetated depth", ylab = "n points within all time max vegetated depth")
plot(data = surveys, alltime_maxvegdep ~ survey_maxvegdep)
summary(surveys[ , .(alltime_maxvegdep, survey_maxvegdep, max_depth_vegetated, min_depth_vegetated)])
## alltime_maxvegdep survey_maxvegdep max_depth_vegetated min_depth_vegetated
## Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. :0.000
## 1st Qu.: 5.20 1st Qu.: 4.800 1st Qu.: 4.800 1st Qu.:1.000
## Median :10.00 Median : 8.000 Median : 8.000 Median :1.300
## Mean :11.84 Mean : 9.597 Mean : 9.597 Mean :1.586
## 3rd Qu.:16.40 3rd Qu.:13.100 3rd Qu.:13.100 3rd Qu.:2.000
## Max. :45.60 Max. :45.600 Max. :45.600 Max. :9.800
## NA's :39 NA's :68 NA's :68 NA's :68
surveys[taxa_richness == 0, .N , alltime_maxvegdep]
## alltime_maxvegdep N
## <num> <int>
## 1: 2.5 3
## 2: NA 39
## 3: 12.0 1
## 4: 10.6 1
## 5: 1.0 2
## 6: 4.5 1
## 7: 8.8 3
## 8: 11.2 4
## 9: 5.5 2
## 10: 5.0 1
## 11: 9.8 3
## 12: 2.0 1
## 13: 8.5 1
## 14: 10.0 1
## 15: 2.8 1
## 16: 11.0 1
## 17: 4.8 1
## 18: 6.5 2
# **species pools & watershed metrics ----------------------------------------------------
We have super awesome species pool data because we’ve got species abunds across multiple scales: From the smallest (point– plants_rakeabund_wide or plants_occurrence_wide) scale we have a species abundance matrix that can be treated as a product of the species pool above it (whole survey/lake), which we also have an abundance matrix for! We can also move up to the landscape scale, building species abundance matricies by aggregating these next-lower-scale data.
For example, we can do as described above (compressing matricies to richness for viz) and aggregate to the HUC-8 watershed level.
#first add the lake level richness from each survey to the point rake abund data
plants_rakeabund_wide[ , surveyrichness := surveys[match(plants_rakeabund_wide[ ,SURVEY_ID ,], surveys[, SURVEY_ID, ]), taxa_richness] ,]
plants_occurrence_wide[ , surveyrichness := surveys[match(plants_occurrence_wide[ ,SURVEY_ID ,], surveys[, SURVEY_ID, ]), taxa_richness] ,]
ggplot( data = plants_rakeabund_wide,
aes(jitter(surveyrichness), jitter(nat_richness)))+
geom_point(alpha = .05)+
ylab("point level native richness")+
xlab("suvey level native richness")
# now we need to create a watershed level species matrix:
#check keys
# pwi_l$order_ID
# plants$order_ID
plants[ , watershed := pwi_l[match(plants[ , order_ID ,],pwi_l[ , order_ID ,]), major , ],]
# plants[ , length(unique(POINT_ID)) , watershed]
watersheds <- plants[ , .("n_points" = length(unique(POINT_ID))) , watershed]
watersheds <- merge(watersheds,
plants[!is.na(TAXON) , .("n_species" = length(unique(TAXON))) , watershed],
by = "watershed",
all.x = T)
watershed_occurrence_wide <- dcast(plants, watershed ~ TAXON, value.var = "INDATABASE", fun.aggregate = sum, fill = 0)
watershed_occurrence_wide <- merge(watersheds,
watershed_occurrence_wide,
by = "watershed",
all.x = T)
plants_rakeabund_wide[ , watershed := plants[match(plants_rakeabund_wide[ ,SURVEY_ID ,], plants[, SURVEY_ID, ]), watershed] ,]
plants_rakeabund_wide[ , watershedrichness := watershed_occurrence_wide[match(plants_rakeabund_wide[ ,watershed ,], watershed_occurrence_wide[, watershed, ]), n_species] ,]
# add to occurrence wide set:
plants_occurrence_wide[ , watershed := plants[match(plants_occurrence_wide[ ,SURVEY_ID ,], plants[, SURVEY_ID, ]), watershed] ,]
plants_occurrence_wide[ , watershedrichness := watershed_occurrence_wide[match(plants_occurrence_wide[ ,watershed ,], watershed_occurrence_wide[, watershed, ]), n_species] ,]
# watershed richness as the predictor of point scale richness:
ggplot( data = plants_rakeabund_wide,
aes(jitter(watershedrichness), jitter(nat_richness)))+
geom_point()+
ylab("point level native richness")+
xlab("watershed level native richness")
surveys[ , watershed := plants[match(surveys[ , SURVEY_ID ,], plants[, SURVEY_ID, ]), watershed] ,]
surveys[ , watershedrichness := watershed_occurrence_wide[match(surveys[ ,watershed ,], watershed_occurrence_wide[, watershed, ]), n_species] ,]
surveys[is.na(watershedrichness), watershedrichness := 0]
ggplot( data = surveys,
aes(watershedrichness, nat_richness))+
geom_point()+
geom_smooth(method = "loess")+
ylab("Survey Richness")+
xlab("HUC-8 Watershed Richness")+
theme_bw()
# Get watershed Diversity
names(watershed_occurrence_wide) %in% natcols
## [1] FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
## [73] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE FALSE TRUE FALSE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
## [97] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [109] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE
## [169] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [193] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [205] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [217] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [229] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
watershed_occurrence_wide[ , simpson_div_nat := diversity(.SD,index = "invsimpson" ) , .SDcols = c(names(watershed_occurrence_wide) %in% natcols)]
hist(watershed_occurrence_wide$simpson_div_nat)
plants_rakeabund_wide[ , watershedsimpson_nat := watershed_occurrence_wide[match(plants_rakeabund_wide[ ,watershed ,], watershed_occurrence_wide[, watershed, ]), simpson_div_nat] ,]
plants_occurrence_wide[ , watershedsimpson_nat := watershed_occurrence_wide[match(plants_occurrence_wide[ ,watershed ,], watershed_occurrence_wide[, watershed, ]), simpson_div_nat] ,]
plants_rakeabund_wide[ , surveysimpson_nat := surveys[match(plants_rakeabund_wide[ ,SURVEY_ID ,], surveys[, SURVEY_ID, ]), simpsons_div_nat] ,]
plants_occurrence_wide[ , surveysimpson_nat := surveys[match(plants_occurrence_wide[ ,SURVEY_ID ,], surveys[, SURVEY_ID, ]), simpsons_div_nat] ,]
surveys[ , watershedsimpson_nat := watershed_occurrence_wide[match(surveys[ ,watershed ,], watershed_occurrence_wide[, watershed, ]), simpson_div_nat] ,]
#redo species pool plots:
point_pools <- ggplot( data = plants_rakeabund_wide[simpsons_div_nat != Inf],
aes(surveysimpson_nat, simpsons_div_nat))+
geom_point()+
geom_smooth(method = "lm")+
ylab("Point-scale ENSpie")+
xlab("Lake-scale ENSpie")+
theme_bw()
lake_pools <- ggplot( data = surveys,
aes(watershedsimpson_nat, simpsons_div_nat))+
geom_point()+
geom_smooth(method = "lm")+
ylab("Lake-scale ENSpie")+
xlab("Watershed-scale ENSpie")+
theme_bw()
ggarrange(
point_pools,
lake_pools
)
#bring simpsons div back to HUc8 table
setDT(watersheds_huc8)
watersheds_huc8[watershed_occurrence_wide, on = .(major = watershed) , simpson_div_nat := simpson_div_nat ]
#clean up intermediates:
rm(lake_pools, point_pools, survey_species_matrix)
# **data products -----------------------------------------------------------
We have 6 datasets to export:
# names(plants)
plants[ , c("DATASOURCE", "STA_NBR_DATASOURCE", "SURVEY_ID_DATASOURCE", "SAMPLE_NOTES", "OLD_SURVEY_ID", "DATESURVEYSTART", "COHORT", "DATEINFO", "MONTH", "DAY", "YEAR", "INVENTORY_STAFF", "INVENTORY_STAFFDATE", "USEABLE", "CLEANED", "INDATABASE", "INVENTORY_NOTES", "SUBMISSION_STAFF", "SUBMISSION_STAFFDATE", "SUBMISSION_NOTES", "SURVEY_FEEDBACK") := NULL , ]
setcolorder(plants, c("DOW", "LAKE_NAME", "order_ID", "SUBBASIN", "watershed","alltime_maxvegdep",
"SURVEY_ID", "SURVEY_DATASOURCE", "SURVEY_DATE", "MULTIPARTSURVEY", "SURVEYOR","RAKE_SCALE_USED","survey_maxvegdep",
"Secchi_m", "SECCHI_DATE", "SECCHI_m_ACCEPTED",
"POINT_ID" ,"DEPTH_FT", "proplight", "Longitude", "Latitude",
"NO_VEG_FOUND", "WHOLE_RAKE_REL_ABUND","SUBSTRATE", "SURFACE_GROWTH", "POINT_LVL_SECCHI",
"OBS_ID", "TAXON", "REL_ABUND", "REL_ABUND_CORRECTED"
))
# export_names_plants <- tolower(names(plants))
# metadata for column names in this file
# dow: MN Dept of Waters Ident.
# lake_name: Name of the lake.
# order_ID: key used to link to MN Hydrography dataset
# subbasin: Sub-basin where the observation was made.
# watershed: Watershed associated with the observation.
# alltime_maxvegdep: Maximum vegetation depth ever observed in the lake (excludes any depth observation >50ft).
# survey_id: Identification number for the survey.
# survey_datasource: Name of the source of the survey data.
# survey_date: Date when the survey was conducted, if multiple dates uses the first day of the survey.
# multipartsurvey: Indicator for if the survey is part of a larger survey. Numeric with structure of SURVEY.PART
# surveyor: Person or entity conducting the survey if known.
# rake_scale_used: Scale used for rake abundance measurements.
# survey_maxvegdep: Maximum vegetation depth observed during the survey.
# secchi_m: Nearest temporal Secchi depth measured in meters.
# secchi_date: Date when Secchi depth was measured.
# secchi_m_accepted: Secchi depth measurement if observation is within 30d of the plant survey (used for proplight calculation).
# point_id: Identification number for the observation point.
# depth_ft: Depth in feet.
# proplight: Proportion of surface light remaining at DEPTH_FT.
# longitude: Longitude coordinate of the observation point.
# latitude: Latitude coordinate of the observation point.
# no_veg_found: Indicator if no vegetation was found at point.
# whole_rake_rel_abund: Relative abundance rating assigned to the whole rake (all species), if assigned.
# substrate: Substrate type.
# surface_growth: Indicator variable for plant growth reached surface of water.
# point_lvl_secchi: Secchi level at the observation point if recorded.
# obs_id: Identification number for the observation.
# taxon: Name of taxon observed.
# rel_abund: Relative abundance observed (see RAKE_SCALE_USED for possible values).
# rel_abund_corrected: Corrected relative abundance (fixes all relative abunds to scale of 1,2,3).
# fwrite(plants, file = "data&scripts/data/output/DRUM/plants_env_data.csv")
# names(plants_occurrence_wide)
setcolorder(plants_occurrence_wide, c("DOW", "LAKE_NAME", "order_ID", "SUBBASIN",
"watershed", "watershedrichness", "watershedsimpson_nat",
"SURVEY_ID", "SURVEY_DATASOURCE", "SURVEY_DATE", "MULTIPARTSURVEY", "SURVEYOR","surveyrichness", "surveysimpson_nat",
"Secchi_m", "SECCHI_DATE", "SECCHI_m_ACCEPTED",
"POINT_ID" ,"DEPTH_FT", "proplight", "Longitude", "Latitude",
"richness", "nat_richness"))
# export_names_plants_occurrence_wide <- tolower(names(plants_occurrence_wide))
names(plants_occurrence_wide)
## [1] "DOW" "LAKE_NAME"
## [3] "order_ID" "SUBBASIN"
## [5] "watershed" "watershedrichness"
## [7] "watershedsimpson_nat" "SURVEY_ID"
## [9] "SURVEY_DATASOURCE" "SURVEY_DATE"
## [11] "MULTIPARTSURVEY" "SURVEYOR"
## [13] "surveyrichness" "surveysimpson_nat"
## [15] "Secchi_m" "SECCHI_DATE"
## [17] "SECCHI_m_ACCEPTED" "POINT_ID"
## [19] "DEPTH_FT" "proplight"
## [21] "Longitude" "Latitude"
## [23] "richness" "nat_richness"
## [25] "Acorus americanus" "Alisma triviale"
## [27] "Andromeda glaucophylla" "Andromeda polifolia"
## [29] "Asclepias incarnata" "Asteraceae"
## [31] "Betula pumila" "Bidens beckii"
## [33] "Bolboschoenus fluviatilis" "Bolboschoenus maritimus"
## [35] "Boltonia asteroides" "Brasenia schreberi"
## [37] "Butomus umbellatus" "Calamagrostis canadensis"
## [39] "Calla palustris" "Caltha palustris"
## [41] "Carex aquatilis" "Carex comosa"
## [43] "Carex lacustris" "Carex pellita"
## [45] "Carex scoparia" "Ceratophyllum demersum"
## [47] "Chamaedaphne calyculata" "Chara canescens"
## [49] "Chara globularis" "Cicuta maculata"
## [51] "Drepanocladus aduncus" "Dulichium arundinaceum"
## [53] "Elatine minima" "Eleocharis acicularis"
## [55] "Eleocharis erythropoda" "Eleocharis palustris"
## [57] "Eleocharis smallii" "Elodea canadensis"
## [59] "Elodea nuttallii" "Equisetum fluviatile"
## [61] "Eriocaulon aquaticum" "Eupatorium dubium"
## [63] "Eupatorium maculatum" "Eupatorium perfoliatum"
## [65] "Fontinalis antipyretica" "Fontinalis sullivantii"
## [67] "Glyceria borealis" "Heteranthera dubia"
## [69] "Hippuris vulgaris" "Hypericum ellipticum"
## [71] "Impatiens capensis" "Iris versicolor"
## [73] "Iris virginica" "Isoetes echinospora"
## [75] "Juncus arcticus" "Juncus canadensis"
## [77] "Juncus effusus" "Juncus pelocarpus"
## [79] "Ledum groenlandicum" "Leersia oryzoides"
## [81] "Lemna minor" "Lemna trisulca"
## [83] "Lemna turionifera" "Littorella uniflora"
## [85] "Lobelia dortmanna" "Lychnothamnus barbatus"
## [87] "Lycopus americanus" "Lysimachia terrestris"
## [89] "Lythrum salicaria" "Menyanthes trifoliata"
## [91] "Myrica gale" "Myriophyllum alterniflorum"
## [93] "Myriophyllum exalbescens" "Myriophyllum farwellii"
## [95] "Myriophyllum sibiricum" "Myriophyllum spicatum"
## [97] "Myriophyllum tenellum" "Myriophyllum verticillatum"
## [99] "Najas flexilis" "Najas guadalupensis"
## [101] "Najas minor" "Nasturtium officinale"
## [103] "Nelumbo lutea" "Nitellopsis obtusa"
## [105] "Nuphar advena" "Nuphar microphylla"
## [107] "Nuphar variegata" "Nymphaea odorata"
## [109] "Nymphaea tuberosa" "Persicaria amphibia"
## [111] "Persicaria lapathifolia" "Phalaris arundinacea"
## [113] "Phragmites australis" "Polygonum amphibium"
## [115] "Pontederia cordata" "Potamogeton alpinus"
## [117] "Potamogeton amplifolius" "Potamogeton crispus"
## [119] "Potamogeton epihydrus" "Potamogeton foliosus"
## [121] "Potamogeton friesii" "Potamogeton gramineus"
## [123] "Potamogeton hillii" "Potamogeton illinoensis"
## [125] "Potamogeton natans" "Potamogeton nodosus"
## [127] "Potamogeton obtusifolius" "Potamogeton praelongus"
## [129] "Potamogeton pusillus" "Potamogeton richardsonii"
## [131] "Potamogeton robbinsii" "Potamogeton spirillus"
## [133] "Potamogeton strictifolius" "Potamogeton vaseyi"
## [135] "Potamogeton zosteriformis" "Potentilla palustris"
## [137] "Protectedspecies 1" "Protectedspecies 10"
## [139] "Protectedspecies 11" "Protectedspecies 12"
## [141] "Protectedspecies 2" "Protectedspecies 3"
## [143] "Protectedspecies 4" "Protectedspecies 5"
## [145] "Protectedspecies 6" "Protectedspecies 7"
## [147] "Protectedspecies 8" "Protectedspecies 9"
## [149] "Ranunculus aquatilis" "Ranunculus flabellaris"
## [151] "Ranunculus flammula" "Ranunculus longirostris"
## [153] "Riccia fluitans" "Ricciocarpos natans"
## [155] "Rumex orbiculatus" "Sagittaria cristata"
## [157] "Sagittaria cuneata" "Sagittaria graminea"
## [159] "Sagittaria latifolia" "Sagittaria rigida"
## [161] "Schoenoplectus acutus" "Schoenoplectus americanus"
## [163] "Schoenoplectus pungens" "Schoenoplectus subterminalis"
## [165] "Schoenoplectus tabernaemontani" "Schoenoplectus x oblongus"
## [167] "Scirpus atrovirens" "Scirpus cyperinus"
## [169] "Scirpus validus" "Scolochloa festucacea"
## [171] "Scorpidium scorpioides" "Scutellaria lateriflora"
## [173] "Sium suave" "Sparganium americanum"
## [175] "Sparganium angustifolium" "Sparganium emersum"
## [177] "Sparganium eurycarpum" "Sparganium fluctuans"
## [179] "Sparganium natans" "Sphagnum magellanicum"
## [181] "Spirodela polyrhiza" "Stuckenia filiformis"
## [183] "Stuckenia pectinata" "Tolypella intricata"
## [185] "Triadenum fraseri" "Typha angustifolia"
## [187] "Typha glauca" "Typha latifolia"
## [189] "Utricularia gibba" "Utricularia intermedia"
## [191] "Utricularia macrorhiza" "Utricularia minor"
## [193] "Utricularia vulgaris" "Vallisneria americana"
## [195] "Veronica americana" "Wolffia borealis"
## [197] "Wolffia columbiana" "Zannichellia palustris"
## [199] "Zizania palustris" "acorus"
## [201] "alisma" "alnus"
## [203] "bidens" "callitriche"
## [205] "carex" "ceratophyllum"
## [207] "chara" "characeae"
## [209] "cicuta" "cyperaceae"
## [211] "drepanocladus" "elatine"
## [213] "eleocharis" "elodea"
## [215] "equisetum" "eragrostis"
## [217] "eutrochium" "hypericum"
## [219] "impatiens" "iris"
## [221] "isoetes" "juncus"
## [223] "lamiaceae" "lemna"
## [225] "lysimachia" "myriophyllum"
## [227] "najas" "nitella"
## [229] "nuphar" "nymphaea"
## [231] "nymphaeaceae" "persicaria"
## [233] "poaceae" "potamogeton"
## [235] "potamogeton (broad)" "potamogeton (narrow)"
## [237] "ranunculus" "riccia"
## [239] "sagittaria" "salix"
## [241] "schoenoplectus" "scirpus"
## [243] "scutellaria" "solidago"
## [245] "sparganium" "sparganium (emergent)"
## [247] "sparganium (floating)" "sphagnum"
## [249] "stuckenia" "typha"
## [251] "utricularia" "verbena"
## [253] "wolffia" "zizania"
## [255] "zosterella"
# I ran glimpse() and then added in metadata:
# DOW <int> MN Dept of Waters Ident.
# LAKE_NAME <chr> Name of the lake.
# order_ID <int> key used to link to MN Hydrography dataset
# SUBBASIN <chr> Sub-basin where the observation was made.
# watershed <dbl> Watershed associated with the observation. numeric key for watershed (see watershed_occurrence_wide for detail on watersheds like names, sizes, etc. )
# watershedrichness <int> taxa richness across all surveys in watershed
# watershedsimpson_nat <dbl> inverse simpsons diversity in watershed
# SURVEY_ID <int> Identification number for the survey.
# SURVEY_DATASOURCE <chr> Name of the source of the survey data.
# SURVEY_DATE <IDate> Date when the survey was conducted, if multiple dates uses the first day of the survey.
# MULTIPARTSURVEY <dbl> Indicator for if the survey is part of a larger survey. Numeric with structure of [SURVEY.PART]
# SURVEYOR <chr> Person or entity conducting the survey if known.
# surveyrichness <dbl> Taxonomic richness of survey
# surveysimpson_nat <dbl> Inverse simpsons diversity of survey
# Secchi_m <dbl> Nearest temporal Secchi depth measured in meters.
# SECCHI_DATE <IDate> Date when Secchi depth was measured.
# SECCHI_m_ACCEPTED <dbl> Secchi depth measurement if observation is within 30d of the plant survey (used for proplight calculation).
# POINT_ID <int> Identification number for the observation point.
# DEPTH_FT <dbl> Depth to substrate in feet.
# proplight <dbl> Proportion of surface light remaining at DEPTH_FT.
# Longitude <dbl> Longitude coordinate of the observation point.
# Latitude <dbl> Latitude coordinate of the observation point.
# richness <dbl> Number of unique taxa observed at the point.
# nat_richness <dbl> Number of unique native taxa observed at the point.
# the remaining columns are species occurrence columns, indicating whether a taxon was observed, along with it's relative rake abundance (1-3) or not observed (0) at a sample point
# fwrite(plants_occurrence_wide, file = "data&scripts/data/output/DRUM/plants_occurrence_wide.csv")
# names(plants_rakeabund_wide)
setcolorder(plants_rakeabund_wide, c("DOW", "LAKE_NAME", "order_ID", "SUBBASIN",
"watershed", "watershedrichness", "watershedsimpson_nat",
"SURVEY_ID", "SURVEY_DATASOURCE", "SURVEY_DATE", "MULTIPARTSURVEY", "SURVEYOR", "surveyrichness", "surveysimpson_nat",
"Secchi_m", "SECCHI_DATE", "SECCHI_m_ACCEPTED",
"POINT_ID" ,"DEPTH_FT", "proplight", "Longitude", "Latitude",
"shannon_div", "simpsons_div", "shannon_div_nat", "simpsons_div_nat", "richness", "nat_richness"
))
# export_names_plants_rakeabund_wide <- tolower(names(plants_rakeabund_wide))
names(plants_rakeabund_wide)
## [1] "DOW" "LAKE_NAME"
## [3] "order_ID" "SUBBASIN"
## [5] "watershed" "watershedrichness"
## [7] "watershedsimpson_nat" "SURVEY_ID"
## [9] "SURVEY_DATASOURCE" "SURVEY_DATE"
## [11] "MULTIPARTSURVEY" "SURVEYOR"
## [13] "surveyrichness" "surveysimpson_nat"
## [15] "Secchi_m" "SECCHI_DATE"
## [17] "SECCHI_m_ACCEPTED" "POINT_ID"
## [19] "DEPTH_FT" "proplight"
## [21] "Longitude" "Latitude"
## [23] "shannon_div" "simpsons_div"
## [25] "shannon_div_nat" "simpsons_div_nat"
## [27] "richness" "nat_richness"
## [29] "Bidens beckii" "Bolboschoenus fluviatilis"
## [31] "Brasenia schreberi" "Caltha palustris"
## [33] "Carex comosa" "Carex pellita"
## [35] "Carex scoparia" "Ceratophyllum demersum"
## [37] "Chara globularis" "Eleocharis acicularis"
## [39] "Eleocharis erythropoda" "Eleocharis palustris"
## [41] "Elodea canadensis" "Elodea nuttallii"
## [43] "Equisetum fluviatile" "Fontinalis antipyretica"
## [45] "Glyceria borealis" "Heteranthera dubia"
## [47] "Hippuris vulgaris" "Iris virginica"
## [49] "Isoetes echinospora" "Juncus arcticus"
## [51] "Juncus canadensis" "Juncus effusus"
## [53] "Juncus pelocarpus" "Lemna minor"
## [55] "Lemna trisulca" "Lychnothamnus barbatus"
## [57] "Lythrum salicaria" "Myriophyllum exalbescens"
## [59] "Myriophyllum farwellii" "Myriophyllum sibiricum"
## [61] "Myriophyllum spicatum" "Myriophyllum verticillatum"
## [63] "Najas flexilis" "Najas guadalupensis"
## [65] "Najas minor" "Nelumbo lutea"
## [67] "Nitellopsis obtusa" "Nuphar advena"
## [69] "Nuphar variegata" "Nymphaea odorata"
## [71] "Nymphaea tuberosa" "Persicaria amphibia"
## [73] "Phalaris arundinacea" "Phragmites australis"
## [75] "Polygonum amphibium" "Pontederia cordata"
## [77] "Potamogeton amplifolius" "Potamogeton crispus"
## [79] "Potamogeton epihydrus" "Potamogeton foliosus"
## [81] "Potamogeton friesii" "Potamogeton gramineus"
## [83] "Potamogeton illinoensis" "Potamogeton natans"
## [85] "Potamogeton nodosus" "Potamogeton obtusifolius"
## [87] "Potamogeton praelongus" "Potamogeton pusillus"
## [89] "Potamogeton richardsonii" "Potamogeton robbinsii"
## [91] "Potamogeton spirillus" "Potamogeton strictifolius"
## [93] "Potamogeton zosteriformis" "Protectedspecies 1"
## [95] "Protectedspecies 10" "Protectedspecies 12"
## [97] "Protectedspecies 2" "Protectedspecies 3"
## [99] "Protectedspecies 7" "Protectedspecies 8"
## [101] "Ranunculus aquatilis" "Ranunculus flabellaris"
## [103] "Ranunculus longirostris" "Riccia fluitans"
## [105] "Ricciocarpos natans" "Sagittaria cristata"
## [107] "Sagittaria graminea" "Sagittaria latifolia"
## [109] "Sagittaria rigida" "Schoenoplectus acutus"
## [111] "Schoenoplectus americanus" "Schoenoplectus pungens"
## [113] "Schoenoplectus subterminalis" "Schoenoplectus tabernaemontani"
## [115] "Scirpus cyperinus" "Scirpus validus"
## [117] "Sium suave" "Sparganium eurycarpum"
## [119] "Spirodela polyrhiza" "Stuckenia filiformis"
## [121] "Stuckenia pectinata" "Tolypella intricata"
## [123] "Typha angustifolia" "Typha glauca"
## [125] "Typha latifolia" "Utricularia gibba"
## [127] "Utricularia macrorhiza" "Utricularia minor"
## [129] "Utricularia vulgaris" "Vallisneria americana"
## [131] "Wolffia borealis" "Wolffia columbiana"
## [133] "Zannichellia palustris" "carex"
## [135] "ceratophyllum" "chara"
## [137] "characeae" "cyperaceae"
## [139] "drepanocladus" "eleocharis"
## [141] "elodea" "juncus"
## [143] "lemna" "myriophyllum"
## [145] "najas" "nitella"
## [147] "nuphar" "nymphaea"
## [149] "poaceae" "potamogeton"
## [151] "potamogeton (broad)" "potamogeton (narrow)"
## [153] "ranunculus" "riccia"
## [155] "sagittaria" "salix"
## [157] "schoenoplectus" "scirpus"
## [159] "sparganium" "typha"
## [161] "utricularia" "wolffia"
## [163] "zizania"
# I ran glimpse() and then added in metadata:
# DOW <int> MN Dept of Waters Ident.
# LAKE_NAME <chr> Name of the lake.
# order_ID <int> key used to link to MN Hydrography dataset
# SUBBASIN <chr> Sub-basin where the observation was made.
# watershed <dbl> Watershed associated with the observation. numeric key for watershed (see watershed_occurrence_wide for detail on watersheds like names, sizes, etc. )
# watershedrichness <int> taxa richness across all surveys in watershed
# watershedsimpson_nat <dbl> inverse simpsons diversity in watershed
# SURVEY_ID <int> Identification number for the survey.
# SURVEY_DATASOURCE <chr> Name of the source of the survey data.
# SURVEY_DATE <IDate> Date when the survey was conducted, if multiple dates uses the first day of the survey.
# MULTIPARTSURVEY <dbl> Indicator for if the survey is part of a larger survey. Numeric with structure of [SURVEY.PART]
# SURVEYOR <chr> Person or entity conducting the survey if known.
# surveyrichness <dbl> Taxonomic richness of survey
# surveysimpson_nat <dbl> Inverse simpsons diversity of survey
# Secchi_m <dbl> Nearest temporal Secchi depth measured in meters.
# SECCHI_DATE <IDate> Date when Secchi depth was measured.
# SECCHI_m_ACCEPTED <dbl> Secchi depth measurement if observation is within 30d of the plant survey (used for proplight calculation).
# POINT_ID <int> Identification number for the observation point.
# DEPTH_FT <dbl> Depth to substrate in feet.
# proplight <dbl> Proportion of surface light remaining at DEPTH_FT.
# Longitude <dbl> Longitude coordinate of the observation point.
# Latitude <dbl> Latitude coordinate of the observation point.
# shannon_div <dbl> Shannon diversity of taxa observed at the point
# simpsons_div <dbl> Inverse simpsons diversity of taxa observed at the point
# shannon_div_nat <dbl> Shannon diversity of native taxa observed at the point
# simpsons_div_nat <dbl> Inverse simpsons diversity of native taxa observed at the point
# richness <dbl> Number of unique taxa observed at the point.
# nat_richness <dbl> Number of unique native taxa observed at the point.
# the remaining columns are species occurrence columns, indicating whether a taxon was observed (1) or not observed (0) at a sample point
# fwrite(plants_rakeabund_wide, file = "data&scripts/data/output/DRUM/plants_abund_env_data_wide.csv")
setcolorder(surveys, c("DOW", "LAKE_NAME", "order_ID", "SUBBASIN",
"watershed", "watershedrichness", "watershedsimpson_nat",
"SURVEY_ID", "SURVEY_DATASOURCE", "SURVEY_DATE", "MULTIPARTSURVEY",
"Secchi_m", "Secchi_m_date",
"nobs", "tot_n_samp",
"max_depth_surveyed", "min_depth_surveyed", "mean_depth_surveyed", "median_depth_surveyed", "IQR_depth_surveyed",
"max_depth_vegetated", "min_depth_vegetated", "mean_depth_vegetated", "median_depth_vegetated", "IQR_depth_vegetated",
"alltime_maxvegdep", "alltime_maxvegdep_n_samp", "survey_maxvegdep", "survey_maxvegdep_n_samp",
"n_points_vegetated", "prop_veg",
"shannon_div", "simpsons_div", "shannon_div_nat", "simpsons_div_nat", "taxa_richness", "nat_richness"
))
glimpse(surveys)
## Rows: 3,194
## Columns: 268
## $ DOW <int> 1001600, 1003400, 1003500, 1005300, 1…
## $ LAKE_NAME <chr> "little prairie", "horseshoe", "mud",…
## $ order_ID <int> 16369, 16343, 16671, 16418, 16689, 16…
## $ SUBBASIN <chr> "", "", "", "", "", "", "", "", "", "…
## $ watershed <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 21, 21, 10…
## $ watershedrichness <int> 107, 107, 107, 107, 107, 107, 107, 10…
## $ watershedsimpson_nat <dbl> 17.02048, 17.02048, 17.02048, 17.0204…
## $ SURVEY_ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12…
## $ SURVEY_DATASOURCE <chr> "DNR Shallow Lakes", "DNR Shallow Lak…
## $ SURVEY_DATE <IDate> 2011-08-16, 2011-08-10, 2014-07-17,…
## $ MULTIPARTSURVEY <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ Secchi_m <dbl> 0.7620, 0.6000, 1.0000, 0.8382, 1.524…
## $ Secchi_m_date <IDate> 2011-08-16, 2011-08-07, 2014-07-17,…
## $ nobs <int> 63, 126, 160, 147, 118, 133, 128, 133…
## $ tot_n_samp <int> 41, 66, 37, 35, 52, 39, 56, 45, 56, 4…
## $ max_depth_surveyed <dbl> 7.5, 11.2, 15.0, 4.0, 6.8, 6.0, 12.0,…
## $ min_depth_surveyed <dbl> 1.2, 2.2, 0.5, 1.2, 1.5, 1.5, 2.0, 2.…
## $ mean_depth_surveyed <dbl> 4.665079, 5.526984, 4.320625, 3.24217…
## $ median_depth_surveyed <dbl> 5.50, 5.00, 4.65, 3.50, 4.00, 3.80, 5…
## $ IQR_depth_surveyed <dbl> 3.600, 3.150, 1.700, 0.650, 1.600, 1.…
## $ max_depth_vegetated <dbl> 7.0, 11.0, 6.0, 4.0, 6.2, 5.0, 10.0, …
## $ min_depth_vegetated <dbl> 1.2, 2.2, 0.5, 1.2, 1.5, 1.5, 2.0, 2.…
## $ mean_depth_vegetated <dbl> 3.715000, 4.198780, 4.018301, 3.24217…
## $ median_depth_vegetated <dbl> 3.8, 3.8, 4.0, 3.5, 4.0, 3.5, 5.0, 6.…
## $ IQR_depth_vegetated <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ alltime_maxvegdep <dbl> 7.0, 11.0, 6.0, 4.0, 6.2, 5.0, 10.0, …
## $ alltime_maxvegdep_n_samp <int> 41, 66, 32, 35, 52, 36, 55, 45, 48, 4…
## $ survey_maxvegdep <dbl> 7.0, 11.0, 6.0, 4.0, 6.2, 5.0, 10.0, …
## $ survey_maxvegdep_n_samp <int> 41, 66, 32, 35, 52, 36, 55, 45, 48, 4…
## $ n_points_vegetated <int> 18, 22, 30, 35, 45, 33, 31, 45, 36, 4…
## $ prop_veg <dbl> 0.4390244, 0.3333333, 0.8108108, 1.00…
## $ shannon_div <dbl> 2.149490, 2.471995, 2.711635, 2.15670…
## $ simpsons_div <dbl> 6.250000, 10.035821, 11.292330, 7.059…
## $ shannon_div_nat <dbl> 2.053658, 2.471995, 2.689823, 2.13046…
## $ simpsons_div_nat <dbl> 5.730159, 10.035821, 11.150579, 6.966…
## $ taxa_richness <dbl> 12, 15, 24, 14, 15, 15, 23, 8, 22, 17…
## $ nat_richness <dbl> 11, 15, 23, 13, 15, 14, 23, 8, 21, 16…
## $ `Acorus americanus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Alisma triviale` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Andromeda glaucophylla` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Andromeda polifolia` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Asclepias incarnata` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Asteraceae <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Betula pumila` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Bidens beckii` <int> 0, 0, 0, 0, 7, 6, 2, 0, 0, 5, 0, 0, 1…
## $ `Bolboschoenus fluviatilis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Bolboschoenus maritimus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Boltonia asteroides` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Brasenia schreberi` <int> 0, 5, 10, 0, 0, 0, 0, 0, 25, 0, 0, 2,…
## $ `Butomus umbellatus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Calamagrostis canadensis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Calla palustris` <int> 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Caltha palustris` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Carex aquatilis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Carex comosa` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Carex lacustris` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Carex pellita` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Carex scoparia` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Ceratophyllum demersum` <int> 1, 5, 4, 30, 4, 23, 15, 42, 1, 26, 6,…
## $ `Chamaedaphne calyculata` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Chara canescens` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Chara globularis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Cicuta maculata` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Drepanocladus aduncus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Dulichium arundinaceum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Elatine minima` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Eleocharis acicularis` <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ `Eleocharis erythropoda` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Eleocharis palustris` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Eleocharis smallii` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ `Elodea canadensis` <int> 0, 0, 0, 0, 0, 2, 10, 10, 0, 20, 7, 0…
## $ `Elodea nuttallii` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Equisetum fluviatile` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Eriocaulon aquaticum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Eupatorium dubium` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Eupatorium maculatum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Eupatorium perfoliatum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Fontinalis antipyretica` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Fontinalis sullivantii` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Glyceria borealis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Heteranthera dubia` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ `Hippuris vulgaris` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Hypericum ellipticum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Impatiens capensis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Iris versicolor` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Iris virginica` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Isoetes echinospora` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Juncus arcticus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Juncus canadensis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Juncus effusus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Juncus pelocarpus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Ledum groenlandicum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Leersia oryzoides` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Lemna minor` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Lemna trisulca` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Lemna turionifera` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Littorella uniflora` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Lobelia dortmanna` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Lychnothamnus barbatus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Lycopus americanus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Lysimachia terrestris` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Lythrum salicaria` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Menyanthes trifoliata` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Myrica gale` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Myriophyllum alterniflorum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Myriophyllum exalbescens` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Myriophyllum farwellii` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Myriophyllum sibiricum` <int> 0, 1, 0, 0, 1, 0, 2, 0, 0, 6, 0, 7, 0…
## $ `Myriophyllum spicatum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Myriophyllum tenellum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Myriophyllum verticillatum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Najas flexilis` <int> 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 28, …
## $ `Najas guadalupensis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Najas minor` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Nasturtium officinale` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Nelumbo lutea` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Nitellopsis obtusa` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Nuphar advena` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Nuphar microphylla` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Nuphar variegata` <int> 1, 6, 5, 15, 11, 10, 8, 1, 5, 13, 15,…
## $ `Nymphaea odorata` <int> 5, 12, 28, 13, 8, 20, 7, 0, 21, 21, 2…
## $ `Nymphaea tuberosa` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Persicaria amphibia` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Persicaria lapathifolia` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Phalaris arundinacea` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Phragmites australis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Polygonum amphibium` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Pontederia cordata` <int> 0, 1, 12, 2, 0, 0, 3, 0, 0, 0, 0, 0, …
## $ `Potamogeton alpinus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton amplifolius` <int> 0, 0, 10, 0, 0, 0, 5, 0, 0, 0, 6, 16,…
## $ `Potamogeton crispus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton epihydrus` <int> 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0…
## $ `Potamogeton foliosus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton friesii` <int> 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton gramineus` <int> 0, 0, 1, 0, 0, 0, 1, 0, 4, 0, 2, 0, 1…
## $ `Potamogeton hillii` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton illinoensis` <int> 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ `Potamogeton natans` <int> 0, 3, 13, 15, 5, 4, 1, 5, 0, 11, 1, 0…
## $ `Potamogeton nodosus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton obtusifolius` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton praelongus` <int> 0, 4, 0, 0, 1, 0, 2, 0, 0, 0, 13, 9, …
## $ `Potamogeton pusillus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton richardsonii` <int> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton robbinsii` <int> 5, 0, 0, 15, 0, 0, 0, 0, 2, 0, 0, 7, …
## $ `Potamogeton spirillus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton strictifolius` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton vaseyi` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton zosteriformis` <int> 0, 11, 6, 7, 14, 15, 11, 43, 0, 37, 1…
## $ `Potentilla palustris` <int> 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ `Protectedspecies 1` <int> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 3…
## $ `Protectedspecies 10` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Protectedspecies 11` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Protectedspecies 12` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Protectedspecies 2` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Protectedspecies 3` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Protectedspecies 4` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Protectedspecies 5` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Protectedspecies 6` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Protectedspecies 7` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Protectedspecies 8` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Protectedspecies 9` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Ranunculus aquatilis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Ranunculus flabellaris` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Ranunculus flammula` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Ranunculus longirostris` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Riccia fluitans` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Ricciocarpos natans` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Rumex orbiculatus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sagittaria cristata` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sagittaria cuneata` <int> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sagittaria graminea` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sagittaria latifolia` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sagittaria rigida` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Schoenoplectus acutus` <int> 0, 2, 0, 0, 0, 0, 8, 1, 0, 4, 1, 0, 0…
## $ `Schoenoplectus americanus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Schoenoplectus pungens` <int> 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0…
## $ `Schoenoplectus subterminalis` <int> 0, 0, 17, 0, 0, 0, 0, 0, 1, 0, 0, 0, …
## $ `Schoenoplectus tabernaemontani` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Schoenoplectus x oblongus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Scirpus atrovirens` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Scirpus cyperinus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Scirpus validus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Scolochloa festucacea` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Scorpidium scorpioides` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Scutellaria lateriflora` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sium suave` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sparganium americanum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sparganium angustifolium` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sparganium emersum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sparganium eurycarpum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sparganium fluctuans` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0…
## $ `Sparganium natans` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sphagnum magellanicum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Spirodela polyrhiza` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Stuckenia filiformis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Stuckenia pectinata` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0…
## $ `Tolypella intricata` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Triadenum fraseri` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Typha angustifolia` <int> 2, 0, 1, 1, 0, 1, 0, 0, 1, 3, 0, 0, 0…
## $ `Typha glauca` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Typha latifolia` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Utricularia gibba` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Utricularia intermedia` <int> 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ `Utricularia macrorhiza` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Utricularia minor` <int> 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0…
## $ `Utricularia vulgaris` <int> 0, 4, 18, 10, 1, 3, 5, 3, 9, 14, 3, 0…
## $ `Vallisneria americana` <int> 3, 6, 2, 0, 13, 9, 1, 0, 0, 0, 11, 2,…
## $ `Veronica americana` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Wolffia borealis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Wolffia columbiana` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Zannichellia palustris` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Zizania palustris` <int> 0, 14, 4, 34, 33, 22, 4, 28, 0, 39, 2…
## $ acorus <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ alisma <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ alnus <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ bidens <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ callitriche <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ carex <int> 3, 0, 4, 1, 0, 9, 0, 0, 1, 0, 0, 0, 0…
## $ ceratophyllum <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ chara <int> 0, 0, 1, 0, 0, 1, 5, 0, 0, 1, 0, 1, 1…
## $ characeae <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cicuta <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cyperaceae <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ drepanocladus <int> 13, 6, 2, 0, 8, 0, 0, 0, 13, 0, 0, 0,…
## $ elatine <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ eleocharis <int> 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ elodea <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2…
## $ equisetum <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0…
## $ eragrostis <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ eutrochium <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ hypericum <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ impatiens <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ iris <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ isoetes <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ juncus <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ lamiaceae <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ lemna <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ lysimachia <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ myriophyllum <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ najas <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ nitella <int> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ nuphar <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ nymphaea <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ nymphaeaceae <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ persicaria <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ poaceae <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ potamogeton <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `potamogeton (broad)` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `potamogeton (narrow)` <int> 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 25, …
## $ ranunculus <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ riccia <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ sagittaria <int> 2, 0, 4, 1, 3, 0, 0, 0, 3, 10, 6, 0, …
## $ salix <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ schoenoplectus <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ scirpus <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ scutellaria <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ solidago <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ sparganium <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ `sparganium (emergent)` <int> 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ `sparganium (floating)` <int> 2, 2, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0…
## $ sphagnum <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ stuckenia <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ typha <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ utricularia <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ verbena <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ wolffia <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ zizania <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ zosterella <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
# export_names_surveys <- tolower(names(surveys))
# DOW <int> MN Dept of Waters Ident.
# LAKE_NAME <chr> Name of the lake.
# order_ID <int> key used to link to MN Hydrography dataset
# SUBBASIN <chr> Sub-basin where the observation was made.
# watershed <dbl> Watershed associated with the observation. numeric key for watershed (see watershed_occurrence_wide for detail on watersheds like names, sizes, etc. )
# watershedrichness <int> taxa richness across all surveys in watershed
# watershedsimpson_nat <dbl> inverse simpsons diversity in watershed
# SURVEY_ID <int> Identification number for the survey.
# SURVEY_DATASOURCE <chr> Name of the source of the survey data.
# SURVEY_DATE <IDate> Date when the survey was conducted, if multiple dates uses the first day of the survey.
# MULTIPARTSURVEY <dbl> Indicator for if the survey is part of a larger survey. Numeric with structure of [SURVEY.PART]
# Secchi_m <dbl> Nearest temporal Secchi depth measured in meters.
# Secchi_m_date <IDate> Date when Secchi depth was measured.
# nobs <int> number of observations in this survey (5 taxa observed at a point = 5 observations--thus this metric can exceed the tot n samp vlaue)
# tot_n_samp <int> total number of samples taken/points sampled
# max_depth_surveyed <dbl> max depth that survyors sampled (ALL DEPTHS IN FEET)
# min_depth_surveyed <dbl> min depth that surveyors sampled (ALL DEPTHS IN FEET)
# mean_depth_surveyed <dbl> mean depth that surveyors sampled (ALL DEPTHS IN FEET)
# median_depth_surveyed <dbl> median depth that surveyors sampled (ALL DEPTHS IN FEET)
# IQR_depth_surveyed <dbl> inter-quartile range depth that surveyors sampled (ALL DEPTHS IN FEET)
# max_depth_vegetated <dbl> maximum depth where vegetation was observed (ALL DEPTHS IN FEET)
# min_depth_vegetated <dbl> min depth where vegetation was observed (ALL DEPTHS IN FEET)
# mean_depth_vegetated <dbl> mean depth where vegetation was observed (ALL DEPTHS IN FEET)
# median_depth_vegetated <dbl> median depth where vegetation was observed (ALL DEPTHS IN FEET)
# IQR_depth_vegetated <dbl> inter-quartile range depth where vegetation was observed (ALL DEPTHS IN FEET)
# alltime_maxvegdep <dbl> the max depth of plants ever observed in this lake (across all surveys in this db)
# alltime_maxvegdep_n_samp <int> Number of samples taken from points less than alltime_maxvegdep during this survey
# survey_maxvegdep <dbl> Survey maximum vegetation depth.
# survey_maxvegdep_n_samp <int> Number of samples for survey maximum vegetation depth.
# n_points_vegetated <int> Number of points with veg present
# prop_veg <dbl> n_points_vegetated/tot_n_samp
# shannon_div <dbl> Shannon diversity index for this survey
# simpsons_div <dbl> survey inverse Simpson's diversity index.
# shannon_div_nat <dbl> survey Shannon diversity index including native taxa only.
# simpsons_div_nat <dbl> survey inverse Simpson's diversity index including native taxa only
# taxa_richness <dbl> count of taxa in this survey
# nat_richness <dbl> native species taxon count in this survey
# the remaining columns are species occurrence count columns, indicating the number of points at which a taxon was observed (1+) or not observed (0) during a survey.
# fwrite(surveys, file = "data&scripts/data/output/DRUM/surveys_aqplants.csv")
# names(missing_data_surveys)
missing_data_surveys[ , c( "STA_NBR_DATASOURCE", "SURVEY_ID_DATASOURCE", "SAMPLE_NOTES", "OLD_SURVEY_ID", "DATESURVEYSTART",
"COHORT","POINT_ID" ,"DEPTH_FT", "NO_VEG_FOUND", "WHOLE_RAKE_REL_ABUND","SUBSTRATE", "SURFACE_GROWTH",
"POINT_LVL_SECCHI","OBS_ID", "TAXON", "REL_ABUND","RAKE_SCALE_USED",
"X", "Y", "NORTHING", "EASTING", "LATITUDE", "LONGITUDE", "UTMX", "UTMY"
) := NULL , ]
setcolorder(missing_data_surveys, c("DOW", "LAKE_NAME", "SUBBASIN",
"DATASOURCE", "SURVEY_ID", "SURVEY_DATASOURCE", "SURVEY_DATE", "MULTIPARTSURVEY", "SURVEYOR",
"DATEINFO", "MONTH", "DAY", "YEAR",
"INVENTORY_STAFF", "INVENTORY_STAFFDATE", "USEABLE", "CLEANED", "INDATABASE",
"INVENTORY_NOTES", "SUBMISSION_STAFF", "SUBMISSION_STAFFDATE", "SUBMISSION_NOTES", "SURVEY_FEEDBACK"
))
# export_missing_data_surveys <- tolower(names(missing_data_surveys))
glimpse(missing_data_surveys)
## Rows: 257
## Columns: 23
## $ DOW <int> 62000600, 62000600, 62000600, 62000600, 62000600,…
## $ LAKE_NAME <chr> "kohlman", "kohlman", "kohlman", "kohlman", "kohl…
## $ SUBBASIN <chr> "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ DATASOURCE <chr> "source_14", "source_14", "source_14", "source_14…
## $ SURVEY_ID <int> 3319, 3320, 3321, 3322, 3323, 3324, 3325, 3326, 3…
## $ SURVEY_DATASOURCE <chr> "Ramsey County", "Ramsey County", "Ramsey County"…
## $ SURVEY_DATE <IDate> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ MULTIPARTSURVEY <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ SURVEYOR <chr> "Ramsey County", "Ramsey County", "Ramsey County"…
## $ DATEINFO <chr> "", "", "", "", "", "", "", "", "", "", "", "", "…
## $ MONTH <int> 8, 8, 4, 8, 5, 4, 8, 9, 6, 9, 6, 8, 8, 9, 8, 8, 9…
## $ DAY <int> 23, 21, 28, 19, 13, 1, 23, 26, 16, 10, 9, 23, 26,…
## $ YEAR <int> 2004, 2007, 2008, 2008, 2009, 2010, 2004, 2008, 2…
## $ INVENTORY_STAFF <chr> "Staff_1", "Staff_1", "Staff_1", "Staff_1", "Staf…
## $ INVENTORY_STAFFDATE <chr> "1/13/2020", "1/13/2020", "1/13/2020", "1/13/2020…
## $ USEABLE <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",…
## $ CLEANED <chr> "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",…
## $ INDATABASE <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ INVENTORY_NOTES <chr> "No raw data;", "No raw data;", "No raw data;", "…
## $ SUBMISSION_STAFF <chr> "staff_5", "staff_5", "staff_3", "staff_3", "staf…
## $ SUBMISSION_STAFFDATE <chr> "2/21/2019", "2/21/2019", "8/6/2019", "8/6/2019",…
## $ SUBMISSION_NOTES <chr> "excel with general stats but no raw data;", "exc…
## $ SURVEY_FEEDBACK <chr> "no data available", "no data available", "no dat…
# DOW <int> MN Dept of Waters Ident.
# LAKE_NAME <chr> Name of the lake
# SUBBASIN <chr> Subbasin where the survey was conducted if applicable
# DATASOURCE <chr> Internal listing for source that identified the survey
# SURVEY_ID <int> Unique identifier for the survey
# SURVEY_DATASOURCE <chr> Source or authority for the survey data (could be contacted to try to acquire these data)
# SURVEY_DATE <IDate> Date when the survey was conducted, if multiple dates uses the first day of the survey
# MULTIPARTSURVEY <dbl> Indicator for if the survey is part of a larger survey. Numeric with structure of SURVEY.PART
# SURVEYOR <chr> Surveyor name(s) if known
# DATEINFO <chr> Date information that may help in identifying the survey
# MONTH <int> Month of the survey
# DAY <int> Day of the survey.
# YEAR <int> Year of the survey
# INVENTORY_STAFF <chr> Inventory staff name
# INVENTORY_STAFFDATE <chr> Date of inventory by staff
# USEABLE <chr> Indicator for data usability as submitted to project team
# CLEANED <chr> Indicator for successful pre-cleaning of the data
# INDATABASE <lgl> Indicator for sucessful processing into database
# INVENTORY_NOTES <chr> Inventory notes from project staff
# SUBMISSION_STAFF <chr> staff name that processed the original submission
# SUBMISSION_STAFFDATE <chr> Date of submission processing
# SUBMISSION_NOTES <chr> Submission notes from project staff
# SURVEY_FEEDBACK <chr> Feedback from the survey of data contributors
# fwrite(missing_data_surveys, file = "data&scripts/data/output/DRUM/missing_data_surveys.csv")
# watersheds_huc8[ , simpson_div_nat := NULL , ]
watershed_occurrence_wide <- watershed_occurrence_wide[as.data.table(watersheds_huc8)[ ,.SD , .SDcols = !c("simpson_div_nat")], on = .(watershed=major) , ]
watershed_occurrence_wide[ , c("NA", "HUC_8", "Shape_Leng", "Shape_Area") := NULL , ]
# colnames(watershed_occurrence_wide)
names <- c("watershed","major_name",
"acres", "sq_miles", "prod_year", "source", "geometry",
"n_points", "n_species", "simpson_div_nat")
setcolorder(watershed_occurrence_wide, names )
glimpse(watershed_occurrence_wide
)
## Rows: 81
## Columns: 241
## $ watershed <dbl> 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, …
## $ major_name <chr> "Mississippi River - Headwaters", "Le…
## $ acres <dbl> 1228884, 857968, 1332793, 1076295, 50…
## $ sq_miles <dbl> 1920, 1341, 2082, 1682, 783, 1983, 89…
## $ prod_year <dbl> 2009, 2009, 2009, 2009, 2009, 2009, 2…
## $ source <chr> "DNR Catchment Dataset", "DNR Catchme…
## $ geometry <MULTIPOLYGON [m]> MULTIPOLYGON (((449453.7…
## $ n_points <int> 8458, 23372, 7304, 7560, 15915, 18619…
## $ n_species <int> 93, 83, 107, 107, 92, 115, 39, 79, 75…
## $ simpson_div_nat <dbl> 18.708643, 11.810110, 17.020477, 17.0…
## $ `Acorus americanus` <int> 0, 3, 2, 4, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ `Alisma triviale` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Andromeda glaucophylla` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Andromeda polifolia` <int> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Asclepias incarnata` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Asteraceae <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Betula pumila` <int> 0, 0, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Bidens beckii` <int> 89, 162, 103, 159, 404, 251, 0, 146, …
## $ `Bolboschoenus fluviatilis` <int> 7, 0, 2, 0, 2, 0, 0, 0, 7, 0, 0, 0, 9…
## $ `Bolboschoenus maritimus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Boltonia asteroides` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6…
## $ `Brasenia schreberi` <int> 20, 279, 186, 297, 166, 604, 16, 127,…
## $ `Butomus umbellatus` <int> 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Calamagrostis canadensis` <int> 0, 0, 4, 6, 0, 9, 0, 0, 0, 0, 0, 0, 0…
## $ `Calla palustris` <int> 0, 0, 4, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Caltha palustris` <int> 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0…
## $ `Carex aquatilis` <int> 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ `Carex comosa` <int> 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0…
## $ `Carex lacustris` <int> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1…
## $ `Carex pellita` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Carex scoparia` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Ceratophyllum demersum` <int> 1493, 1827, 1709, 2756, 4378, 5275, 7…
## $ `Chamaedaphne calyculata` <int> 0, 0, 5, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ `Chara canescens` <int> 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Chara globularis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Cicuta maculata` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Drepanocladus aduncus` <int> 1, 2, 1, 0, 3, 2, 0, 1, 0, 0, 0, 0, 0…
## $ `Dulichium arundinaceum` <int> 1, 6, 17, 4, 4, 9, 0, 0, 0, 4, 0, 0, …
## $ `Elatine minima` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Eleocharis acicularis` <int> 5, 38, 6, 48, 119, 26, 0, 91, 18, 0, …
## $ `Eleocharis erythropoda` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Eleocharis palustris` <int> 8, 1, 12, 0, 0, 11, 0, 0, 0, 0, 0, 0,…
## $ `Eleocharis smallii` <int> 4, 0, 2, 9, 1, 10, 0, 0, 0, 0, 0, 2, …
## $ `Elodea canadensis` <int> 651, 2280, 113, 1023, 1949, 2667, 9, …
## $ `Elodea nuttallii` <int> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Equisetum fluviatile` <int> 44, 2, 23, 10, 15, 18, 0, 16, 2, 0, 0…
## $ `Eriocaulon aquaticum` <int> 0, 3, 4, 14, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `Eupatorium dubium` <int> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Eupatorium maculatum` <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Eupatorium perfoliatum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Fontinalis antipyretica` <int> 0, 0, 0, 36, 0, 0, 0, 27, 0, 0, 1, 0,…
## $ `Fontinalis sullivantii` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Glyceria borealis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Heteranthera dubia` <int> 49, 162, 46, 282, 481, 290, 0, 211, 1…
## $ `Hippuris vulgaris` <int> 24, 21, 0, 0, 0, 87, 2, 7, 0, 1, 0, 8…
## $ `Hypericum ellipticum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Impatiens capensis` <int> 2, 0, 0, 0, 0, 5, 0, 0, 1, 0, 0, 0, 0…
## $ `Iris versicolor` <int> 2, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 2, 0…
## $ `Iris virginica` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Isoetes echinospora` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Juncus arcticus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Juncus canadensis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Juncus effusus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Juncus pelocarpus` <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Ledum groenlandicum` <int> 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Leersia oryzoides` <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Lemna minor` <int> 5, 0, 1, 28, 3, 92, 0, 2, 73, 58, 357…
## $ `Lemna trisulca` <int> 772, 183, 490, 1644, 548, 749, 0, 919…
## $ `Lemna turionifera` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Littorella uniflora` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Lobelia dortmanna` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Lychnothamnus barbatus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Lycopus americanus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ `Lysimachia terrestris` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Lythrum salicaria` <int> 1, 0, 0, 2, 0, 6, 0, 0, 0, 0, 0, 0, 0…
## $ `Menyanthes trifoliata` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Myrica gale` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Myriophyllum alterniflorum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Myriophyllum exalbescens` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Myriophyllum farwellii` <int> 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `Myriophyllum sibiricum` <int> 513, 1486, 464, 484, 2250, 1769, 20, …
## $ `Myriophyllum spicatum` <int> 0, 0, 0, 12, 87, 0, 0, 113, 0, 0, 89,…
## $ `Myriophyllum tenellum` <int> 0, 43, 2, 4, 5, 0, 0, 0, 0, 0, 0, 0, …
## $ `Myriophyllum verticillatum` <int> 2, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 17, …
## $ `Najas flexilis` <int> 1101, 1847, 319, 501, 1608, 3493, 128…
## $ `Najas guadalupensis` <int> 48, 50, 3, 5, 1937, 566, 0, 1595, 222…
## $ `Najas minor` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Nasturtium officinale` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ `Nelumbo lutea` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Nitellopsis obtusa` <int> 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 196…
## $ `Nuphar advena` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9…
## $ `Nuphar microphylla` <int> 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Nuphar variegata` <int> 652, 484, 392, 868, 413, 1361, 80, 61…
## $ `Nymphaea odorata` <int> 639, 544, 666, 1143, 736, 1277, 42, 6…
## $ `Nymphaea tuberosa` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Persicaria amphibia` <int> 0, 0, 2, 0, 4, 14, 0, 0, 0, 0, 0, 0, …
## $ `Persicaria lapathifolia` <int> 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0…
## $ `Phalaris arundinacea` <int> 0, 0, 0, 0, 3, 2, 1, 2, 2, 0, 0, 3, 1…
## $ `Phragmites australis` <int> 38, 11, 3, 25, 5, 13, 0, 5, 4, 1, 1, …
## $ `Polygonum amphibium` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0…
## $ `Pontederia cordata` <int> 0, 0, 19, 2, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ `Potamogeton alpinus` <int> 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton amplifolius` <int> 120, 905, 134, 391, 598, 1015, 22, 38…
## $ `Potamogeton crispus` <int> 95, 0, 83, 1048, 97, 1211, 1, 1082, 4…
## $ `Potamogeton epihydrus` <int> 0, 0, 21, 13, 10, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton foliosus` <int> 2, 121, 3, 3, 0, 128, 0, 118, 0, 17, …
## $ `Potamogeton friesii` <int> 389, 441, 528, 162, 713, 547, 0, 368,…
## $ `Potamogeton gramineus` <int> 78, 387, 165, 186, 551, 265, 2, 154, …
## $ `Potamogeton hillii` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton illinoensis` <int> 293, 359, 199, 93, 682, 692, 2, 531, …
## $ `Potamogeton natans` <int> 374, 398, 200, 179, 225, 484, 59, 183…
## $ `Potamogeton nodosus` <int> 0, 0, 7, 1, 0, 0, 7, 1, 4, 2, 1, 26, …
## $ `Potamogeton obtusifolius` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton praelongus` <int> 269, 784, 175, 349, 1234, 754, 1, 629…
## $ `Potamogeton pusillus` <int> 1, 21, 1, 8, 14, 22, 1, 52, 7, 41, 24…
## $ `Potamogeton richardsonii` <int> 335, 532, 238, 95, 677, 568, 5, 520, …
## $ `Potamogeton robbinsii` <int> 221, 1174, 58, 384, 328, 1306, 9, 805…
## $ `Potamogeton spirillus` <int> 0, 1, 13, 1, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ `Potamogeton strictifolius` <int> 12, 1, 0, 0, 1, 139, 0, 4, 0, 0, 2, 0…
## $ `Potamogeton vaseyi` <int> 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Potamogeton zosteriformis` <int> 1441, 2676, 840, 886, 3540, 3292, 39,…
## $ `Potentilla palustris` <int> 2, 0, 16, 14, 1, 2, 1, 0, 0, 0, 0, 0,…
## $ `Protectedspecies 1` <int> 1, 3, 58, 18, 2, 10, 0, 0, 14, 0, 0, …
## $ `Protectedspecies 10` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Protectedspecies 11` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ `Protectedspecies 12` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, …
## $ `Protectedspecies 2` <int> 0, 0, 0, 0, 0, 49, 2, 12, 0, 0, 0, 12…
## $ `Protectedspecies 3` <int> 9, 0, 2, 0, 0, 0, 0, 10, 0, 0, 0, 3, …
## $ `Protectedspecies 4` <int> 0, 3, 0, 12, 0, 3, 0, 0, 0, 0, 0, 0, …
## $ `Protectedspecies 5` <int> 0, 64, 0, 0, 0, 39, 0, 0, 0, 0, 0, 0,…
## $ `Protectedspecies 6` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Protectedspecies 7` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Protectedspecies 8` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Protectedspecies 9` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Ranunculus aquatilis` <int> 2, 0, 0, 0, 0, 6, 0, 3, 23, 0, 42, 47…
## $ `Ranunculus flabellaris` <int> 3, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0…
## $ `Ranunculus flammula` <int> 0, 14, 0, 3, 15, 0, 0, 0, 0, 0, 0, 0,…
## $ `Ranunculus longirostris` <int> 0, 0, 0, 24, 0, 44, 0, 6, 0, 9, 0, 0,…
## $ `Riccia fluitans` <int> 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0…
## $ `Ricciocarpos natans` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Rumex orbiculatus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, …
## $ `Sagittaria cristata` <int> 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0…
## $ `Sagittaria cuneata` <int> 3, 0, 4, 5, 1, 0, 0, 1, 3, 0, 0, 0, 0…
## $ `Sagittaria graminea` <int> 0, 0, 2, 4, 2, 1, 0, 31, 0, 0, 0, 0, …
## $ `Sagittaria latifolia` <int> 2, 2, 11, 1, 0, 26, 0, 0, 0, 0, 0, 2,…
## $ `Sagittaria rigida` <int> 0, 0, 7, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0…
## $ `Schoenoplectus acutus` <int> 480, 81, 151, 360, 24, 599, 25, 255, …
## $ `Schoenoplectus americanus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Schoenoplectus pungens` <int> 3, 3, 3, 4, 20, 15, 0, 3, 0, 0, 0, 0,…
## $ `Schoenoplectus subterminalis` <int> 8, 97, 36, 81, 24, 83, 0, 0, 3, 0, 0,…
## $ `Schoenoplectus tabernaemontani` <int> 1, 0, 6, 5, 4, 7, 0, 6, 7, 2, 10, 6, …
## $ `Schoenoplectus x oblongus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Scirpus atrovirens` <int> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Scirpus cyperinus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Scirpus validus` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Scolochloa festucacea` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Scorpidium scorpioides` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Scutellaria lateriflora` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sium suave` <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0…
## $ `Sparganium americanum` <int> 0, 0, 3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sparganium angustifolium` <int> 0, 1, 0, 1, 1, 10, 0, 0, 0, 0, 0, 0, …
## $ `Sparganium emersum` <int> 7, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Sparganium eurycarpum` <int> 4, 2, 13, 12, 14, 2, 0, 5, 2, 1, 0, 5…
## $ `Sparganium fluctuans` <int> 3, 2, 18, 12, 29, 14, 0, 0, 0, 0, 0, …
## $ `Sparganium natans` <int> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ `Sphagnum magellanicum` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Spirodela polyrhiza` <int> 31, 31, 12, 43, 33, 64, 0, 1, 121, 2,…
## $ `Stuckenia filiformis` <int> 19, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, …
## $ `Stuckenia pectinata` <int> 429, 246, 248, 189, 498, 553, 5, 438,…
## $ `Tolypella intricata` <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Triadenum fraseri` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Typha angustifolia` <int> 27, 11, 39, 41, 12, 157, 0, 82, 37, 8…
## $ `Typha glauca` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Typha latifolia` <int> 22, 4, 21, 2, 6, 33, 4, 0, 0, 6, 1, 4…
## $ `Utricularia gibba` <int> 1, 42, 3, 21, 30, 8, 11, 0, 46, 0, 0,…
## $ `Utricularia intermedia` <int> 41, 75, 11, 64, 95, 103, 0, 7, 28, 0,…
## $ `Utricularia macrorhiza` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Utricularia minor` <int> 23, 59, 29, 102, 52, 139, 0, 62, 8, 0…
## $ `Utricularia vulgaris` <int> 604, 929, 400, 466, 703, 963, 41, 127…
## $ `Vallisneria americana` <int> 526, 780, 425, 280, 1277, 446, 8, 558…
## $ `Veronica americana` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Wolffia borealis` <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ `Wolffia columbiana` <int> 0, 1, 0, 1, 0, 0, 0, 1, 6, 0, 173, 11…
## $ `Zannichellia palustris` <int> 1, 0, 0, 8, 1, 10, 0, 3, 0, 25, 102, …
## $ `Zizania palustris` <int> 1078, 808, 693, 894, 1026, 1133, 77, …
## $ acorus <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ alisma <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, …
## $ alnus <int> 0, 0, 4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0…
## $ bidens <int> 0, 0, 0, 0, 1, 9, 0, 0, 0, 0, 0, 0, 0…
## $ callitriche <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ carex <int> 19, 2, 53, 41, 10, 64, 4, 14, 8, 10, …
## $ ceratophyllum <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ chara <int> 1977, 6712, 1931, 1298, 4144, 4953, 6…
## $ characeae <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cicuta <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cyperaceae <int> 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0…
## $ drepanocladus <int> 38, 18, 71, 161, 5, 104, 3, 34, 37, 0…
## $ elatine <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ eleocharis <int> 53, 62, 47, 79, 91, 104, 1, 14, 12, 0…
## $ elodea <int> 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 2, 26, …
## $ equisetum <int> 9, 2, 44, 22, 6, 7, 0, 0, 0, 0, 1, 0,…
## $ eragrostis <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ eutrochium <int> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ hypericum <int> 0, 0, 0, 8, 0, 11, 0, 0, 0, 0, 0, 0, …
## $ impatiens <int> 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 1, 2, 1…
## $ iris <int> 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ isoetes <int> 0, 32, 3, 17, 12, 4, 0, 1, 0, 0, 0, 0…
## $ juncus <int> 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ lamiaceae <int> 0, 0, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0…
## $ lemna <int> 27, 4, 10, 8, 17, 27, 0, 7, 12, 0, 0,…
## $ lysimachia <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ myriophyllum <int> 8, 44, 2, 20, 0, 26, 0, 146, 0, 0, 14…
## $ najas <int> 218, 805, 62, 147, 754, 608, 0, 514, …
## $ nitella <int> 9, 13, 11, 162, 13, 143, 0, 9, 14, 0,…
## $ nuphar <int> 26, 11, 176, 0, 0, 238, 0, 0, 33, 1, …
## $ nymphaea <int> 16, 0, 119, 0, 0, 78, 0, 0, 38, 8, 0,…
## $ nymphaeaceae <int> 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0…
## $ persicaria <int> 2, 0, 0, 4, 1, 4, 0, 4, 1, 1, 3, 4, 0…
## $ poaceae <int> 2, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0…
## $ potamogeton <int> 203, 569, 174, 219, 1265, 648, 0, 397…
## $ `potamogeton (broad)` <int> 0, 1, 11, 0, 6, 5, 0, 2, 1, 0, 0, 0, …
## $ `potamogeton (narrow)` <int> 112, 73, 107, 138, 45, 420, 31, 119, …
## $ ranunculus <int> 91, 57, 192, 79, 292, 147, 4, 134, 10…
## $ riccia <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ sagittaria <int> 63, 112, 116, 97, 180, 120, 8, 27, 56…
## $ salix <int> 0, 0, 1, 8, 1, 6, 0, 1, 1, 0, 0, 17, …
## $ schoenoplectus <int> 567, 551, 353, 255, 513, 409, 3, 186,…
## $ scirpus <int> 39, 0, 98, 0, 0, 108, 0, 47, 1, 22, 0…
## $ scutellaria <int> 0, 0, 1, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ solidago <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ sparganium <int> 35, 34, 27, 24, 20, 47, 1, 28, 13, 0,…
## $ `sparganium (emergent)` <int> 7, 0, 5, 3, 2, 7, 0, 0, 0, 0, 1, 0, 0…
## $ `sparganium (floating)` <int> 5, 0, 36, 8, 16, 7, 0, 0, 0, 0, 3, 24…
## $ sphagnum <int> 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0…
## $ stuckenia <int> 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ typha <int> 34, 23, 193, 10, 7, 198, 0, 30, 44, 2…
## $ utricularia <int> 104, 15, 5, 17, 5, 376, 1, 353, 15, 0…
## $ verbena <int> 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0…
## $ wolffia <int> 0, 0, 0, 15, 3, 21, 0, 0, 2, 0, 0, 0,…
## $ zizania <int> 0, 0, 0, 260, 0, 99, 0, 33, 55, 1, 3,…
## $ zosterella <int> 0, 0, 0, 0, 0, 4, 0, 0, 13, 0, 0, 0, …
# A. Name: watershed
# Description: numeric code for watershed. matches to same field in other dataset. See also source file 4. from SHARING/ACCESS SECTION (DNR Watersheds 2023).
#
# B. Name: major_name
# Description: name of major watershed that corresponds to the watershed code. See also source file 4. from SHARING/ACCESS SECTION (DNR Watersheds 2023).
#
# C. Name: acres
# Description: acres encompassed by watershed. See also source file 4. from SHARING/ACCESS SECTION (DNR Watersheds 2023).
#
# D. Name: sq_mile
# Description: square miles encompassed by watershed. See also source file 4. from SHARING/ACCESS SECTION (DNR Watersheds 2023).
#
# E. Name: prod_year
# Description: The year of production associated with polygon linework. See also source file 4. from SHARING/ACCESS SECTION (DNR Watersheds 2023).
#
# F. Name: source
# Description: The source of polygon linework. See also source file 4. from SHARING/ACCESS SECTION (DNR Watersheds 2023).
#
# G. Name: n_points
# Description: number of points sampled in the watershed (not resampling of a points is unaccounted for, so resampled points are counted as n points where n = number of resamples)
#
# H. Name: n_species
# Description: number of unique taxa observed in the watershed
#
# I. Name: simpson_div_nat
# Description: inverse Simpson's diversity of watershed taxa community
#
# J.- END: [name of taxon observed in the database]
# Description: Number of observations of [named taxa] in this watershed
# fwrite(watershed_occurrence_wide[ ,.SD ,.SDcols = !c("geometry" )], file = "data&scripts/data/output/DRUM/watershed_occurrence_wide.csv")
# DataVizFigs -------------------------------------------------------
# **distribution of lakes and surveys ---------------------------------------
#plot with lakes shapes and point locs!!
# pwi_l[order_ID %in% unique(plants[ , order_ID]), ,]
#
# ggplot(pwi_l[order_ID %in% plants[ , unique(order_ID) , ] , , ], aes(geometry=geometry)) +
# geom_sf() +
# labs(caption = "Map of lakes with surveys in our database")
#Conversion of data frame to sf object to add points
plants_pts <- st_as_sf(x = plants[!is.na(Longitude)],
coords = c("Longitude", "Latitude"),
crs = "+proj=lonlat +datum=WGS84")
# #map points
# ggplot(plants_pts, aes(geometry=geometry)) +
# geom_sf() +
# labs(caption = "Map of survey points in our database")
#plot all together!
#other data for fig
# usa <- map_data("usa")
# canada <- map_data("world", region = "canada")
# states <- map_data("state")
states <- sf::st_as_sf(maps::map("state", plot = FALSE, fill = TRUE))
mn_df <- subset(states, ID == "minnesota")
#Projection transformation
plants_pts = st_transform(plants_pts, crs = "+proj=utm +zone=15")
pwi_l <- st_sf(pwi_l)
pwi_l <- st_transform(pwi_l, crs = st_crs(mn_df))
setDT(pwi_l)
watersheds_huc8 <- st_sf(watersheds_huc8)
watersheds_huc8 <- st_transform(watersheds_huc8, crs = st_crs(mn_df))
#map
study_map <- ggplot(data = pwi_l, aes(geometry=geometry))+
geom_sf(data = watersheds_huc8,aes(geometry = geometry), alpha = .05, color = "gray")+
geom_sf(alpha = .5, color = "blue")+
# geom_point(
# aes(geometry = geometry),
# stat = "sf_coordinates", color = "blue", alpha = 0.5)+
geom_point(data = pwi_l[order_ID %in% plants[ , unique(order_ID) , ] , , ],
stat = "sf_coordinates",
aes(geometry=geometry),
color = "red", alpha = .5)+
geom_sf(data = mn_df,aes(geometry = geom), color = "black", alpha = .05)+
scale_shape_discrete(solid = FALSE)+
theme(text = element_text(size=20), legend.position = )+
# theme_bw()+
ylab("Longitude")+
xlab("Latitude")
# study_map
pwi_l[order_ID %in% plants[ , unique(order_ID) , ] , plant_survey := T ,]
ggplot(pwi_l, aes(lake_class))+
scale_y_log10()+
geom_histogram( binwidth = 1 )+
geom_histogram(binwidth = 1, data =pwi_l[plant_survey == T], aes(lake_class), color = "red", alpha = .5)+
labs( title = "Distribution versus samping of Schupps lake classes \n
https://files.dnr.state.mn.us/publications/fisheries/investigational_reports/417.pdf")+
scale_x_continuous(breaks = seq(0,44,2) )
# geom_density(aes(color = plant_survey))
#lake area
ggplot(pwi_l, aes(acres.x))+
scale_x_log10()+
scale_y_log10()+
geom_histogram( )+
geom_histogram( data = pwi_l[plant_survey == T], aes(acres.x), color = "red", alpha = .5)
ggplot(pwi_l, aes(acres.x))+
# geom_histogram()+
scale_x_log10()+
geom_density(aes(color = plant_survey))
# hist(pwi_l$lake_class)
# temporal accumulation ----------------------------------------------------
# of surveys
plants[ , length(unique(SURVEY_ID)), year(SURVEY_DATE)]
## year V1
## <int> <int>
## 1: 2011 276
## 2: 2014 315
## 3: 2009 295
## 4: 2010 264
## 5: 2002 56
## 6: 2006 157
## 7: 2012 352
## 8: 2005 127
## 9: 2007 188
## 10: 2013 272
## 11: 2003 111
## 12: 2008 243
## 13: 2004 97
## 14: 2017 77
## 15: 2015 183
## 16: 2016 94
## 17: 2018 77
## 18: 2001 8
## 19: 2000 3
#of obs
plants[ , length(unique(OBS_ID)), year(SURVEY_DATE)]
## year V1
## <int> <int>
## 1: 2011 65833
## 2: 2014 62634
## 3: 2009 71907
## 4: 2010 65009
## 5: 2002 10736
## 6: 2006 40471
## 7: 2012 57732
## 8: 2005 27986
## 9: 2007 37378
## 10: 2013 49961
## 11: 2003 18285
## 12: 2008 63651
## 13: 2004 22362
## 14: 2017 26889
## 15: 2015 49523
## 16: 2016 30121
## 17: 2018 28381
## 18: 2001 2157
## 19: 2000 1282
#taxa
plants[!is.na(TAXON) , length(unique(TAXON)), year(SURVEY_DATE)]
## year V1
## <int> <int>
## 1: 2011 125
## 2: 2014 152
## 3: 2009 138
## 4: 2010 133
## 5: 2002 67
## 6: 2006 103
## 7: 2012 126
## 8: 2005 94
## 9: 2007 112
## 10: 2013 136
## 11: 2003 82
## 12: 2008 134
## 13: 2004 95
## 14: 2017 90
## 15: 2015 121
## 16: 2016 123
## 17: 2018 101
## 18: 2001 50
## 19: 2000 41
missing_data_surveys[ , .N , SURVEY_DATE]
## SURVEY_DATE N
## <IDat> <int>
## 1: <NA> 243
## 2: 2009-06-24 1
## 3: 2012-09-14 1
## 4: 2014-09-09 1
## 5: 2012-07-31 1
## 6: 2011-08-23 1
## 7: 2009-07-30 1
## 8: 2012-08-17 1
## 9: 2006-08-01 1
## 10: 2004-06-01 1
## 11: 2018-07-29 1
## 12: 2017-06-06 1
## 13: 2017-08-07 1
## 14: 2017-07-20 1
## 15: 2017-08-24 1
plotdat <- rbindlist(list(plants[ , first(SURVEY_DATE) , SURVEY_ID],missing_data_surveys[ , first(SURVEY_DATE) , SURVEY_ID]))[!is.na(V1)]
setorder(plotdat, V1)
plotdat[ , cumval := .I , ]
temporal_accumulation <- ggplot(plotdat, aes(V1, cumval)) +
geom_line()+
# theme_bw()+
xlab("Year")+
ylab("Cumulative Survey Count")+
theme(text = element_text(size=20), legend.position = )
# plotdat[ , metric := "surveys" , ]
#
# plotdat_pts <- plants[ , first(SURVEY_DATE) , POINT_ID]
# setorder(plotdat_pts, V1)
# plotdat_pts[ , cumval := .I , ]
# plotdat_pts[ , metric := "points" , ]
#
#
# plotdat_taxa <- plants[ , first(SURVEY_DATE) , TAXON]
# setorder(plotdat_taxa, V1)
# plotdat_taxa[ , cumval := .I , ]
# plotdat_taxa[ , metric := "taxa" , ]
#
#
# plotdat_all <- rbind(rbind(plotdat[ ,2:4 ], plotdat_pts[ ,2:4 ] ) , plotdat_taxa[ ,2:4 ])
#
# ggplot(plotdat_all, aes(V1, cumval)) +
# geom_line()+
# facet_wrap(~ metric, scales = "free")
#
# arrange
plots.row <- align_plots(study_map, temporal_accumulation, align="hv",
axis="tblr")
div.rows <- plot_grid(plots.row[[1]], plots.row[[2]],
nrow=1, label_size = 20,
label_fontface = "plain", labels = c("(a)", "(b)"),
hjust = -0, vjust = 2.4)
#write to file
# png(file = "Fig_Map_Time.png", width = 10, height = 5, units = "in", res = 1200)
div.rows
# dev.off()
# species abundance distributions -------------------------------------
sad.dat <- plants[!is.na(TAXON) , .N , TAXON]
setorder(sad.dat, -N)
sad.dat[ , TAXON := factor(TAXON, levels = sad.dat$TAXON)]
sad.dat[ , perc_abund := N/sum(sad.dat$N) , ]
# ggplot(plotdat[], aes(TAXON, perc_abund))+
# geom_point()+
# scale_y_log10()+
# xlab("Taxa")+
# ylab("log10(percent of all observations)")+
# theme_bw()+
# theme(axis.text.x = element_blank())
# write.csv(plotdat, "data&scripts/data/output/species_abund_list.csv")
Excluding unvegetated points, recalculating and ordering by perc-abund
sad.dat <-
sad.dat %>%
filter(TAXON != "") %>%
mutate(perc_abund = N/sum(N)) %>%
arrange(desc(perc_abund)) %>%
mutate(rank = 1:n()) %>%
rename(Taxon = TAXON)
sum(sad.dat$perc_abund)
## [1] 1
#' Most and least abundant
top1 <-
sad.dat %>%
filter(rank %in% 1:22) %>%
dplyr::select(Taxon, N)
top2 <-
sad.dat %>%
filter(rank %in% 23:50) %>%
dplyr::select(Taxon, N)
sad.plot <- ggplot(sad.dat, aes(x = rank, y = perc_abund)) +
geom_segment(aes(xend = 105, y = 0.03, x = 52, yend = 0.03),
arrow = arrow(length = unit(0.25, "cm")), color = "black") +
geom_table(data = top1, aes(x = 175, y = 0.26, label=list(top1)), size = 2,
table.theme=ttheme_gtsimple, vjust = "top", hjust = "right") +
geom_table(data = top2, aes(x = 237, y = 0.26, label=list(top2)), size = 2,
table.theme=ttheme_gtsimple, vjust = "top", hjust = "right") +
geom_point(shape = 19, size = 2, alpha = 0.5, color = "steelblue") +
geom_rect(aes(xmin = -2, xmax = 52, ymin = 0.0015, ymax = 0.18),
fill = "transparent", color = "gray30", linetype = 1) +
scale_y_log10(breaks = c(0.1, 0.01, 0.001, 0.0001, 0.00001),
labels = c(0.1, 0.01, 0.001, expression(paste("1 × 10"^{-4})),
expression(paste("1 × 10"^{-5})))) +
scale_x_continuous(breaks = c(seq(0, 200, 50))) +
xlab(expression(Species~(italic(n)^{th}~most~abundant))) +
ylab("Proportion of total abundance") +
coord_cartesian(xlim = c(5, 225), ylim = c(0.000002, 0.15)) +
theme(panel.grid.minor=element_blank()) +
annotate("text", x = 80, y = 0.03, size = 3, adj = 0.5,
label = "50 most\nabundant taxa")
# png(file = "Fig_SAD.png", width = 5.5, height = 5, units = "in", res = 1200)
sad.plot
# dev.off()
# diversity environment relationships ------------------------------------
summary(plants_rakeabund_wide$simpsons_div_nat)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 1.8 3.6 Inf Inf Inf
summary(plants_rakeabund_wide$DEPTH_FT)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 3.900 6.300 8.463 10.800 146.000
# desc(sort(plants_rakeabund_wide$DEPTH_FT))
#' Removing zero depth, extreme depth, and Inf ENSpie points
plants_rakeabund_wide <-
plants_rakeabund_wide %>%
filter(simpsons_div_nat != "Inf") %>%
filter(DEPTH_FT != 0) %>%
filter(DEPTH_FT < 50)
label_depth <- "Point~scale~(italic(N) == 70745)"
point_depth <- ggplot(plants_rakeabund_wide, aes(DEPTH_FT*0.348, simpsons_div_nat)) +
geom_point(shape = 19, size = 1, alpha = 0.1, color = "steelblue4")+
scale_x_log10() +
geom_smooth(method = 'gam', color = "black") +
ylab(expression(italic(ENS[PIE]))) +
xlab("Water depth (m)") +
ylim(c(0, 15)) +
annotate("text", x = 0.08, y = 15 - 0.025*15, size = 2.25, adj = 0,
label = label_depth, parse = TRUE) +
theme(text = element_text(size = 7), plot.margin = unit(c(0.1, 0, 0.1, 0.1), "cm"))
Removing NAs
summary(surveys$simpsons_div_nat)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.385 4.144 4.938 6.959 20.761
summary(surveys$Secchi_m)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0100 0.6096 1.2192 1.6390 2.2000 11.1000 239
surveys <-
surveys %>%
filter(!is.na(Secchi_m))
label_Secchi <- "Lake~scale~(italic(N) == 2955)"
lake_Secchi <- ggplot(surveys, aes(Secchi_m, simpsons_div_nat)) +
geom_point(shape = 19, size = 1.5, alpha = 0.3, color = "steelblue4")+
geom_smooth(method = 'gam', color = "black") +
ylab("") +
xlab("Secchi depth (m)") +
ylim(c(0, 21)) +
annotate("text", x = 0.07, y = 21 - 21*0.025, size = 2.25, adj = 0,
label = label_Secchi, parse = TRUE) +
theme(text = element_text(size = 7), plot.margin = unit(c(0.1, 0, 0.1, 0), "cm"))
Removing NAs
summary(watersheds_huc8$simpson_div_nat)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.982 6.333 11.182 Inf 16.178 Inf 14
watersheds_huc8 <-
watersheds_huc8 %>%
dplyr::select(-geometry) %>%
filter(!is.na(simpson_div_nat)) %>%
filter(simpson_div_nat != "Inf")
label_area <- "Watershed~scale~(italic(N) == 64)"
wshed_area <- ggplot(watersheds_huc8, aes(acres*0.404686, simpson_div_nat)) +
geom_point(shape = 19, size = 2, alpha = 0.6, color = "steelblue4")+
geom_smooth(method = 'gam', color = "black") +
ylab("") +
xlab("Area (ha)") +
ylim(c(0, 30)) +
scale_x_continuous(breaks = c(0, 200000, 400000, 600000),
labels = c(0, "200,000", "400,000", "600,000")) +
annotate("text", x = 0, y = 30 - 30*0.025, size = 2.25, adj = 0,
label = label_area, parse = TRUE) +
theme(text = element_text(size = 7), plot.margin = unit(c(0.1, 0.1, 0.1, 0), "cm"))
plots.row <- align_plots(point_depth, lake_Secchi, wshed_area, align="hv",
axis="tblr")
div.rows <- plot_grid(plots.row[[1]], plots.row[[2]], plots.row[[3]],
nrow=1, label_size = 7.5,
label_fontface = "plain", labels = c("(a)", "(b)", "(c)"),
hjust = -0.25, vjust = 2)
# png(file = "Fig_DivEnv.png", width = 6.5, height = 2.25, units = "in", res = 1200)
div.rows
# dev.off()
# footer ------------------------------------------------------------------