###DATA MANIPULATION CODE### ###Project: AS Code and data### ###Working directory: Box### ###Code to examine potential participants #Counting number of counties represented data<-read.csv("My Documents/Data_with_identifiers_alltypes CSV.csv") #How many represented from each state? states<-data$CUST_STATE unique.state<-unique(states, incomparables = FALSE) length(unique.state) counts_per_state<-aggregate(data.frame(count = states), list(value = states), length) #counts the number of entries per unique value in the dataset for state counts_per_state #How many represented from each county? counties<-data$CNTYNAME unique.county<-unique(counties, incomparables = FALSE) length(unique.county) counts_per_county<-aggregate(data.frame(count = counties), list(value = counties), length) #counts the number of entries per unique value in the dataset for counties address<-unique(data$CUST_STREET, incomparables = FALSE) length(address) dups<-duplicated(address) length(dups) #The following code was used to generate the "linking document" found in the Box Drive. #library(ids) #data$IDs<-random_id(n=4002, bytes=3) #library(openxlsx) #write.xlsx(data, "C:/Users/thom4412/Box/My Documents/Linking document.xlsx") #writes the current dataframe into XLSX file, saved on Box as "Linking Document" ###### Tracking survey response rate ##################################### library(readxl) library(tidyverse) mailing<-read_excel("C:/Users/thom4412/Desktop/MAISRC RFP 2018/Activity 2 Angler Survey/AS Code and data/01282021_Mailing Tracking Document.xlsx") mailing %>% group_by(`Completed Survey`)%>% summarize(n=n()) ###Examining response patterns#### linking<-read.csv("My Documents/Linking document2 CSV.csv") merged<-merge(linking,mailing, by="IDs") #create admin region variable merged$CNTYNAME<-trimws(merged$CNTYNAME) #remove extra white space from cnty names ref <- data.frame(CNTYNAME = c("Koochiching","Lake","Itasca", "Saint Louis", "Cook", "Carlton", "Pine", "Aitkin", "Crow Wing", "Mille Lacs", "Kanabec", "Morrison","Isanti","Anoka","Chisago","Todd","Benton","Sherburne","Stearns","Wright","Hennepin","Ramsey","Washington","Carver","Scott","Dakota","Goodhue","Wabasha","Olmsted","Winona","Fillmore","Houston", "Kittson","Roseau","Lake of the Woods","Pennington","Marshall","Red Lake", "Polk","Norman","Mahnomen","Clearwater","Hubbard","Beltrami","Wadena","Cass","Becker","Clay","Wilkin","Otter Tail","Traverse","Grant","Douglas","Stevens","Pope", "Big Stone","Swift","Chippewa","Kandiyohi","Meeker", "McCleod","Sibley","Renville","Lac Qui Parle","Yellow Medicine","Lincoln","Lyon","Brown","Redwood","Nicollet","Le Sueur","Rice","Dodge","Steele","Waseca","Blue Earth", "Watonwan","Cottonwood","Murray","Pipestone","Rock","Nobles","Jackson","Martin","Faribault","Freeborn","Mower"), admin=c(rep("Northeast",9),rep("Central",23), rep("Northwest",23),rep("South",32)), stringsAsFactors = FALSE) merged<-merge(merged,ref,by="CNTYNAME",all.x = TRUE) merged$admin[is.na(merged$admin)] <- "out of state" #filter out zeros aka people whose survey did not reach them merged$completed.survey<-ifelse(merged$`Completed Survey`==1|merged$`Completed Survey`==2,"yes",ifelse(merged$`Completed Survey`==0,"bb","no")) merged$completed.survey[is.na(merged$completed.survey)] <- "no" merged<-filter(merged,completed.survey!="bb") #Remove the responses that were from duplicated IDs merged<-merged[!duplicated(merged$IDs), ] merged %>% group_by(admin, completed.survey) %>% summarize(n=n()) #chisq test to check for independence library(MASS) #library(RVAideMemoire) merged.table<-table(merged$admin,merged$completed.survey) chisq.test(merged.table) #seeing significant differences let's look closer #chisq.multcomp(merged.table) #Global chisq shows significance...post hoc test below chisq.test(merged.table)$stdres #call standardized residuals sig<-0.05 #set significance level sigAdj<-sig/(nrow(merged.table)*ncol(merged.table)) qnorm(sigAdj/2) merged.frame<-data.frame(merged.table[,"no"],merged.table[,"yes"]) #convert to dataframe colnames(merged.frame)<-c("no","yes") #rename columns merged.frame<-mutate(merged.frame, prop.resp=yes/(yes+no),total=yes+no) #global proportion test prop.test(x=merged.table[,c("yes","no")]) #have to switch column order to match syntax of prop.test func #pairwise with bonferroni correction pairwise.prop.test(merged.table[,c("yes","no")],p.adjust.method = "bonferroni") #switch column order, apply bonferroni correction #Age difference of respondents vs gen pop? survey.responses<-read.csv("Meg's Datasets/Identified Responses20210122.csv", strip.white = TRUE) #update with latest filename/date; see "Linking responses with identities" code for how the identified responses document was generated. #trim white spaces from strings: library(data.table) survey.responses<-fread("Meg's Datasets/Identified Responses20210122.csv", strip.white = TRUE, data.table = FALSE)#File will be named with 1824444_Thompson_CombinedData_date of download. #Remove the responses that were from duplicated IDs survey.responses<-survey.responses[!duplicated(survey.responses$PrimaryID), ] mean(survey.responses$CUST_AGE) mean(merged$CUST_AGE) #create age category variable in the merged dataset n<-length(merged$IDs) merged$age_cat<-NA for (i in 1:n){ merged$age_cat[i]<-ifelse(merged$CUST_AGE[i]>=18 & merged$CUST_AGE[i]<=39,"18-39", ifelse(merged$CUST_AGE[i]>=40 & merged$CUST_AGE[i]<=49,"40-49", ifelse(merged$CUST_AGE[i]>=50 & merged$CUST_AGE[i]<=59,"50-59", ifelse(merged$CUST_AGE[i]>=60 & merged$CUST_AGE[i]<=69,"60-69", ifelse(merged$CUST_AGE[i]>=70,"70+",NA))))) } mageTab<-merged %>% group_by (completed.survey,age_cat) %>% summarize (n=n()) #create age category variable in the survey responses dataset m<-length(survey.responses$PrimaryID) survey.responses$age_cat<-NA for (i in 1:m){ survey.responses$age_cat[i]<-ifelse(survey.responses$CUST_AGE[i]>=18 & survey.responses$CUST_AGE[i]<=39,"18-39", ifelse(survey.responses$CUST_AGE[i]>=40 & survey.responses$CUST_AGE[i]<=49,"40-49", ifelse(survey.responses$CUST_AGE[i]>=50 & survey.responses$CUST_AGE[i]<=59,"50-59", ifelse(survey.responses$CUST_AGE[i]>=60 & survey.responses$CUST_AGE[i]<=69,"60-69", ifelse(survey.responses$CUST_AGE[i]>=70,"70+",NA))))) } sageTab<-survey.responses %>% group_by (age_cat) %>% summarize (n=n()) #tabulate response rates by age between the two data frames ageTab<-table(merged$age_cat,merged$completed.survey) #global proportion test prop.test(x=ageTab[,c("yes","no")]) #pairwise pairwise.prop.test(x=ageTab[,c("yes","no")], p.adjust.method = "bonferroni") #these below are not corrected for multiple comparisons: prop.test(x=c(306,2517), n=c(671,3788)) #18-39s significantly underrepresented in our sample prop.test(x=c(63,326), n=c(671,3788)) #40-49 accurately represented in our sample prop.test(x=c(97,365), n=c(671,3788)) #50-59 significantly overrepresented in our sample prop.test(x=c(129,388), n=c(671,3788)) #60-69 significantly overrepresented prop.test(x=c(76,192), n=c(671,3788)) #70+ significantly overrepresented in our sample #Apply weights to survey response calculation: #weight=1/(sampleprop/populationprop) weight1839<-1/(0.45/0.66) weight4049<-1/(0.094/0.086) weight5059<-1/(0.14/0.09) weight6069<-1/(0.19/0.10) weight70plus<-1/(0.11/0.05) #Store age weights in weights array weightsarray<-c(weight1839,weight4049,weight5059,weight6069,weight70plus) #### Survey responses manipulation ############################################## library(tidyverse) library(fitdistrplus) ## 1/22/2021 Changes made to file in excel prior to reading in: #4/6/20 made some adjustments in excel file, including changing "doz" to 12 and creating columns for presence/absence #4/8/20 for fish numbers estimated using "scoop" I estimated a number of fish using the following: #FHM runif(1,min=30,max=36) rounded to the nearest whole #other shiners runif(1,30,36) #redbelly dace aka "rainbows" runif(1,25,30) #multiplied by the number of scoops to get an estimate #Jan 22 2021 note: created columns Q16_1_release through Q16_5_noneleft to extract information from multiple select Q16. MyTheme = theme(text = element_text(size=24), axis.text.x = element_text(size=14), axis.text.y = element_text(size=24)) survey.responses<-read.csv("Meg's Datasets/Identified Responses20210122.csv", strip.white = TRUE) #update with latest filename/date; see "Linking responses with identities" code for how the identified responses document was generated. #trim white spaces from strings: library(data.table) survey.responses<-fread("Meg's Datasets/Identified Responses20210122.csv", strip.white = TRUE, data.table = FALSE)#File will be named with 1824444_Thompson_CombinedData_date of download. #responses.fin<-filter(survey.responses, Finished=="True") #don't need this anymore since the Identified Responses file already is filtered for completion variables<-read.csv("C:/Users/thom4412/Box/1824444_Thompson/1824444_Thompson_CombinedData_VariableLabels.csv") #Remove the responses that were from duplicated IDs survey.responses<-survey.responses[!duplicated(survey.responses$PrimaryID), ] #create age category variable m<-length(survey.responses$PrimaryID) survey.responses$age_cat<-NA for (i in 1:m){ survey.responses$age_cat[i]<-ifelse(survey.responses$CUST_AGE[i]>=18 & survey.responses$CUST_AGE[i]<=39,"18-39", ifelse(survey.responses$CUST_AGE[i]>=40 & survey.responses$CUST_AGE[i]<=49,"40-49", ifelse(survey.responses$CUST_AGE[i]>=50 & survey.responses$CUST_AGE[i]<=59,"50-59", ifelse(survey.responses$CUST_AGE[i]>=60 & survey.responses$CUST_AGE[i]<=69,"60-69", ifelse(survey.responses$CUST_AGE[i]>=70,"70+",NA))))) } #Calculate weights for each age #weight=1/(sampleprop/populationprop) weight1839<-1/(0.45/0.66) weight4049<-1/(0.094/0.086) weight5059<-1/(0.14/0.09) weight6069<-1/(0.19/0.10) weight70plus<-1/(0.11/0.05) weightsarray<-c(weight1839,weight4049,weight5059,weight6069,weight70plus) # Rename columns for number of days of fishing per month. (Survey Q1) survey.responses<- rename(survey.responses, Days_Mar18="Q1_1_1") %>% rename(Days_Apr18="Q1_1_2") %>% rename(Days_May18="Q1_1_3") %>% rename(Days_Jun18="Q1_1_4") %>% rename(Days_Jul18="Q1_1_5") %>% rename(Days_Aug18="Q1_1_6") %>% rename(Days_Sep18="Q1_1_7") %>% rename(Days_Oct18="Q1_1_8") %>% rename(Days_Nov18="Q1_1_9") %>% rename(Days_Dec18="Q1_1_10") %>% rename(Days_Jan19="Q1_1_11") %>% rename(Days_Feb19="Q1_1_12") # Rename columns for number of days of baitfish use per month. (Survey Q2) survey.responses <- rename(survey.responses, Days_bait_Mar18="Q2_1_1") %>% rename(Days_bait_Apr18="Q2_1_2") %>% rename(Days_bait_May18="Q2_1_3") %>% rename(Days_bait_Jun18="Q2_1_4") %>% rename(Days_bait_Jul18="Q2_1_5") %>% rename(Days_bait_Aug18="Q2_1_6") %>% rename(Days_bait_Sep18="Q2_1_7") %>% rename(Days_bait_Oct18="Q2_1_8") %>% rename(Days_bait_Nov18="Q2_1_9") %>% rename(Days_bait_Dec18="Q2_1_10") %>% rename(Days_bait_Jan19="Q2_1_11") %>% rename(Days_bait_Feb19="Q2_1_12") # Rename columns for harvest frequency and location (Paper Survey Section 1) survey.responses <- rename(survey.responses, harvest_frequency="Q3") %>% #Never, Rarely, Often rename(harvest_zonea="Q5_1") %>% rename(harvest_zoneb="Q5_2") %>% rename(harvest_zonec="Q5_3") %>% #rename(harvest_outside="Q5a") %>% #Zones where they fished with the live baitfish they harvested (paper survey Q5) rename(harvest_fisha="Q7_1") %>% rename(harvest_fishb="Q7_2") %>% rename(harvest_fishc="Q7_3") %>% rename(harvest_fishoutside="Q5a") # Rename columns for purchase frequency and location (Paper Survey Section 2) survey.responses <- rename(survey.responses, purchase_frequency="Q10") %>% #Never, Rarely, Often rename(purchase_zonea="Q12_1") %>% rename(purchase_zoneb="Q12_2") %>% rename(purchase_zonec="Q12_3") %>% rename(purchase_outside="Q12a") %>% #Zones where they fished with the live baitfish they purchased (paper survey Q12) rename(purchase_fisha="Q14_1") %>% rename(purchase_fishb="Q14_2") %>% rename(purchase_fishc="Q14_3") %>% rename(purchase_fishoutside="Q14a") ####create variable for number of fishing days per year #first make NAs for number of days fishing 0s survey.responses$Days_Mar18 <- replace_na(survey.responses$Days_Mar18, 0) survey.responses$Days_Apr18 <- replace_na(survey.responses$Days_Apr18, 0) survey.responses$Days_May18 <- replace_na(survey.responses$Days_May18, 0) survey.responses$Days_Jun18 <- replace_na(survey.responses$Days_Jun18, 0) survey.responses$Days_Jul18 <- replace_na(survey.responses$Days_Jul18, 0) survey.responses$Days_Aug18 <- replace_na(survey.responses$Days_Aug18, 0) survey.responses$Days_Sep18 <- replace_na(survey.responses$Days_Sep18, 0) survey.responses$Days_Oct18 <- replace_na(survey.responses$Days_Oct18, 0) survey.responses$Days_Nov18 <- replace_na(survey.responses$Days_Nov18, 0) survey.responses$Days_Dec18 <- replace_na(survey.responses$Days_Dec18, 0) survey.responses$Days_Jan19 <- replace_na(survey.responses$Days_Jan19, 0) survey.responses$Days_Feb19 <- replace_na(survey.responses$Days_Feb19, 0) #make NAs for number of days fishing with bait 0s survey.responses$Days_bait_Mar18 <- replace_na(survey.responses$Days_bait_Mar18, 0) survey.responses$Days_bait_Apr18 <- replace_na(survey.responses$Days_bait_Apr18, 0) survey.responses$Days_bait_May18 <- replace_na(survey.responses$Days_bait_May18, 0) survey.responses$Days_bait_Jun18 <- replace_na(survey.responses$Days_bait_Jun18, 0) survey.responses$Days_bait_Jul18 <- replace_na(survey.responses$Days_bait_Jul18, 0) survey.responses$Days_bait_Aug18 <- replace_na(survey.responses$Days_bait_Aug18, 0) survey.responses$Days_bait_Sep18 <- replace_na(survey.responses$Days_bait_Sep18, 0) survey.responses$Days_bait_Oct18 <- replace_na(survey.responses$Days_bait_Oct18, 0) survey.responses$Days_bait_Nov18 <- replace_na(survey.responses$Days_bait_Nov18, 0) survey.responses$Days_bait_Dec18 <- replace_na(survey.responses$Days_bait_Dec18, 0) survey.responses$Days_bait_Jan19 <- replace_na(survey.responses$Days_bait_Jan19, 0) survey.responses$Days_bait_Feb19 <- replace_na(survey.responses$Days_bait_Feb19, 0) #add to get total trips per year n<-length(survey.responses$PrimaryID) for (i in 1:n) { survey.responses$trips.per.year[i]<-sum(survey.responses$Days_Mar18[i], survey.responses$Days_Apr18[i], survey.responses$Days_May18[i], survey.responses$Days_Jun18[i], survey.responses$Days_Jul18[i], survey.responses$Days_Aug18[i], survey.responses$Days_Sep18[i], survey.responses$Days_Oct18[i], survey.responses$Days_Nov18[i], survey.responses$Days_Dec18[i], survey.responses$Days_Jan19[i], survey.responses$Days_Feb19[i])} #Store current dataframe as object to go back to if issues arise #save(survey.responses, file="survey_responses.RData") #load("survey_responses.RData") #Remove problematic rows that have no response for either purchase or harvest of live baitfish and did not fill anything out survey.responses$bad<- ifelse(survey.responses$harvest_frequency=="" & survey.responses$purchase_frequency=="","bad","good") #add a column on which to filter responses survey.responses<-filter(survey.responses, bad=="good") #selects only "good" rows where people actually filled it out and went fishing #Create columns for behavior during the month in question #live.bait is a column with three levels: Harvest, Purchase, or did not use livebait survey.responses$live.bait<- ifelse(survey.responses$harvest_frequency=="Rarely", "Harvest", ifelse(survey.responses$harvest_frequency=="Often", "Harvest", ifelse(survey.responses$purchase_frequency=="Rarely", "Purchase", ifelse(survey.responses$purchase_frequency=="Often","Purchase", ifelse(survey.responses$purchase_frequency=="Never" & survey.responses$harvest_frequency=="Never", "Did not use live bait", ifelse(survey.responses$purchase_frequency=="" &survey.responses$harvest_frequency=="Never", "Did not use live bait", ifelse(survey.responses$purchase_frequency=="Never" &survey.responses$harvest_frequency=="", "Did not use live bait","Did not use live bait"))))))) #Summarize bait use and plot survey.responses %>% group_by(live.bait) %>% summarize(n=n()) baituse.plot<-ggplot(survey.responses)+aes(live.bait)+geom_bar(color="darkturquoise", fill="darkturquoise",width = 0.5)+ggtitle("Bait source among surveyed Minnesota anglers")+MyTheme+xlab("Source of live baitfish") baituse.plot #Create variable for administrative regions based on county survey.responses$CNTYNAME<-trimws(survey.responses$CNTYNAME) #remove extra white space from cnty names ref <- data.frame(CNTYNAME = c("Koochiching","Lake","Itasca", "Saint Louis", "Cook", "Carlton", "Pine", "Aitkin", "Crow Wing", "Mille Lacs", "Kanabec", "Morrison","Isanti","Anoka","Chisago","Todd","Benton","Sherburne","Stearns","Wright","Hennepin","Ramsey","Washington","Carver","Scott","Dakota","Goodhue","Wabasha","Olmsted","Winona","Fillmore","Houston", "Kittson","Roseau","Lake of the Woods","Pennington","Marshall","Red Lake", "Polk","Norman","Mahnomen","Clearwater","Hubbard","Beltrami","Wadena","Cass","Becker","Clay","Wilkin","Otter Tail","Traverse","Grant","Douglas","Stevens","Pope", "Big Stone","Swift","Chippewa","Kandiyohi","Meeker", "McCleod","Sibley","Renville","Lac Qui Parle","Yellow Medicine","Lincoln","Lyon","Brown","Redwood","Nicollet","Le Sueur","Rice","Dodge","Steele","Waseca","Blue Earth", "Watonwan","Cottonwood","Murray","Pipestone","Rock","Nobles","Jackson","Martin","Faribault","Freeborn","Mower"), admin=c(rep("Northeast",9),rep("Central",23), rep("Northwest",23),rep("South",32)), stringsAsFactors = FALSE) survey.responses<-merge(survey.responses,ref,by="CNTYNAME",all.x = TRUE) survey.responses$admin[is.na(survey.responses$admin)] <- "out of state" survey.responses %>% group_by(CUST_STATE,admin) %>% summarize(n=n()) #plot bait use by administrative area baituse.plot2<-ggplot(survey.responses)+aes(live.bait,fill=admin)+geom_bar(width = 0.5)+ggtitle("Bait source among surveyed Minnesota anglers")+MyTheme+xlab("Source of live baitfish") baituse.plot2 survey.responses %>% group_by(admin,live.bait) %>% summarize(n=n()) chisq.test(survey.responses$live.bait,survey.responses$admin) #plot bait use by administrative area, proportion baituse.plot3<-ggplot(survey.responses)+aes(admin, fill=live.bait)+geom_bar(width = 0.5,position="fill")+ggtitle("Bait source among surveyed Minnesota anglers")+MyTheme+xlab("Region") baituse.plot3 #Segment based on bait use survey.responses.harvesters<-filter(survey.responses,live.bait=="Harvest") survey.responses.purchasers<-filter(survey.responses,live.bait=="Purchase") survey.responses.baitusers<-rbind(filter(survey.responses,live.bait=="Harvest"),filter(survey.responses,live.bait=="Purchase")) survey.responses.nonbait<-filter(survey.responses,live.bait=="Did not use live bait") #Weighted release rate sbdt <- data.table(survey.responses.baitusers) sbdt[, .N, by = .(age_cat)] sbdt[ , .N, by = .(age_cat, Q16_1_release)] sbdt2 <- merge(sbdt[ , .N, by = .(age_cat, Q16_1_release)],sbdt[, .N, by = .(age_cat)], by = "age_cat") #Creates one dataframe with number of respondents by disposal method per location as well as total number of respondents per location sbdt2[,prop := N.x/N.y , ] release.by.age<-sbdt2[Q16_1_release==TRUE,prop] #array of proportions of anglers who released per age category #weighted average release rate: sum(release.by.age*weightsarray)/sum(weightsarray) #weighted use of live bait survey.responses %>% group_by(age_cat,live.bait) %>% summarize (n=n()) srdt<-data.table(survey.responses) srdt2<- merge(srdt[ , .N, by = .(age_cat, live.bait)],srdt[, .N, by = .(age_cat)], by = "age_cat") #Creates one dataframe with number of respondents by disposal method per location as well as total number of respondents per location srdt2[,prop := N.x/N.y , ] srdt2 use.by.age<-srdt2[live.bait=="Did not use live bait",prop] #array of proportions of anglers who did not use live bait by age sum(use.by.age*weightsarray)/sum(weightsarray) survey.responses.baitusers$disposal_method<- ifelse(survey.responses.baitusers$Q16_1_release==TRUE,"Release in water", ifelse(survey.responses.baitusers$Q16_2_dispose==TRUE,"Properly disposed", ifelse(survey.responses.baitusers$Q16_3_giveaway==TRUE, "Gave away", ifelse(survey.responses.baitusers$Q16_4_other==TRUE,"Other", ifelse(survey.responses.baitusers$Q16_5_noneleft==TRUE,"None leftover", #code for harvest disposal methods: ifelse(survey.responses.baitusers$Q9=="Released into different water from which it was caught","Release in water", ifelse(survey.responses.baitusers$Q9=="Released into the same water from which it was caught", "Back to source", ifelse(survey.responses.baitusers$Q9=="Disposed on land/in garbage","Properly disposed", ifelse(survey.responses.baitusers$Q9=="Gave to a friend", "Gave away", ifelse(survey.responses.baitusers$Q9=="Other (please describe)","Other", ifelse(survey.responses.baitusers$Q9=="I did not have any leftover baitfish","None leftover", #code for no response ifelse(survey.responses.baitusers$Q9==""& survey.responses.baitusers$Q16=="", "Did not respond", "")))))))))))) #save current frame #save(survey.responses.baitusers, file="survey_responses_baitusers.RData") #plot survey.responses.baitusers %>% group_by(disposal_method) %>% summarize(n=n()) #plot for papers: library(forcats) survey.responses.baitusers %>% count(disposal_method) %>% mutate(disposal_method = fct_reorder(disposal_method, n, .desc = TRUE)) %>% ggplot(aes(x = disposal_method, y = n)) + geom_bar(stat="identity")+ coord_flip()+ theme_bw() #plot for presentations: disposal.plot<-ggplot(survey.responses.baitusers)+ aes(x=disposal_method, fill=live.bait) + geom_bar()+ coord_flip() + ggtitle("Method of disposal by bait source among surveyed Minnesota anglers") + xlab("Method of disposal") + labs(fill = "Source") + #theme(MyTheme) theme_bw()+ theme(text = element_text(size=24), axis.text.x = element_text(size=18), axis.text.y = element_text(size=24)) disposal.plot #plot disposal by admin area disposal.plot2<-ggplot(survey.responses.baitusers)+ aes(disposal_method, fill=admin) + geom_bar()+ coord_flip() + ggtitle("Method of disposal by bait source among surveyed Minnesota anglers") + xlab("Method of disposal") + labs(fill = "Region") + theme(text = element_text(size=24), axis.text.x = element_text(size=18), axis.text.y = element_text(size=24)) + theme_bw() + scale_fill_grey() disposal.plot2 #disposal plot by admin region of residence #make a column for binary yes/no release or not--REMOVED 1/22/21 because colum q16_1_release answers this question #survey.responses.baitusers$releaseyn<-ifelse(survey.responses.baitusers$disposal_method=="Release in water","yes","no") #plot release percentage by admin area GRAYSCALE for publication: disposal_by_admin_grey.jpeg library(dplyr) p <- survey.responses.baitusers %>% mutate(admin = fct_relevel(admin, "Central", "South","Northeast","Northwest", "out of state")) %>% ggplot( aes(x=admin, fill=disposal_method)) + geom_bar(position = "fill") + MyTheme + xlab("Region" ) + ylab("proportion") + theme(axis.text.x = element_text(angle=45,vjust = .5)) + theme_bw()+ scale_fill_grey()+ labs(fill="Disposal method") p #plot release percentage by admin area COLOR for publication: disposal_by_admin_grey.jpeg #only 6 categories, release back to source was grouped with "properly disposed", colorblind friendly, okay for grayscale printing pcolor <- survey.responses.baitusers %>% mutate(admin = fct_relevel(admin, "Central", "South","Northeast","Northwest", "out of state"))%>% mutate(disposal_method = ifelse(as.character(disposal_method)=="Back to source","Properly disposed", as.character(disposal_method))) %>% mutate(disposal_method=fct_relevel(disposal_method, "Did not respond", "None leftover", "Other","Gave away", "Properly disposed", "Release in water")) %>% ggplot( aes(x=admin, fill=disposal_method)) + geom_bar(position = "fill") + MyTheme + xlab("Region" ) + ylab("proportion") + theme(axis.text.x = element_text(angle=45,vjust = .5)) + theme_bw()+ scale_fill_manual(values=c("#FFDBD4", "#FFC107", "#EF620D","#1E88E5","#D81B60", "#004D40"))+ labs(fill="Disposal method") pcolor #plot release percentage by admin area PATTERN library(ggpattern) ppattern <- survey.responses.baitusers %>% mutate(admin = fct_relevel(admin, "Central", "South","Northeast","Northwest", "out of state")) %>% ggplot(aes(fill=disposal_method, x=admin)) + geom_bar_pattern(position = "fill", aes(pattern=disposal_method, pattern_angle=disposal_method, pattern_spacing=disposal_method), fill = 'white', colour = 'black', pattern_density = 0.1, pattern_fill = 'black', pattern_colour = 'black') + MyTheme + xlab("Region" ) + ylab("proportion") + theme(axis.text.x = element_text(angle=45,vjust = .5)) + theme_bw()+ labs(fill="Disposal method") ppattern #test for significant differences in release behavior across different regions of residence library(MASS) region.table<-table(survey.responses.baitusers$admin,survey.responses.baitusers$Q16_1_release) chisq.test(region.table) #no significant difference, however small cell values (ie low statistical power) ####Create columns for destination fishing zones#### survey.responses.baitusers <- mutate_at(survey.responses.baitusers,c("harvest_fisha", "harvest_fishb", "harvest_fishc", "purchase_fisha", "purchase_fishb", "purchase_fishc"), as.character) ###Change zone NAs to 0s so that the ifelse commands below don't return NAs##### survey.responses.baitusers$harvest_fisha <- replace_na(survey.responses.baitusers$harvest_fisha, 0) survey.responses.baitusers$harvest_fishb <- replace_na(survey.responses.baitusers$harvest_fishb, 0) survey.responses.baitusers$harvest_fishc <- replace_na(survey.responses.baitusers$harvest_fishc, 0) survey.responses.baitusers$purchase_fisha <- replace_na(survey.responses.baitusers$purchase_fisha, 0) survey.responses.baitusers$purchase_fishb <- replace_na(survey.responses.baitusers$purchase_fishb, 0) survey.responses.baitusers$purchase_fishc <- replace_na(survey.responses.baitusers$purchase_fishc, 0) #Create a column for each zone to state whether that angler fished there at all during this month survey.responses.baitusers$fish_zone4<-ifelse(survey.responses.baitusers$purchase_fisha=="4","yes", ifelse(survey.responses.baitusers$purchase_fishb=="4","yes", ifelse(survey.responses.baitusers$purchase_fishc=="4","yes", ifelse(survey.responses.baitusers$harvest_fisha=="4", "yes", ifelse(survey.responses.baitusers$harvest_fishb=="4", "yes", ifelse(survey.responses.baitusers$harvest_fishc=="4","yes","no")))))) survey.responses.baitusers$fish_zone4<-ifelse(survey.responses.baitusers$purchase_fisha=="4","yes", ifelse(survey.responses.baitusers$purchase_fishb=="4","yes", ifelse(survey.responses.baitusers$purchase_fishc=="4","yes", ifelse(survey.responses.baitusers$harvest_fisha=="4", "yes", ifelse(survey.responses.baitusers$harvest_fishb=="4", "yes", ifelse(survey.responses.baitusers$harvest_fishc=="4","yes","no")))))) survey.responses.baitusers$fish_zone1<-ifelse(survey.responses.baitusers$purchase_fisha=="1","yes", ifelse(survey.responses.baitusers$purchase_fishb=="1","yes", ifelse(survey.responses.baitusers$purchase_fishc=="1","yes", ifelse(survey.responses.baitusers$harvest_fisha=="1", "yes", ifelse(survey.responses.baitusers$harvest_fishb=="1", "yes", ifelse(survey.responses.baitusers$harvest_fishc=="1","yes","no")))))) survey.responses.baitusers$fish_zone2<-ifelse(survey.responses.baitusers$purchase_fisha=="2","yes", ifelse(survey.responses.baitusers$purchase_fishb=="2","yes", ifelse(survey.responses.baitusers$purchase_fishc=="2","yes", ifelse(survey.responses.baitusers$harvest_fisha=="2", "yes", ifelse(survey.responses.baitusers$harvest_fishb=="2", "yes", ifelse(survey.responses.baitusers$harvest_fishc=="2","yes","no")))))) survey.responses.baitusers$fish_zone3<-ifelse(survey.responses.baitusers$purchase_fisha=="3","yes", ifelse(survey.responses.baitusers$purchase_fishb=="3","yes", ifelse(survey.responses.baitusers$purchase_fishc=="3","yes", ifelse(survey.responses.baitusers$harvest_fisha=="3", "yes", ifelse(survey.responses.baitusers$harvest_fishb=="3", "yes", ifelse(survey.responses.baitusers$harvest_fishc=="3","yes","no")))))) survey.responses.baitusers$fish_zone5<-ifelse(survey.responses.baitusers$purchase_fisha=="5","yes", ifelse(survey.responses.baitusers$purchase_fishb=="5","yes", ifelse(survey.responses.baitusers$purchase_fishc=="5","yes", ifelse(survey.responses.baitusers$harvest_fisha=="5", "yes", ifelse(survey.responses.baitusers$harvest_fishb=="5", "yes", ifelse(survey.responses.baitusers$harvest_fishc=="5","yes","no")))))) survey.responses.baitusers$fish_zone6<-ifelse(survey.responses.baitusers$purchase_fisha=="6","yes", ifelse(survey.responses.baitusers$purchase_fishb=="6","yes", ifelse(survey.responses.baitusers$purchase_fishc=="6","yes", ifelse(survey.responses.baitusers$harvest_fisha=="6", "yes", ifelse(survey.responses.baitusers$harvest_fishb=="6", "yes", ifelse(survey.responses.baitusers$harvest_fishc=="6","yes","no")))))) survey.responses.baitusers$fish_zone7<-ifelse(survey.responses.baitusers$purchase_fisha=="7","yes", ifelse(survey.responses.baitusers$purchase_fishb=="7","yes", ifelse(survey.responses.baitusers$purchase_fishc=="7","yes", ifelse(survey.responses.baitusers$harvest_fisha=="7", "yes", ifelse(survey.responses.baitusers$harvest_fishb=="7", "yes", ifelse(survey.responses.baitusers$harvest_fishc=="7","yes","no")))))) survey.responses.baitusers$fish_zone8<-ifelse(survey.responses.baitusers$purchase_fisha=="8","yes", ifelse(survey.responses.baitusers$purchase_fishb=="8","yes", ifelse(survey.responses.baitusers$purchase_fishc=="8","yes", ifelse(survey.responses.baitusers$harvest_fisha=="8", "yes", ifelse(survey.responses.baitusers$harvest_fishb=="8", "yes", ifelse(survey.responses.baitusers$harvest_fishc=="8","yes","no")))))) survey.responses.baitusers$fish_zone9<-ifelse(survey.responses.baitusers$purchase_fisha=="9","yes", ifelse(survey.responses.baitusers$purchase_fishb=="9","yes", ifelse(survey.responses.baitusers$purchase_fishc=="9","yes", ifelse(survey.responses.baitusers$harvest_fisha=="9", "yes", ifelse(survey.responses.baitusers$harvest_fishb=="9", "yes", ifelse(survey.responses.baitusers$harvest_fishc=="9","yes","no")))))) survey.responses.baitusers$fish_zone10<-ifelse(survey.responses.baitusers$purchase_fisha=="10","yes", ifelse(survey.responses.baitusers$purchase_fishb=="10","yes", ifelse(survey.responses.baitusers$purchase_fishc=="10","yes", ifelse(survey.responses.baitusers$harvest_fisha=="10", "yes", ifelse(survey.responses.baitusers$harvest_fishb=="10", "yes", ifelse(survey.responses.baitusers$harvest_fishc=="10","yes","no")))))) survey.responses.baitusers$fish_zone11<-ifelse(survey.responses.baitusers$purchase_fisha=="11","yes", ifelse(survey.responses.baitusers$purchase_fishb=="11","yes", ifelse(survey.responses.baitusers$purchase_fishc=="11","yes", ifelse(survey.responses.baitusers$harvest_fisha=="11", "yes", ifelse(survey.responses.baitusers$harvest_fishb=="11", "yes", ifelse(survey.responses.baitusers$harvest_fishc=="11","yes","no")))))) survey.responses.baitusers$fish_zone12<-ifelse(survey.responses.baitusers$purchase_fisha=="12","yes", ifelse(survey.responses.baitusers$purchase_fishb=="12","yes", ifelse(survey.responses.baitusers$purchase_fishc=="12","yes", ifelse(survey.responses.baitusers$harvest_fisha=="12", "yes", ifelse(survey.responses.baitusers$harvest_fishb=="12", "yes", ifelse(survey.responses.baitusers$harvest_fishc=="12","yes","no")))))) #make dataframe long baitusers.long<-gather(survey.responses.baitusers,"zone", "choice", fish_zone1,fish_zone2,fish_zone3,fish_zone4, fish_zone5,fish_zone6,fish_zone7,fish_zone8,fish_zone9,fish_zone10,fish_zone11,fish_zone12) baitusers.long<-filter(baitusers.long,choice=="yes") ggplot(baitusers.long)+aes(zone,fill=Q16_1_release)+geom_bar() ggplot(baitusers.long)+aes(zone,fill=CUST_STATE)+geom_bar(position="fill") #Trips per zone: fish_zone1<-nrow(filter(survey.responses.baitusers,fish_zone1=="yes")) fish_zone2<-nrow(filter(survey.responses.baitusers,fish_zone2=="yes")) fish_zone3<-nrow(filter(survey.responses.baitusers,fish_zone3=="yes")) fish_zone4<-nrow(filter(survey.responses.baitusers,fish_zone4=="yes")) fish_zone5<-nrow(filter(survey.responses.baitusers,fish_zone5=="yes")) fish_zone6<-nrow(filter(survey.responses.baitusers,fish_zone6=="yes")) fish_zone7<-nrow(filter(survey.responses.baitusers,fish_zone7=="yes")) fish_zone8<-nrow(filter(survey.responses.baitusers,fish_zone8=="yes")) fish_zone9<-nrow(filter(survey.responses.baitusers,fish_zone9=="yes")) fish_zone10<-nrow(filter(survey.responses.baitusers,fish_zone10=="yes")) fish_zone11<-nrow(filter(survey.responses.baitusers,fish_zone11=="yes")) fish_zone12<-nrow(filter(survey.responses.baitusers,fish_zone12=="yes")) trips.per.zone<-data.frame(fish_zone1,fish_zone2,fish_zone3,fish_zone4,fish_zone5,fish_zone6, fish_zone7,fish_zone8,fish_zone9,fish_zone10,fish_zone11,fish_zone12) tpz<-gather(trips.per.zone,key = "zone", "Number_trips", fish_zone1,fish_zone2,fish_zone3,fish_zone4,fish_zone5,fish_zone6, fish_zone7,fish_zone8,fish_zone9,fish_zone10,fish_zone11,fish_zone12) #create data tables and merge library(data.table) df<-data.table(tpz) df2<-data.table(baitusers.long) df2[,.N,by=.(zone,disposal_method)] #gives number of each disposal method by zone dfm<-merge(df2[,.N,by=.(zone,disposal_method)],df) #merges on zone dfm[,.prop:=N/Number_trips,] #create new column for the proportions within each zone dfx<-merge(df2[,.N,by=.(zone,Q16_1_release)],df) #merges on zone dfx[,.prop:=N/Number_trips,] #create new column for the proportions within each zone dfx #test for independence between release behavior and zone of fishing release_by_zone<-data.table(read.csv("release_by_zone.csv",row.names=1)) chisq.test(release_by_zone) #test for significant difference in number of trips per year between bait users and nonbait users t.test(survey.responses.baitusers$trips.per.year,survey.responses.nonbait$trips.per.year) ###Calculating average number of fish brought on fishing trip #rename columns survey.responses.baitusers<-rename(survey.responses.baitusers,whitesuckers="Q15_1_1") %>% rename(fatheadminnows="Q15_2_1") %>% rename(goldenshiners="Q15_3_1") %>% rename(othershiners= "Q15_4_1") %>% rename(redbellydace= "Q15_5_1") %>% rename(redtailchubs= "Q15_6_1") %>% rename(otherchubs="Q15_7_1") %>% rename(otherfish="Q15_8_1") #change into numbers; anything that includes text (eg "some" or "x" becomes NA) survey.responses.baitusers$whitesuckers<-as.numeric(as.character(survey.responses.baitusers$whitesuckers),stringsAsFactors=FALSE) survey.responses.baitusers$fatheadminnows<-as.numeric(as.character(survey.responses.baitusers$fatheadminnows),stringsAsFactors=FALSE) survey.responses.baitusers$goldenshiners<-as.numeric(as.character(survey.responses.baitusers$goldenshiners),stringsAsFactors=FALSE) survey.responses.baitusers$othershiners<-as.numeric(as.character(survey.responses.baitusers$othershiners),stringsAsFactors=FALSE) survey.responses.baitusers$redbellydace<-as.numeric(as.character(survey.responses.baitusers$redbellydace),stringsAsFactors=FALSE) survey.responses.baitusers$redtailchubs<-as.numeric(as.character(survey.responses.baitusers$redtailchubs),stringsAsFactors=FALSE) survey.responses.baitusers$otherchubs<-as.numeric(as.character(survey.responses.baitusers$otherchubs),stringsAsFactors=FALSE) survey.responses.baitusers$otherfish<-as.numeric(as.character(survey.responses.baitusers$otherfish),stringsAsFactors=FALSE) #summarize average number of fish purchased and number of anglers who purchased that type of fish library(reshape2) library(data.table) typefish<-survey.responses.baitusers[97:112] #select just columns of interest #melt data to make long SRBlong<-reshape2::melt(typefish,measure.vars=c("whitesuckers","WTS.yn","fatheadminnows","FHM.yn","goldenshiners", "GSH.yn","othershiners","OSH.yn","redbellydace","RBD.yn","redtailchubs", "RTC.yn","otherchubs","OTC.yn","otherfish","OFS.yn"), variable.name = "fish", value.name = "number") #remove any fish number that is 0 to get an average number of fish purchased among anglers who purchased that particular species (don't zero-inflate mean and SD) SRBlong<-subset(SRBlong,number>0) #convert to data table SRBlong.dt<-data.table(SRBlong) #use data.table setkey(SRBlong.dt,fish) typefish.summary<-SRBlong.dt[,list(totalusers=sum(number,na.rm = TRUE), meanfish=mean(number,na.rm = TRUE), sdfish=sd(number,na.rm = TRUE), maxfish=max(number,na.rm = TRUE)), by=fish] #Count total number of fish (of any species) brought on a trip survey.responses.baitusers$fish.per.trip<-NA #loop to calculate total number of fish for each bait user for(i in 1:length(survey.responses.baitusers$PrimaryID)){ survey.responses.baitusers[i,"fish.per.trip"]<-sum(survey.responses.baitusers[i,"whitesuckers"], survey.responses.baitusers[i,"fatheadminnows"], survey.responses.baitusers[i,"goldenshiners"], survey.responses.baitusers[i,"othershiners"], survey.responses.baitusers[i,"redbellydace"], survey.responses.baitusers[i,"redtailchubs"], survey.responses.baitusers[i,"otherchubs"], survey.responses.baitusers[i,"otherfish"],na.rm = TRUE) } #calculate mean and SD and look at distribution mean(survey.responses.baitusers$fish.per.trip,na.rm = TRUE) sd(survey.responses.baitusers$fish.per.trip,na.rm = TRUE) ggplot(survey.responses.baitusers)+geom_histogram(aes(fish.per.trip),binwidth = 10) #large spike...what's the most commonly reported number of fish? getmode <- function(v) { #create mode function because there isn't a built in one in R uniqv <- unique(v) uniqv[which.max(tabulate(match(v, uniqv)))] } getmode(survey.responses.baitusers$fish.per.trip) #24 is the most commonly reported number of fish; likely artifact because of use of "dozen" in reporting ##How do species purchases vary by month? SRBlong2<-reshape2::melt(survey.responses.baitusers,measure.vars=c("whitesuckers","fatheadminnows","goldenshiners", "othershiners","redbellydace","redtailchubs", "otherchubs","otherfish"), variable.name = "fish", value.name = "number") SRBlong2<-subset(SRBlong2,number>0) SRBlong2<-subset(SRBlong2,Q3a != "") SRBlong2%>% group_by(Q3a) %>% summarize(n=n()) #remove missing months (n-7) SRBlong2$Q3a<-factor(SRBlong2$Q3a,levels=c("January","February","March","April","May","June", "July","August","September", "October", "November", "December")) ##Count total number of AFT-susceptible fish brought on a trip survey.responses.baitusers$AFT.susc<-NA #loop to calculate total number of AFT-susc fish for each bait user for(i in 1:length(survey.responses.baitusers$PrimaryID)){ survey.responses.baitusers[i,"AFT.susc"]<-sum(survey.responses.baitusers[i,"fatheadminnows"], survey.responses.baitusers[i,"goldenshiners"], survey.responses.baitusers[i,"othershiners"], survey.responses.baitusers[i,"otherchubs"], na.rm = TRUE) } #filter out anglers who did not use AFT: AFTusers<-filter(survey.responses.baitusers,AFT.susc>0) #fit distribution to total number of AFT-susc fish used by anglers who used AFT-susc fish max(AFTusers$AFT.susc) min(AFTusers$AFT.susc) mean(AFTusers$AFT.susc) hist(AFTusers$AFT.susc) fitdistrplus::fitdist(AFTusers$AFT.susc,distr=dexp,method="mle") #rate=0.0248, Beta=40.3 #create season variable: SRBlong2$season<-ifelse(SRBlong2$Q3a=="May"| SRBlong2$Q3a=="June"| SRBlong2$Q3a=="July"| SRBlong2$Q3a=="August"| SRBlong2$Q3a=="September"| SRBlong2$Q3a=="October","open", "ice") #plot number of each fish species purchased by month: ggplot(SRBlong2, aes(Q3a, number)) + geom_bar(aes(fill = fish), stat = "summary", position = "dodge",fun.y="mean") #plot number of fish per species by season: SRBlong2$season<-ifelse(SRBlong2$Q3a=="May"| SRBlong2$Q3a=="June"| SRBlong2$Q3a=="July"| SRBlong2$Q3a=="August"| SRBlong2$Q3a=="September"| SRBlong2$Q3a=="October","open", "ice") ggplot(SRBlong2, aes(season, number)) + geom_bar(aes(fill = fish), stat = "summary", position = "dodge",fun.y="mean")+ylab('mean number purchased') summary(lm(data=SRBlong2,number~fish+1)) library(car) Anova(lm(data=SRBlong2,number~fish)) mod<-aov(data=SRBlong2,number~fish*season) summary(mod) TukeyHSD(mod) ##Is there a seasonal difference in purchasing of OVIO-susc fish (ie golden shiners?) survey.responses.baitusers$season<-ifelse(survey.responses.baitusers$Q3a=="May"| survey.responses.baitusers$Q3a=="June"| survey.responses.baitusers$Q3a=="July"| survey.responses.baitusers$Q3a=="August"| survey.responses.baitusers$Q3a=="September"| survey.responses.baitusers$Q3a=="October","open", "ice") ##Is there a seasonal difference in purchasing of golden shiners? summary(lm(data=survey.responses.baitusers,goldenshiners~season)) #no ##Is there a seasonal difference in number of trips taken? n<-length(survey.responses.baitusers$PrimaryID) for (i in 1:n) { survey.responses.baitusers$trips.in.summer[i]<-sum(survey.responses$Days_May18[i], survey.responses$Days_Jun18[i], survey.responses$Days_Jul18[i], survey.responses$Days_Aug18[i], survey.responses$Days_Sep18[i], survey.responses$Days_Oct18[i],na.rm = TRUE) survey.responses.baitusers$trips.in.winter[i]<-sum(survey.responses$Days_Jan18[i], survey.responses$Days_Feb18[i], survey.responses$Days_Mar18[i], survey.responses$Days_Apr18[i], survey.responses$Days_Nov18[i], survey.responses$Days_Dec18[i],na.rm = TRUE) } ##Is there a seasonal difference in number of trips per season? mean(survey.responses.baitusers$trips.in.summer,na.rm = TRUE) mean(survey.responses.baitusers$trips.in.winter,na.rm = TRUE) t.test(survey.responses.baitusers$trips.in.summer,survey.responses.baitusers$trips.in.winter) ##Significant difference in the number of trips. Fit distributions to each individually fitdist(survey.responses.baitusers$trips.in.summer,distr = "exp", method = "mle") fitdist(survey.responses.baitusers$trips.in.winter,distr = "exp", method = "mle") mean(rexp(10000,rate = 0.0553)) mean(rexp(10000,rate=0.356)) #What is the maximum number of trips taken in each season (for the purposes of truncating exponential distributions in realistic place) max(survey.responses.baitusers$trips.in.summer,na.rm = TRUE) max(survey.responses.baitusers$trips.in.winter,na.rm = TRUE) survey.responses$Q3a<-factor(survey.responses$Q3a,levels=c("January","February","March","April","May","June", "July","August","September", "October", "November", "December")) survey.responses$season<-ifelse(survey.responses$Q3a=="May"| survey.responses$Q3a=="June"| survey.responses$Q3a=="July"| survey.responses$Q3a=="August"| survey.responses$Q3a=="September"| survey.responses$Q3a=="October","open", "ice") ## 5/26/20 below code was used for fitting dist. to total fish purchased. commenting now for later use #fit poisson curve to data using optim # minus.logL.s<-function(lambda, dat){ # -sum(dpois(dat,lambda, log=TRUE))} # # mle<-optim(par=45,fn=minus.logL.s, method="BFGS",hessian=T,dat=survey.responses.baitusers$fish.per.trip) # mle # # #simulate data coming from a poisson distribution of mean 38 # simspois<-as.data.frame(rpois(5000, 38)) # colnames(simspois)<-("simulated_values") # # #fit negative binomial curve # minus.logL.nb<-function(pars, dat){ # mu<-pars[1] # size<-pars[2] # -sum(dnbinom(dat, mu=mu, size=size,log=TRUE))} # mlenb<-optim(par=c(mu=38,size=1),fn=minus.logL.nb, method="BFGS",hessian=T,dat=survey.responses.baitusers$fish.per.trip) # mlenb # #simulate data coming from a negative binomial curve # simsnegbin<-as.data.frame(rnbinom(3000,size=4, mu=38)) # colnames(simsnegbin)<-("simulated_valuesnb") # # #fit negative binomial curve with alternative parameterization # minus.logL.nb.p<-function(pars, dat){ # p<-pars[1] # size<-pars[2] # -sum(dnbinom(dat, prob=p, size=size,log=TRUE))} # # mlenbp<-optim(par=c(prob=0.3,size=5),fn=minus.logL.nb.p, method="BFGS",hessian=T,dat=survey.responses.baitusers$fish.per.trip) # mlenbp # simsnegbinp<-as.data.frame(rnbinom(700, size=4, p=0.1)) # colnames(simsnegbinp)<-("simulated_valuesnbp") # # #fit weibull distribution # minus.logL.weib<-function(pars, dat){ # shape<-pars[1] # scale<-pars[2] # -sum(dweibull(dat, shape=shape, scale=scale,log=TRUE))} # # mleweib<-optim(par=c(shape=30,scale=1),fn=minus.logL.weib, method="BFGS",hessian=T,dat=survey.responses.baitusers$fish.per.trip) # mleweib # # simsweibs<-as.data.frame(rweibull(10000,shape=2,scale=43)) # colnames(simsweibs)<-("simulated_valuesweib") # # ggplot(simsweibs)+aes(simulated_valuesweib)+geom_histogram(aes(y=..density..))+geom_density(data=survey.responses.baitusers,aes(fish.per.trip)) # ggplot(survey.responses.baitusers)+geom_density # # ggplot(simsweibs)+aes(simulated_valuesweib)+geom_density(alpha=0.2, fill="blue")+geom_histogram(data=survey.responses.baitusers,aes(x=fish.per.trip,y=..density..,alpha=0.7,fill="blue")) # # #Note: spike at 34 is probably an artifact of people estimating since it's 2 dozen + 10. # #graph both # graph<-ggplot(survey.responses.baitusers)+aes(fish.per.trip)+geom_histogram()+geom_smooth(data=simspois, aes(simulated_values), stat = "count",color="red")+geom_smooth(data=simsnegbin, aes(simulated_valuesnb), stat="count", color="blue") # graph #Plot number of trips per year and whether or not a person released #make a column for binary yes/no release or not #survey.responses.baitusers$Q16_1_release<-ifelse(survey.responses.baitusers$disposal_method=="Release in water","yes","no") #trips.plot<-ggplot(survey.responses.baitusers)+ # aes(x=trips.per.year, fill=Q16_1_release) + # geom_histogram(bins = 100) + stat_bin(bins = 50) + # ggtitle("Number of trips per angler per year") + # xlab("Number of trips")+ # theme_bw()+ # line(x=27,) # theme(text = element_text(size=24), axis.text.x = element_text(size=24), axis.text.y = element_text(size=24))+ # labs(fill="Release") # trips.plot mean(survey.responses.baitusers$trips.per.year) ##what is the relationship between trips per year and release probability? #create new column of ones and zeros for release survey.responses.baitusers$releasebinom<-ifelse(survey.responses.baitusers$Q16_1_release=="yes",1,0) summary(glm(data=survey.responses.baitusers,Q16_1_release~trips.per.year, family="binomial")) #relationship between trips per year and release probability for bait users summary(lm(data=survey.responses,trips.per.year~live.bait)) #relationship between trips per year and use of live bait summary(lm(data=survey.responses,trips.per.year~Q16_1_release)) #Within bait users it seems as though there is no significant effect of trips per year # (a measure of avidity) on release probability, however bait users take more trips per year on # average than non bait users. #fit exponential distribution to the number of trips for all anglers minus.logL.exp<-function(rate, dat){ -sum(dexp(dat,rate=rate, log=TRUE))} mlexp<-optim(par=1,fn=minus.logL.exp, method="BFGS",hessian=T,dat=survey.responses$trips.per.year) mlexp #estimated rate parameter using the above is 0.041 #using fitdistrplus instead of optim library(fitdistrplus) fitdist(survey.responses$trips.per.year,"exp",method="mle") #This is also estimating a "rate" of 0.041 #Simulating exponentially distributed data simsexp<-as.data.frame(rexp(10000,rate=mlexp$par)) colnames(simsexp)<-("simulated_valuesexp") ggplot(simsexp)+aes(simulated_valuesexp)+geom_density(alpha=0.2, fill="blue")+geom_histogram(data=survey.responses,aes(x=trips.per.year,y=..density..,alpha=0.7,fill="blue")) #k-s test for differences between simulated and actual data: ks.test(simsexp,survey.responses.baitusers$trips.per.year) #KS test finds p=0.01 indicating significant differences between empirical and theoretical distribution. ##Exponential distribution with rate=0.041 is indicated by both optim and fit distr functions but it's not great. #large spike at number of trips=5. Assuming this is an artifact of angler recall bias and ignoring; proceeding with exponential fit for this distrubiont #However, there are significant differences in number of trips per year between bait users and non-bait users as well: #fit exponential distribution to the number of trips for JUST BAIT USERS mlexpb<-optim(par=1,fn=minus.logL.exp, method="BFGS",hessian=T,dat=survey.responses.baitusers$trips.per.year) mlexpb #rate estimate=0.035 #using fitdistrplus instead of optim fitdist(survey.responses.baitusers$trips.per.year,"exp",method="mle") #This is also estimating a "rate" of 0.035 #Simulating exponentially distributed data simsexpb<-as.data.frame(rexp(10000,rate=mlexpb$par)) colnames(simsexpb)<-("simulated_valuesexpb") ggplot(simsexpb)+aes(simulated_valuesexpb)+geom_density(alpha=0.2, fill="blue")+geom_histogram(data=survey.responses.baitusers,aes(x=trips.per.year,y=..density..,alpha=0.7,fill="blue")) #Poisson? fitpois<-fitdist(survey.responses.baitusers$trips.per.year,"pois",method = "mle") #Simulating poisson distributed data simspoisb<-as.data.frame(rpois(10000,lambda = 27.95)) colnames(simspoisb)<-("simulated_valuespoisb") ggplot(simspoisb)+aes(simulated_valuespoisb)+geom_density(alpha=0.2, fill="blue")+geom_histogram(data=survey.responses.baitusers,aes(x=trips.per.year,y=..density..,alpha=0.7,fill="blue")) ks.test(simspoisb,survey.responses.baitusers$trips.per.year) #horrible fit ks.test(simsexpb,survey.responses.baitusers$trips.per.year) #good fit #KS test indicates good fit for the exponential distribution with rate 0.035, and very pooer fit for poisson # to empirical distribution of trips per year among bait users ###Map percentage of live bait release in different zones/regions survey.responses %>% group_by(purchase_fisha,released) %>% summarize(n=n()) survey.responses <- rename(survey.responses, Method_Release="Q16_1") survey.responses <- rename(survey.responses,Method_Land="Q16_2") %>% rename(Method_Friend="Q16_3") %>% rename(Method_Other="Q16_4") %>% rename(Method_NoLeftover="Q16_5") #make long survey.responses.long<-gather(survey.responses,"Disposal_Method", "Choice", Method_Release, Method_Land, Method_Friend, Method_Other, Method_NoLeftover) survey.responses.long$Choice <- replace_na(survey.responses.long$Choice, "no") survey.responses.long <-select(survey.responses,Choice=="Released into the water") #remove "no" choices to include in graph # Creating a data table of disposal method by fishing library(data.table) #mike <- data.table(postcard.data.long) #mikeb <- data.table(postcard.data) responses.table<-data.table(survey.responses) responses.table[ , .N, by=.(purchase_zonea,released)] mikeb[, .N, by = .(Location)] #gives number of respondents by location mike[ , .N, by = .(Location, Disposal_Method)] #gives number of respondents by disposal method and by location mikec <- merge(mike[ , .N, by = .(Location, Disposal_Method)],mikeb[, .N, by = .(Location)], by = "Location") #Creates one dataframe with number of respondents by disposal method per location as well as total number of respondents per location mikec[,prop := N.x/N.y , ] #See "C:\Users\thom4412\Box\1824444_Thompson\merging data.R" for code for creating the merged file ###Scenario analysis: statewide outbreak of VHSV in FHM and em and spt shiners #Create variable to randomly assign whether an angler bought susc shiners x<-c(0,1) susc<-c() for (i in 1:length(survey.responses.baitusers$OSH.yn)){ if(survey.responses.baitusers$OSH.yn[i]==1){ susc[i] = sample(x,size=1,prob=c(0.16,0.84),replace=TRUE) } else if(survey.responses.baitusers$OSH.yn[i]==0){susc[i]=0} } #Add new variable column to dataframe survey.responses.baitusers$use.OSH.susc<-susc #Create variables to identify whether an angler used FHM or susc shiners and how many of those species they bought for (i in 1:length(survey.responses.baitusers$OSH.yn)){ #use susc? survey.responses.baitusers$use.VHSV.susc[i]<-ifelse(survey.responses.baitusers$use.OSH.susc[i]==1|survey.responses.baitusers$FHM.yn[i]==1,1,0) #create variable for using susceptible if either FHM or susc OSH or both are used #number of susc? survey.responses.baitusers$num.VHSV.susc[i]<-survey.responses.baitusers$fatheadminnows[i] + #number of fathead minnows + ifelse(survey.responses.baitusers$use.OSH.susc[i]==1,survey.responses.baitusers$othershiners[i],0) #number of other shiners, if they're susceptible } survey.responses.baitusers %>% group_by(OSH.yn) %>% summarize(n=n()) survey.responses.baitusers %>% group_by(FHM.yn) %>% summarize(n=n()) survey.responses.baitusers %>% group_by(use.OSH.susc) %>% summarize(n=n(),mean=mean(othershiners,na.rm = TRUE)) survey.responses.baitusers %>% group_by(use.VHSV.susc) %>% summarize(n=n()) #82% of bait users use either FHM or susc OSH #Fit distribution to number of vhsv susc fishes (including fatheads) purchased VHSVusers<-filter(survey.responses.baitusers,num.VHSV.susc>0) fitdistrplus::fitdist(VHSVusers$num.VHSV.susc,dexp,"mle") mean(VHSVusers$num.VHSV.susc) max(VHSVusers$num.VHSV.susc) mean(rexp(1000,rate=0.0275)) #Fit distribution to number of other shiners purchased OSHusers<-filter(survey.responses.baitusers,use.OSH.susc==1) OSHusers<-filter(OSHusers,othershiners>0) #remove rows where number of fish was coerced from "x" to NA to estimate numbers fitdistrplus::fitdist(OSHusers$othershiners,dexp,"mle") max(OSHusers$othershiners) mean(OSHusers$othershiners) ###Scenario analysis: outbreak of VHSV in Lake Superior Watershed (zone 3) #create dataframe that only includes anglers who purchased or harvested bait in zone 3 localized.VHSV.Outbreak<-subset(survey.responses.baitusers, purchase_zonea==3|purchase_zoneb==3|purchase_zonec==3|harvest_zonea==3|harvest_zonea==3|harvest_zoneb==3|harvest_zonec==3) #How many bait users get bait from this area? 42/480 bait users=8.75% of bait users get bait from this watershed length(localized.VHSV.Outbreak$PrimaryID) #What's the probability of an angler in this area using "other shiners"/VHSV susc. fish? localized.VHSV.Outbreak %>% group_by(OSH.yn,live.bait) %>% summarize(n=n(), mean=mean(othershiners,na.rm = TRUE)) #10/42=23.8% anglers used other shiners from this area; 1 harvested 9 purchased #Compared to other regions, seems like there's some that are more and some that are less: survey.responses.baitusers %>% group_by(purchase_zonea,OSH.yn) %>% summarize(n=n(), mean=mean(othershiners,na.rm = TRUE)) #What is the probability of an angler in this area using n other shiners,given that they use other shiners? OSH.users.zone3<-subset(localized.VHSV.Outbreak,OSH.yn==1) #people who use shiners in zone 3 hist(OSHusers$othershiners) hist(OSH.users.zone3$othershiners) mean(OSH.users.zone3$othershiners, na.rm = TRUE) mean(OSH.users$othershiners,na.rm = TRUE) t.test(OSH.users$othershiners,OSH.users.zone3$othershiners) #t.test shows no sig. difference in average number of fish purchased #Use OSH-buying info from all anglers (parameterized in @risk by jan) b/c no sig differences apparently and there's not enough data w/in this subset to fit a distribution #How many trips do anglers in this zone take? mean(OSH.users.zone3$trips.per.year,na.rm = TRUE) t.test(survey.responses.baitusers$trips.per.year,localized.VHSV.Outbreak$trips.per.year) ks.test(survey.responses.baitusers$trips.per.year,localized.VHSV.Outbreak$trips.per.year) #no sig difference for OSH users or anglers in this zone, so going to use same distribution as general pop. #What's the probability of an angler in this zone releasing leftover live baitfish? localized.VHSV.Outbreak %>% group_by(Q16_1_release) %>% summarize(n=n()) #approx. 16%...is this signicantly lower than for general state anglers? NO #where do anglers who buy fish in this zone go? localized.VHSV.Outbreak %>% group_by(fish_zone1) %>% summarize(n=n()) localized.VHSV.Outbreak %>% group_by(fish_zone2) %>% summarize(n=n()) localized.VHSV.Outbreak %>% group_by(fish_zone3) %>% summarize(n=n()) localized.VHSV.Outbreak %>% group_by(fish_zone4) %>% summarize(n=n()) localized.VHSV.Outbreak %>% group_by(fish_zone5) %>% summarize(n=n()) localized.VHSV.Outbreak %>% group_by(fish_zone6) %>% summarize(n=n()) localized.VHSV.Outbreak %>% group_by(fish_zone7) %>% summarize(n=n()) localized.VHSV.Outbreak %>% group_by(fish_zone8) %>% summarize(n=n()) localized.VHSV.Outbreak %>% group_by(fish_zone9) %>% summarize(n=n()) localized.VHSV.Outbreak %>% group_by(fish_zone10) %>% summarize(n=n()) localized.VHSV.Outbreak %>% group_by(fish_zone11) %>% summarize(n=n()) localized.VHSV.Outbreak %>% group_by(fish_zone12) %>% summarize(n=n()) #Wrote data into spreadsheetusing the following code on 7/21/20: write.csv(survey.responses.baitusers$num.VHSV.susc, file="numberVHSSuscFish_state.csv", ) write.csv(localized.VHSV.Outbreak$num.VHSV.susc, file="numberVHSSuscFish_local.csv", ) #Check for crossing watershed boundary: survey.responses.baitusers$cross_zonea<-NA survey.responses.baitusers$cross_zoneb<-NA survey.responses.baitusers$cross_zonec<-NA for (i in 1:length(survey.responses.baitusers$PrimaryID)){ survey.responses.baitusers$cross_zonea[i]<-ifelse(survey.responses.baitusers$purchase_zonea[i]==survey.responses.baitusers$purchase_fisha[i],0,1) survey.responses.baitusers$cross_zoneb[i]<-ifelse(survey.responses.baitusers$purchase_zoneb[i]==survey.responses.baitusers$purchase_fishb[i],0,1) survey.responses.baitusers$cross_zonec[i]<-ifelse(survey.responses.baitusers$purchase_zonec[i]==survey.responses.baitusers$purchase_fishc[i],0,1) } #sum to find total: sum(survey.responses.baitusers$cross_zonea,na.rm = TRUE) #How often did bait come in from out of state? occurrences<-table(unlist(survey.responses.baitusers)) occurrences["Outside MN"] #code for writing csvs write.csv(survey.responses, file="allresponses.csv") write.csv(survey.responses.baitusers,file="baitusers.csv")