library(tidyverse)
library(sf)
library(mgcv)
library(arrow)
library(gratia)
early.anomaly = -17.52
late.anomaly = 14
# 69037800 Ver - Bad data prior to 2018
# 25000100 Pepin - missing data
surveys_ef <- read.csv("./data/yoy_data.csv")
ice_off <- read.csv("./data/ice_off_summarized.csv")
# 77021500, 27013700, 81001400 - No EF surveys
long_term <- ice_off %>% group_by(DOW) %>% count() %>% filter(n >= 100) %>%
filter(!DOW %in% c("27013700", "81001400")) # Lakes without EF surveys
# Calulate average ice off long term
average <- ice_off %>% dplyr::filter(DOW %in% long_term$DOW) %>%
filter(year < 1980) %>%
group_by(DOW) %>% summarise(mean_ice_off = mean(min_ice_off_julian))
anomaly <- average %>% merge(ice_off, by = "DOW") %>% filter(year > 1980) %>%
mutate(anomaly = min_ice_off_julian - mean_ice_off) %>%
group_by(year) %>%
summarize(average_anomaly = mean(anomaly),
sd_anomaly = sd(anomaly))
Filter down to modeling dataset
plot(y = anomaly$average_anomaly, x = anomaly$year)

mean(anomaly$average_anomaly)
## [1] -4.088399
efish_wae_ice <- surveys_ef %>%
dplyr::select(year, lake_id, catch, total_effort_1, x, y, acres, julian_day_survey, FRY, fry.pa) %>%
rename(DOW = lake_id) %>%
mutate(log_acres = log(acres),
cpue = catch/total_effort_1)
efish_ice_anomaly <- merge(efish_wae_ice, anomaly, by = c("year")) %>%
mutate(DOW = as.factor(DOW),
year_f = as.factor(year),
average_anomaly_abs = abs(average_anomaly))
three.years <- efish_ice_anomaly %>% group_by(DOW) %>% count() %>% dplyr::filter(n >= 3)
efish_ice <- efish_ice_anomaly %>% dplyr::filter(DOW %in% three.years$DOW)
Survey Info
length(unique(efish_ice$DOW))
## [1] 218
min(efish_ice$year);max(efish_ice$year)
## [1] 1993
## [1] 2022
lakes = efish_ice %>% group_by(DOW) %>% count()
median(lakes$n) # Median surveys per lake = 8.5
## [1] 8.5
num.per.year = efish_ice %>% group_by(year) %>% count() %>% ungroup()
mean(num.per.year$n)
## [1] 75.56667
Wae recruitment ICE
Uses non lake specific ice off anomalys (only year specific)
gam.ice <- gam(catch ~ s(average_anomaly) + s(log_acres) + s(x, y) + s(julian_day_survey) +
s(DOW, bs = "re") + s(year_f, bs = "re") + offset(log(total_effort_1)),
method = "REML", select = T,
family = nb(),
data = efish_ice)
#write_rds(gam.ice, "./models/yoy_ice_gam.rds")
#gam.ice <- readRDS("./models/yoy_ice_gam.rds")
appraise(gam.ice)

gratia::draw(gam.ice)

summary(gam.ice) # 41.1 % deviance explained
##
## Family: Negative Binomial(0.587)
## Link function: log
##
## Formula:
## catch ~ s(average_anomaly) + s(log_acres) + s(x, y) + s(julian_day_survey) +
## s(DOW, bs = "re") + s(year_f, bs = "re") + offset(log(total_effort_1))
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.9495 0.1108 26.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(average_anomaly) 1.6536 9 120.19 0.00336 **
## s(log_acres) 0.9683 9 3143.30 < 2e-16 ***
## s(x,y) 10.8999 29 9358.95 < 2e-16 ***
## s(julian_day_survey) 2.3862 9 114.30 5.67e-06 ***
## s(DOW) 176.0675 217 1243.04 < 2e-16 ***
## s(year_f) 18.5314 29 76.11 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.4 Deviance explained = 41.1%
## -REML = 10266 Scale est. = 1 n = 2267
Fitted samples plot
efish_ice %>% select(total_effort_1, log_acres, julian_day_survey, x, y) %>%
summarize(across(c(total_effort_1, log_acres, julian_day_survey, x, y),
~ mean(.x, na.rm = TRUE)))
## total_effort_1 log_acres julian_day_survey x y
## 1 1.384742 7.458838 266.247 -94.6739 46.08509
pred.df = data.frame(
average_anomaly = seq(-24.96848531, 18.6, by = 0.5),
x = -94.7, y = 46.1,
julian_day_survey = 266, log_acres = 7,
DOW = "56038300", year_f = 2011, total_effort_1 = 1.38) %>%
mutate(.row = row_number())
fs <- fitted_samples(gam.ice, data = pred.df, n = 1000, seed = 1024,
exclude = c('s(DOW)', 's(year_f)')) |>
left_join(pred.df |> select(.row, average_anomaly),
by = join_by(.row == .row)) %>%
mutate(group = as.numeric(.draw))
sm <- smooth_estimates(gam.ice)
jd_smooth <- sm %>%
filter(.smooth == "s(average_anomaly)" ) %>%
add_confint() %>%
add_constant(coef(gam.ice)["(Intercept)"]) %>%
transform_fun(inv_link(gam.ice))
fig <- fs %>%
ggplot() +
# Thick line (expected relationship)
geom_line(data = jd_smooth, aes(x = average_anomaly, y = .estimate), lwd = 1.5) +
geom_ribbon(data = jd_smooth, aes(x = average_anomaly, y = .estimate, ymin = .lower_ci , ymax = .upper_ci),
alpha = 0.3) +
# Thin lines (draws)
geom_line(aes(x = average_anomaly, y = .fitted, group = group), alpha = 0.03) +
labs(x ="Ice-off Anomaly", y = "Fall Age-0 Walleye Catch") +
theme_classic() +
theme(axis.text.y = element_text(size = 16),
axis.text.x = element_text(size = 16),
axis.title.x = element_text(size = 18),
axis.title.y = element_text(size = 18),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14)) +
scale_x_continuous(breaks = c(-15, -10, -5, 0, 5, 10, 15)) +
coord_cartesian(expand = FALSE, xlim = c(-18, NA), ylim = c(0, 63))
fig

# Figure 3B
ggsave("./figures/Figure3B.jpeg", width = 10, height = 10)
Get estimates of decrease in year-class
data_to_predict <- efish_ice %>%
select(average_anomaly, log_acres, x, y, julian_day_survey, total_effort_1, acres) %>%
summarize(across(everything(), ~median(.x, na.rm = T)))
df.results <- data.frame(average = numeric(0), low = numeric(0), high = numeric(0),
age = numeric(0))
rmvn <- function(n, mu, sig) { ## MVN random deviates
L <- mroot(sig);
m <- ncol(L);
t(mu + L%*%matrix(rnorm(m * n), m, n))
}
newd <- data.frame(average_anomaly = c(-17.52, 0, 14), log_acres = log(data_to_predict$acres),
x = data_to_predict$x, y = data_to_predict$y,
julian_day_survey = data_to_predict$julian_day_survey, total_effort_1 = data_to_predict$total_effort_1,
DOW = "10005900", year_f = 2010)
predicted = predict(gam.ice, newd, type = "response",
exclude = c("s(DOW)", "s(year_f)"))
Xp <- predict(gam.ice, newd, type = "lpmatrix",
exclude = c("s(DOW)", "s(year_f)") )
br <- rmvn(1000, coef(gam.ice), gam.ice$Vp) ## 1000 replicate param. vectors
early.ice.off <- rep(0, 1000)
normal.ice.off <- rep(0, 1000)
late.ice.off <- rep(0, 1000)
percent.early.normal <- rep(0, 1000)
percent.early.late <- rep(0, 1000)
diff.early.normal <- rep(0,1000)
diff.early.late <- rep(0,1000)
for (i in 1:1000) {
pr <- Xp %*% br[i,] ## replicate predictions
early.ice.off[i] = exp(pr[1,])
normal.ice.off[i] = exp(pr[2,])
late.ice.off[i] = exp(pr[3,])
percent.early.normal[i] = 1 - (early.ice.off[i]/normal.ice.off[i])
percent.early.late[i] = 1 - (early.ice.off[i]/late.ice.off[i])
diff.early.normal[i] <- early.ice.off[i] - normal.ice.off[i]
diff.early.late[i] <- early.ice.off[i] - late.ice.off[i]
}
median(early.ice.off)
## [1] 13.87396
quantile(early.ice.off, na.rm = T, probs = c(0.025, 0.975))
## 2.5% 97.5%
## 7.476789 25.638167
median(normal.ice.off)
## [1] 17.39929
quantile(normal.ice.off, na.rm = T, probs = c(0.025, 0.975))
## 2.5% 97.5%
## 9.644915 32.600854
median(late.ice.off)
## [1] 20.20792
quantile(late.ice.off, na.rm = T, probs = c(0.025, 0.975))
## 2.5% 97.5%
## 10.81353 38.62977
# Percent decrease in recruitment from early to normal ice-off (pos = increase)
median(percent.early.normal)*100
## [1] 21.06275
quantile(percent.early.normal, na.rm = T, probs = c(0.025, 0.975))*100
## 2.5% 97.5%
## 2.179129 37.748553
# Percent decrease in recruitment from early to late ice-off (pos = increase)
median(percent.early.late)*100
## [1] 32.81314
quantile(percent.early.late, na.rm = T, probs = c(0.025, 0.975))*100
## 2.5% 97.5%
## 9.882068 49.610653
median(1 - (early.ice.off/late.ice.off), na.rm = T)*100
## [1] 32.81314
quantile(1 - (early.ice.off/late.ice.off), na.rm = T, probs = c(0.025, 0.975))*100
## 2.5% 97.5%
## 9.882068 49.610653
median(early.ice.off - normal.ice.off, na.rm = T) /1.09
## [1] -3.269728
quantile(early.ice.off - normal.ice.off, na.rm = T, probs = c(0.025, 0.975))
## 2.5% 97.5%
## -9.4527029 -0.4363203
median(early.ice.off - late.ice.off, na.rm = T)/1.09
## [1] -6.021967
quantile(early.ice.off - late.ice.off, na.rm = T, probs = c(0.025, 0.975))
## 2.5% 97.5%
## -15.831494 -1.511869
Supplement
WAE recruitment lake specific
pre_1980 <- ice_off %>% filter(year < 1981) %>%
group_by(DOW) %>% count() %>% filter( n > 4)
average <- ice_off %>%
filter(DOW %in% pre_1980$DOW) %>%
filter(year < 1980) %>%
group_by(DOW) %>% summarise(mean_ice_off = mean(min_ice_off_julian))
lake_specific_anomaly <- average %>% merge(ice_off, by = "DOW") %>% filter(year > 1980) %>%
mutate(anomaly = min_ice_off_julian - mean_ice_off) %>%
select(DOW, mean_ice_off, year, anomaly)
efish_wae_ice_lake_specific <- surveys_ef %>%
dplyr::select(year, lake_id, catch, total_effort_1, x, y, acres, julian_day_survey) %>%
mutate(DOW = lake_id) %>%
mutate(log_acres = log(acres),
cpue = catch/total_effort_1) %>%
mutate(DOW = ifelse(DOW == 73020002, 73020000, DOW),
DOW = ifelse(DOW == 73019600, 73020000, DOW),
DOW = ifelse(DOW == 51006300, 51004600, DOW)
) %>%
merge(lake_specific_anomaly, by = c("year", "DOW")) %>%
mutate(DOW = as.factor(DOW),
year_f = as.factor(year),
anomaly_abs = abs(anomaly))
lake_specific_lakes <- efish_wae_ice_lake_specific %>% group_by(DOW) %>%
count() %>%
filter(n > 2)
lake_specific <- efish_wae_ice_lake_specific %>% filter(DOW %in% lake_specific_lakes$DOW)
# Number of lakes
length(unique(lake_specific$DOW))
## [1] 36
range(lake_specific$year)
## [1] 1993 2022
Create model for walleye recruitment lake specific
gam.ice.lake.specific <- gam(catch ~ s(anomaly) + s(log_acres) + s(julian_day_survey) +
s(x, y) +
s(lake_id, bs = "re") + offset(log(total_effort_1)),
method = "REML",
family = nb(),
data = lake_specific)
#saveRDS(gam.ice.lake.specific, "./models/yoy_ice_gam_lake_specific.rds")
#gam.ice.lake.specific <- readRDS("./models/yoy_ice_gam_lake_specific.rds")
appraise(gam.ice.lake.specific)

gratia::draw(gam.ice.lake.specific)

summary(gam.ice.lake.specific) #38%
##
## Family: Negative Binomial(0.578)
## Link function: log
##
## Formula:
## catch ~ s(anomaly) + s(log_acres) + s(julian_day_survey) + s(x,
## y) + s(lake_id, bs = "re") + offset(log(total_effort_1))
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.46930 0.06556 52.91 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(anomaly) 1.002e+00 1.004 3.335 0.0681 .
## s(log_acres) 5.920e+00 6.624 79.656 <2e-16 ***
## s(julian_day_survey) 5.091e+00 6.236 15.416 0.0181 *
## s(x,y) 1.754e+01 20.696 132.955 <2e-16 ***
## s(lake_id) 7.548e-05 1.000 0.000 0.7194
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.596 Deviance explained = 38%
## -REML = 2098.4 Scale est. = 1 n = 423
k.check(gam.ice.lake.specific)
## k' edf k-index p-value
## s(anomaly) 9 1.001799e+00 0.7689515 0.0225
## s(log_acres) 9 5.920115e+00 0.6095773 0.0000
## s(julian_day_survey) 9 5.091244e+00 0.8376663 0.3650
## s(x,y) 29 1.753968e+01 0.7640590 0.0050
## s(lake_id) 1 7.547995e-05 0.5977862 0.0000
sm <- smooth_estimates(gam.ice.lake.specific)
jd_smooth <- sm %>%
filter(.smooth == "s(anomaly)" ) %>%
add_confint() %>%
add_constant(coef(gam.ice.lake.specific)["(Intercept)"]) %>%
transform_fun(inv_link(gam.ice.lake.specific))
efish_ice_residuals <- lake_specific %>% add_partial_residuals(gam.ice.lake.specific) %>%
add_constant(coef(gam.ice.lake.specific)["(Intercept)"], column = 16) %>%
transform_fun(inv_link(gam.ice.lake.specific), 16)
jd_smooth %>%
ggplot(aes(x = anomaly, y = .estimate)) +
geom_point(aes(x = anomaly, y = `s(anomaly)`),
alpha = 0.2, data = efish_ice_residuals) +
geom_line(lwd = 1.5) +
geom_ribbon(aes(ymin = .lower_ci , ymax = .upper_ci), alpha = 0.2) +
labs(y = "Fall Age-0 Walleye Catch", x = "Ice-Off Anomaly") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 85)) +
#ggtitle("Effects of Anomalous Ice-Off on Walleye Recruitment") +
theme_classic() +
theme(axis.text = element_text(size = 14),
axis.title = element_text(size = 18),
plot.title = element_text(size = 23, hjust = 0.5, face = "bold"))

# Figure S3
ggsave("./figures/FigureS3.jpeg", height = 10, width = 12)
Get estimates of decrease in recruitment with lake specific ice anomalies
data_to_predict <- lake_specific %>%
select(anomaly, log_acres, x, y, julian_day_survey, total_effort_1, acres) %>%
summarize(across(everything(), ~median(.x, na.rm = T)))
df.results <- data.frame(average = numeric(0), low = numeric(0), high = numeric(0),
age = numeric(0))
rmvn <- function(n, mu, sig) { ## MVN random deviates
L <- mroot(sig);
m <- ncol(L);
t(mu + L%*%matrix(rnorm(m * n), m, n))
}
newd <- data.frame(average_anomaly = c(-17.52, 0, 14), log_acres = log(data_to_predict$acres),
x = data_to_predict$x, y = data_to_predict$y,
julian_day_survey = data_to_predict$julian_day_survey, total_effort_1 = data_to_predict$total_effort_1,
DOW = "10005900", year_f = 2010)
predicted = predict(gam.ice, newd, type = "response",
exclude = c("s(DOW)", "s(year_f)"))
Xp <- predict(gam.ice, newd, type = "lpmatrix",
exclude = c("s(DOW)", "s(year_f)") )
br <- rmvn(1000, coef(gam.ice), gam.ice$Vp) ## 1000 replicate param. vectors
early.ice.off <- rep(0, 1000)
normal.ice.off <- rep(0, 1000)
late.ice.off <- rep(0, 1000)
percent.early.normal <- rep(0, 1000)
percent.early.late <- rep(0, 1000)
diff.early.normal <- rep(0,1000)
diff.early.late <- rep(0,1000)
for (i in 1:1000) {
pr <- Xp %*% br[i,] ## replicate predictions
early.ice.off[i] = exp(pr[1,])
normal.ice.off[i] = exp(pr[2,])
late.ice.off[i] = exp(pr[3,])
percent.early.normal[i] = 1 - (early.ice.off[i]/normal.ice.off[i])
percent.early.late[i] = 1 - (early.ice.off[i]/late.ice.off[i])
diff.early.normal[i] <- early.ice.off[i] - normal.ice.off[i]
diff.early.late[i] <- early.ice.off[i] - late.ice.off[i]
}
median(early.ice.off)
## [1] 17.01221
quantile(early.ice.off, na.rm = T, probs = c(0.025, 0.975))
## 2.5% 97.5%
## 9.098088 31.999479
median(normal.ice.off)
## [1] 21.78031
quantile(normal.ice.off, na.rm = T, probs = c(0.025, 0.975))
## 2.5% 97.5%
## 11.68245 41.21749
median(late.ice.off)
## [1] 25.73516
quantile(late.ice.off, na.rm = T, probs = c(0.025, 0.975))
## 2.5% 97.5%
## 13.06387 48.35451
median(1 - (early.ice.off/normal.ice.off), na.rm = T)*100
## [1] 21.61131
quantile(1 - (early.ice.off/normal.ice.off), na.rm = T, probs = c(0.025, 0.975))*100
## 2.5% 97.5%
## 2.736583 37.257456
median(1 - (early.ice.off/late.ice.off), na.rm = T)*100
## [1] 33.47421
quantile(1 - (early.ice.off/late.ice.off), na.rm = T, probs = c(0.025, 0.975))*100
## 2.5% 97.5%
## 9.019866 50.526813
# Percent decrease in recruitment from early to normal ice-off (pos = increase)
median(percent.early.normal)*100
## [1] 21.61131
quantile(percent.early.normal, na.rm = T, probs = c(0.025, 0.975))*100
## 2.5% 97.5%
## 2.736583 37.257456
# Percent decrease in recruitment from early to late ice-off (pos = increase)
median(percent.early.late)*100
## [1] 33.47421
quantile(percent.early.late, na.rm = T, probs = c(0.025, 0.975))*100
## 2.5% 97.5%
## 9.019866 50.526813
median(early.ice.off - normal.ice.off, na.rm = T) /1.09
## [1] -4.394729
quantile(early.ice.off - normal.ice.off, na.rm = T, probs = c(0.025, 0.975))
## 2.5% 97.5%
## -11.7525054 -0.5352326
median(early.ice.off - late.ice.off, na.rm = T)/1.09
## [1] -7.705273
quantile(early.ice.off - late.ice.off, na.rm = T, probs = c(0.025, 0.975))
## 2.5% 97.5%
## -20.487252 -1.861312
Graveyard
Model with stocked walleye effect
efish_ice_stocked <- efish_ice_anomaly %>%
dplyr::filter(DOW %in% three.years$DOW) %>%
mutate(fry_stocked = as.factor(ifelse(FRY > 0, 1, 0)))
gam.ice_stocked <- gam(catch ~ s(average_anomaly, by = fry_stocked) + s(log_acres) +
s(x, y) + s(julian_day_survey) +
s(DOW, bs = "re") + s(year_f, bs = "re") +
offset(log(total_effort_1)),
method = "REML", select = T,
family = nb(),
data = efish_ice_stocked)
appraise(gam.ice_stocked)
gratia::draw(gam.ice_stocked, select = c(1, 2))
summary(gam.ice_stocked)