library(tidyverse)
library(sf)
library(mgcv)
library(arrow)
library(gratia)

early.anomaly = -17.52
late.anomaly = 14

# 69037800 Ver - Bad data prior to 2018
# 25000100 Pepin - missing data
surveys_ef <- read.csv("./data/yoy_data.csv")
ice_off <- read.csv("./data/ice_off_summarized.csv")


# 77021500, 27013700, 81001400 - No EF surveys
long_term <- ice_off %>% group_by(DOW) %>% count() %>% filter(n >= 100) %>% 
  filter(!DOW %in% c("27013700", "81001400")) # Lakes without EF surveys

# Calulate average ice off long term
average <- ice_off %>% dplyr::filter(DOW %in% long_term$DOW) %>% 
  filter(year < 1980) %>%
  group_by(DOW) %>% summarise(mean_ice_off = mean(min_ice_off_julian))

anomaly <- average %>% merge(ice_off, by = "DOW") %>% filter(year > 1980) %>%
  mutate(anomaly =  min_ice_off_julian - mean_ice_off) %>%
  group_by(year) %>%
  summarize(average_anomaly = mean(anomaly), 
            sd_anomaly = sd(anomaly))

Filter down to modeling dataset

plot(y = anomaly$average_anomaly, x = anomaly$year)

mean(anomaly$average_anomaly)
## [1] -4.088399
efish_wae_ice <- surveys_ef %>%
  dplyr::select(year, lake_id, catch, total_effort_1, x, y, acres, julian_day_survey, FRY, fry.pa) %>%
  rename(DOW = lake_id) %>%
  mutate(log_acres = log(acres),
         cpue = catch/total_effort_1)
  
  
efish_ice_anomaly <- merge(efish_wae_ice, anomaly, by = c("year")) %>%
  mutate(DOW = as.factor(DOW),
         year_f = as.factor(year),
         average_anomaly_abs = abs(average_anomaly))

three.years <- efish_ice_anomaly %>% group_by(DOW) %>% count() %>% dplyr::filter(n >= 3)
efish_ice <- efish_ice_anomaly %>% dplyr::filter(DOW %in% three.years$DOW)

Survey Info

length(unique(efish_ice$DOW))
## [1] 218
min(efish_ice$year);max(efish_ice$year)
## [1] 1993
## [1] 2022
lakes = efish_ice %>% group_by(DOW) %>% count()

median(lakes$n) # Median surveys per lake = 8.5
## [1] 8.5
num.per.year = efish_ice %>% group_by(year) %>% count() %>% ungroup() 
mean(num.per.year$n)
## [1] 75.56667

Wae recruitment ICE

Uses non lake specific ice off anomalys (only year specific)

gam.ice <- gam(catch ~ s(average_anomaly) + s(log_acres) + s(x, y) + s(julian_day_survey) +
                 s(DOW, bs = "re") + s(year_f, bs = "re") + offset(log(total_effort_1)), 
               method = "REML", select = T,
               family = nb(),
               data = efish_ice)
#write_rds(gam.ice, "./models/yoy_ice_gam.rds")
#gam.ice <- readRDS("./models/yoy_ice_gam.rds")
appraise(gam.ice)

gratia::draw(gam.ice)

summary(gam.ice) # 41.1 % deviance explained
## 
## Family: Negative Binomial(0.587) 
## Link function: log 
## 
## Formula:
## catch ~ s(average_anomaly) + s(log_acres) + s(x, y) + s(julian_day_survey) + 
##     s(DOW, bs = "re") + s(year_f, bs = "re") + offset(log(total_effort_1))
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   2.9495     0.1108   26.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf Ref.df  Chi.sq  p-value    
## s(average_anomaly)     1.6536      9  120.19  0.00336 ** 
## s(log_acres)           0.9683      9 3143.30  < 2e-16 ***
## s(x,y)                10.8999     29 9358.95  < 2e-16 ***
## s(julian_day_survey)   2.3862      9  114.30 5.67e-06 ***
## s(DOW)               176.0675    217 1243.04  < 2e-16 ***
## s(year_f)             18.5314     29   76.11  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =    0.4   Deviance explained = 41.1%
## -REML =  10266  Scale est. = 1         n = 2267

Fitted samples plot

efish_ice %>% select(total_effort_1, log_acres, julian_day_survey, x, y) %>%
  summarize(across(c(total_effort_1, log_acres, julian_day_survey, x, y), 
      ~ mean(.x, na.rm = TRUE)))
##   total_effort_1 log_acres julian_day_survey        x        y
## 1       1.384742  7.458838           266.247 -94.6739 46.08509
pred.df = data.frame(
  average_anomaly = seq(-24.96848531, 18.6, by = 0.5),
  x = -94.7, y = 46.1,
  julian_day_survey = 266, log_acres = 7,
  DOW = "56038300", year_f = 2011, total_effort_1 = 1.38) %>%
  mutate(.row = row_number())

fs <- fitted_samples(gam.ice, data = pred.df, n = 1000, seed = 1024,
                       exclude = c('s(DOW)',  's(year_f)')) |>
    left_join(pred.df |> select(.row, average_anomaly), 
              by = join_by(.row == .row)) %>%
  mutate(group = as.numeric(.draw))

sm <- smooth_estimates(gam.ice)
jd_smooth <- sm %>%
  filter(.smooth == "s(average_anomaly)" ) %>%
  add_confint() %>%
  add_constant(coef(gam.ice)["(Intercept)"]) %>% 
  transform_fun(inv_link(gam.ice))

fig <- fs %>%
  ggplot() +
  # Thick line (expected relationship)
  geom_line(data = jd_smooth, aes(x = average_anomaly, y = .estimate), lwd = 1.5) +
  geom_ribbon(data = jd_smooth, aes(x = average_anomaly, y = .estimate, ymin = .lower_ci , ymax = .upper_ci), 
              alpha = 0.3) +
  # Thin lines (draws)
  geom_line(aes(x = average_anomaly, y = .fitted, group = group), alpha = 0.03) +
  labs(x ="Ice-off Anomaly", y = "Fall Age-0 Walleye Catch") +
  theme_classic() +
  theme(axis.text.y = element_text(size = 16),
        axis.text.x = element_text(size = 16),
        axis.title.x = element_text(size = 18),
        axis.title.y = element_text(size = 18),
        legend.title = element_text(size = 16), 
        legend.text = element_text(size = 14)) + 
  scale_x_continuous(breaks = c(-15, -10, -5, 0, 5, 10, 15)) +
  coord_cartesian(expand = FALSE, xlim = c(-18, NA), ylim = c(0, 63))

fig

# Figure 3B
ggsave("./figures/Figure3B.jpeg", width = 10, height = 10)

Get estimates of decrease in year-class

data_to_predict <- efish_ice %>% 
  select(average_anomaly, log_acres, x, y, julian_day_survey, total_effort_1, acres) %>%
  summarize(across(everything(), ~median(.x, na.rm = T)))

df.results <- data.frame(average = numeric(0), low = numeric(0), high = numeric(0), 
                         age = numeric(0))

rmvn <- function(n, mu, sig) { ## MVN random deviates 
    L <- mroot(sig);
    m <- ncol(L); 
    t(mu + L%*%matrix(rnorm(m * n), m, n)) 
} 
newd <- data.frame(average_anomaly = c(-17.52, 0, 14), log_acres = log(data_to_predict$acres),
                     x = data_to_predict$x, y = data_to_predict$y, 
                     julian_day_survey = data_to_predict$julian_day_survey, total_effort_1 = data_to_predict$total_effort_1, 
                     DOW = "10005900", year_f = 2010)
  
predicted = predict(gam.ice, newd, type = "response", 
                exclude = c("s(DOW)", "s(year_f)"))
  
Xp <- predict(gam.ice, newd, type = "lpmatrix", 
                exclude = c("s(DOW)", "s(year_f)") )
  
  
br <- rmvn(1000, coef(gam.ice), gam.ice$Vp) ## 1000 replicate param. vectors 
  
early.ice.off <- rep(0, 1000)
normal.ice.off <- rep(0, 1000)
late.ice.off <- rep(0, 1000)
percent.early.normal <- rep(0, 1000)
percent.early.late <- rep(0, 1000)
diff.early.normal <- rep(0,1000)
diff.early.late <- rep(0,1000)

  
for (i in 1:1000) { 
  pr <- Xp %*% br[i,] ## replicate predictions 
  early.ice.off[i] = exp(pr[1,])
  normal.ice.off[i] =  exp(pr[2,]) 
  late.ice.off[i] =  exp(pr[3,])
  percent.early.normal[i] = 1 - (early.ice.off[i]/normal.ice.off[i])
  percent.early.late[i] = 1 - (early.ice.off[i]/late.ice.off[i])
  diff.early.normal[i] <- early.ice.off[i] - normal.ice.off[i]
  diff.early.late[i] <- early.ice.off[i] - late.ice.off[i]
} 
  
median(early.ice.off)
## [1] 13.87396
quantile(early.ice.off, na.rm = T, probs = c(0.025, 0.975))
##      2.5%     97.5% 
##  7.476789 25.638167
median(normal.ice.off) 
## [1] 17.39929
quantile(normal.ice.off, na.rm = T, probs = c(0.025, 0.975))
##      2.5%     97.5% 
##  9.644915 32.600854
median(late.ice.off)
## [1] 20.20792
quantile(late.ice.off, na.rm = T, probs = c(0.025, 0.975))
##     2.5%    97.5% 
## 10.81353 38.62977
# Percent decrease in recruitment from early to normal ice-off (pos = increase)
median(percent.early.normal)*100
## [1] 21.06275
quantile(percent.early.normal, na.rm = T, probs = c(0.025, 0.975))*100
##      2.5%     97.5% 
##  2.179129 37.748553
# Percent decrease in recruitment from early to late ice-off (pos = increase)
median(percent.early.late)*100
## [1] 32.81314
quantile(percent.early.late, na.rm = T, probs = c(0.025, 0.975))*100
##      2.5%     97.5% 
##  9.882068 49.610653
median(1 - (early.ice.off/late.ice.off), na.rm = T)*100
## [1] 32.81314
quantile(1 - (early.ice.off/late.ice.off), na.rm = T, probs = c(0.025, 0.975))*100
##      2.5%     97.5% 
##  9.882068 49.610653
median(early.ice.off - normal.ice.off, na.rm = T) /1.09
## [1] -3.269728
quantile(early.ice.off - normal.ice.off, na.rm = T, probs = c(0.025, 0.975))
##       2.5%      97.5% 
## -9.4527029 -0.4363203
median(early.ice.off - late.ice.off, na.rm = T)/1.09
## [1] -6.021967
quantile(early.ice.off - late.ice.off, na.rm = T, probs = c(0.025, 0.975))
##       2.5%      97.5% 
## -15.831494  -1.511869

Supplement

WAE recruitment lake specific

pre_1980 <- ice_off %>% filter(year < 1981) %>%
  group_by(DOW) %>% count() %>% filter( n > 4)


average <- ice_off %>% 
  filter(DOW %in% pre_1980$DOW) %>% 
  filter(year < 1980) %>%
  group_by(DOW) %>% summarise(mean_ice_off = mean(min_ice_off_julian))

lake_specific_anomaly <- average %>% merge(ice_off, by = "DOW") %>% filter(year > 1980) %>%
  mutate(anomaly =  min_ice_off_julian - mean_ice_off) %>%
  select(DOW, mean_ice_off, year, anomaly)



efish_wae_ice_lake_specific <- surveys_ef %>%
  dplyr::select(year, lake_id, catch, total_effort_1, x, y, acres, julian_day_survey) %>%
  mutate(DOW = lake_id) %>%
  mutate(log_acres = log(acres),
    cpue = catch/total_effort_1) %>%
  mutate(DOW = ifelse(DOW == 73020002, 73020000, DOW),
         DOW = ifelse(DOW == 73019600, 73020000, DOW), 
         DOW = ifelse(DOW == 51006300, 51004600, DOW)
         ) %>%
  merge(lake_specific_anomaly, by = c("year", "DOW")) %>%
  mutate(DOW = as.factor(DOW),
         year_f = as.factor(year),
         anomaly_abs = abs(anomaly))

lake_specific_lakes <- efish_wae_ice_lake_specific %>% group_by(DOW) %>% 
  count() %>% 
  filter(n > 2)

lake_specific <- efish_wae_ice_lake_specific %>% filter(DOW %in% lake_specific_lakes$DOW)

# Number of lakes
length(unique(lake_specific$DOW))
## [1] 36
range(lake_specific$year)
## [1] 1993 2022

Create model for walleye recruitment lake specific

gam.ice.lake.specific <- gam(catch ~ s(anomaly) + s(log_acres)  + s(julian_day_survey) +
                               s(x, y) +
                 s(lake_id, bs = "re") + offset(log(total_effort_1)), 
               method = "REML",
               family = nb(),
               data = lake_specific)
#saveRDS(gam.ice.lake.specific, "./models/yoy_ice_gam_lake_specific.rds")
#gam.ice.lake.specific <- readRDS("./models/yoy_ice_gam_lake_specific.rds")


appraise(gam.ice.lake.specific)

gratia::draw(gam.ice.lake.specific)

summary(gam.ice.lake.specific) #38%
## 
## Family: Negative Binomial(0.578) 
## Link function: log 
## 
## Formula:
## catch ~ s(anomaly) + s(log_acres) + s(julian_day_survey) + s(x, 
##     y) + s(lake_id, bs = "re") + offset(log(total_effort_1))
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.46930    0.06556   52.91   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                            edf Ref.df  Chi.sq p-value    
## s(anomaly)           1.002e+00  1.004   3.335  0.0681 .  
## s(log_acres)         5.920e+00  6.624  79.656  <2e-16 ***
## s(julian_day_survey) 5.091e+00  6.236  15.416  0.0181 *  
## s(x,y)               1.754e+01 20.696 132.955  <2e-16 ***
## s(lake_id)           7.548e-05  1.000   0.000  0.7194    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.596   Deviance explained =   38%
## -REML = 2098.4  Scale est. = 1         n = 423
k.check(gam.ice.lake.specific)
##                      k'          edf   k-index p-value
## s(anomaly)            9 1.001799e+00 0.7689515  0.0225
## s(log_acres)          9 5.920115e+00 0.6095773  0.0000
## s(julian_day_survey)  9 5.091244e+00 0.8376663  0.3650
## s(x,y)               29 1.753968e+01 0.7640590  0.0050
## s(lake_id)            1 7.547995e-05 0.5977862  0.0000
sm <- smooth_estimates(gam.ice.lake.specific)
jd_smooth <- sm %>%
  filter(.smooth == "s(anomaly)" ) %>%
  add_confint() %>%
  add_constant(coef(gam.ice.lake.specific)["(Intercept)"]) %>%
  transform_fun(inv_link(gam.ice.lake.specific))
  

efish_ice_residuals <- lake_specific %>% add_partial_residuals(gam.ice.lake.specific) %>%
  add_constant(coef(gam.ice.lake.specific)["(Intercept)"], column = 16) %>%
  transform_fun(inv_link(gam.ice.lake.specific), 16)

jd_smooth %>%
  ggplot(aes(x = anomaly, y = .estimate)) +
  geom_point(aes(x = anomaly, y = `s(anomaly)`), 
             alpha = 0.2, data = efish_ice_residuals) +
  geom_line(lwd = 1.5) +
  geom_ribbon(aes(ymin = .lower_ci , ymax = .upper_ci), alpha = 0.2) + 
  labs(y = "Fall Age-0 Walleye Catch", x = "Ice-Off Anomaly") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 85)) + 
  #ggtitle("Effects of Anomalous Ice-Off on Walleye Recruitment") + 
  theme_classic() +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 18),
        plot.title = element_text(size = 23, hjust = 0.5, face = "bold"))

# Figure S3
ggsave("./figures/FigureS3.jpeg", height = 10, width = 12)

Get estimates of decrease in recruitment with lake specific ice anomalies

data_to_predict <- lake_specific %>% 
  select(anomaly, log_acres, x, y, julian_day_survey, total_effort_1, acres) %>%
  summarize(across(everything(), ~median(.x, na.rm = T)))

df.results <- data.frame(average = numeric(0), low = numeric(0), high = numeric(0), 
                         age = numeric(0))

rmvn <- function(n, mu, sig) { ## MVN random deviates 
    L <- mroot(sig);
    m <- ncol(L); 
    t(mu + L%*%matrix(rnorm(m * n), m, n)) 
} 
newd <- data.frame(average_anomaly = c(-17.52, 0, 14), log_acres = log(data_to_predict$acres),
                     x = data_to_predict$x, y = data_to_predict$y, 
                     julian_day_survey = data_to_predict$julian_day_survey, total_effort_1 = data_to_predict$total_effort_1, 
                     DOW = "10005900", year_f = 2010)
  
predicted = predict(gam.ice, newd, type = "response", 
                exclude = c("s(DOW)", "s(year_f)"))
  
Xp <- predict(gam.ice, newd, type = "lpmatrix", 
                exclude = c("s(DOW)", "s(year_f)") )
  
  
br <- rmvn(1000, coef(gam.ice), gam.ice$Vp) ## 1000 replicate param. vectors 
  
early.ice.off <- rep(0, 1000)
normal.ice.off <- rep(0, 1000)
late.ice.off <- rep(0, 1000)
percent.early.normal <- rep(0, 1000)
percent.early.late <- rep(0, 1000)
diff.early.normal <- rep(0,1000)
diff.early.late <- rep(0,1000)

  
for (i in 1:1000) { 
  pr <- Xp %*% br[i,] ## replicate predictions 
  early.ice.off[i] = exp(pr[1,])
  normal.ice.off[i] =  exp(pr[2,]) 
  late.ice.off[i] =  exp(pr[3,])
  percent.early.normal[i] = 1 - (early.ice.off[i]/normal.ice.off[i])
  percent.early.late[i] = 1 - (early.ice.off[i]/late.ice.off[i])
  diff.early.normal[i] <- early.ice.off[i] - normal.ice.off[i]
  diff.early.late[i] <- early.ice.off[i] - late.ice.off[i]
} 
  
median(early.ice.off)
## [1] 17.01221
quantile(early.ice.off, na.rm = T, probs = c(0.025, 0.975))
##      2.5%     97.5% 
##  9.098088 31.999479
median(normal.ice.off) 
## [1] 21.78031
quantile(normal.ice.off, na.rm = T, probs = c(0.025, 0.975))
##     2.5%    97.5% 
## 11.68245 41.21749
median(late.ice.off)
## [1] 25.73516
quantile(late.ice.off, na.rm = T, probs = c(0.025, 0.975))
##     2.5%    97.5% 
## 13.06387 48.35451
median(1 - (early.ice.off/normal.ice.off), na.rm = T)*100
## [1] 21.61131
quantile(1 - (early.ice.off/normal.ice.off), na.rm = T, probs = c(0.025, 0.975))*100
##      2.5%     97.5% 
##  2.736583 37.257456
median(1 - (early.ice.off/late.ice.off), na.rm = T)*100
## [1] 33.47421
quantile(1 - (early.ice.off/late.ice.off), na.rm = T, probs = c(0.025, 0.975))*100
##      2.5%     97.5% 
##  9.019866 50.526813
# Percent decrease in recruitment from early to normal ice-off (pos = increase)
median(percent.early.normal)*100
## [1] 21.61131
quantile(percent.early.normal, na.rm = T, probs = c(0.025, 0.975))*100
##      2.5%     97.5% 
##  2.736583 37.257456
# Percent decrease in recruitment from early to late ice-off (pos = increase)
median(percent.early.late)*100
## [1] 33.47421
quantile(percent.early.late, na.rm = T, probs = c(0.025, 0.975))*100
##      2.5%     97.5% 
##  9.019866 50.526813
median(early.ice.off - normal.ice.off, na.rm = T) /1.09
## [1] -4.394729
quantile(early.ice.off - normal.ice.off, na.rm = T, probs = c(0.025, 0.975))
##        2.5%       97.5% 
## -11.7525054  -0.5352326
median(early.ice.off - late.ice.off, na.rm = T)/1.09
## [1] -7.705273
quantile(early.ice.off - late.ice.off, na.rm = T, probs = c(0.025, 0.975))
##       2.5%      97.5% 
## -20.487252  -1.861312

Graveyard

Model with stocked walleye effect

efish_ice_stocked <- efish_ice_anomaly %>% 
  dplyr::filter(DOW %in% three.years$DOW) %>% 
  mutate(fry_stocked = as.factor(ifelse(FRY > 0, 1, 0)))


gam.ice_stocked <- gam(catch ~ s(average_anomaly, by = fry_stocked) + s(log_acres) + 
                 s(x, y) + s(julian_day_survey) +
                 s(DOW, bs = "re") + s(year_f, bs = "re") + 
                 offset(log(total_effort_1)), 
               method = "REML", select = T,
               family = nb(),
               data = efish_ice_stocked)

appraise(gam.ice_stocked)
gratia::draw(gam.ice_stocked, select = c(1, 2))
summary(gam.ice_stocked)