Secchi_Model

#THIS FILE CONTAINS CODE FOR MODELING SECCHI DEPTH IN A GENERAL MIXED EFFECTS LINEAR REGRESSION MODEL

# STEP 0: LOAD PACKAGES
library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.3.3

## Warning: package 'ggplot2' was built under R version 4.3.3

## Warning: package 'tibble' was built under R version 4.3.3

## Warning: package 'tidyr' was built under R version 4.3.3

## Warning: package 'readr' was built under R version 4.3.3

## Warning: package 'purrr' was built under R version 4.3.3

## Warning: package 'dplyr' was built under R version 4.3.3

## Warning: package 'stringr' was built under R version 4.3.3

## Warning: package 'forcats' was built under R version 4.3.3

## Warning: package 'lubridate' was built under R version 4.3.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(readxl)

## Warning: package 'readxl' was built under R version 4.3.3

library(lme4)

## Warning: package 'lme4' was built under R version 4.3.3

## Loading required package: Matrix

## Warning: package 'Matrix' was built under R version 4.3.3

## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

library(lmerTest)

## Warning: package 'lmerTest' was built under R version 4.3.3

## 
## Attaching package: 'lmerTest'
## 
## The following object is masked from 'package:lme4':
## 
##     lmer
## 
## The following object is masked from 'package:stats':
## 
##     step

library(performance)

## Warning: package 'performance' was built under R version 4.3.3

library(emmeans)

## Warning: package 'emmeans' was built under R version 4.2.3

library(multcomp)

## Warning: package 'multcomp' was built under R version 4.2.3

## Loading required package: mvtnorm

## Warning: package 'mvtnorm' was built under R version 4.3.3

## Loading required package: survival
## Loading required package: TH.data

## Warning: package 'TH.data' was built under R version 4.2.3

## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## Attaching package: 'TH.data'
## 
## The following object is masked from 'package:MASS':
## 
##     geyser

library(boot)

## Warning: package 'boot' was built under R version 4.2.3

## 
## Attaching package: 'boot'
## 
## The following object is masked from 'package:survival':
## 
##     aml

library(broom.mixed)

## Warning: package 'broom.mixed' was built under R version 4.2.3

# STEP 1: LOAD IN WATER QUALITY DATA
WQSAV <- read.csv("C:/Users/k_hem/OneDrive/Desktop/UMN WQ Data/WaterQualityFix.csv")
# SET REFERENCE LAYER
levels <- c("pre", "during", "post") # CREATE DATAFRAME
WQSAV$period <- factor(WQSAV$period, levels = levels) # CALL DATAFRAME WQSAV, COLUMN PERIOD AND ASSIGN THE LEVELS VIA 'LEVELS'

WQSAV$secchi <- as.numeric(WQSAV$secchi)# Ensure Secchi is numeric

COMMON ABBREVIATIONS THROUGHOUT WORKFLOW: SECCHI = SECCHI DEPTH IN METERS ALUM = ALUMINUM SULFATE TREATMENT PERIOD = GROUPING OF DATA BASED ON WHEN ALUM TREATMENT OCCURRED PRE = PRE-ALUM TREATMENT, BEFORE ANY ALUM HAS BEEN APPLIED DURING = DURING-ALUM TREATMENT, BETWEEN THE FIRST AND LAST ALUM TREATMENT APPLICATION POST = POST-ALUM TREATMENT, AFTER ALUM HAS BEEN APPLIED SEASON = TIME OF YEAR SAMPLING TOOK PLACE EARLY = SAMPLING PERFORMED BETWEEN APRIL AND JUNE LATE = SAMPLING PERFORMED BETWEEN JULY AND SEPTEMBER D_S = DEPTH CLASSIFICATION OF LAKE BASED ON MAX DEPTH DEEP (D)= MAX DEPTH GREATER THAN 15 FEET SHALLOW (S) = MAX DEPTH LESS THAN 15 FEET

# STEP 2: EXPLORATORY ANALYSIS
# STEP 2.1: DISTRIBUTION OF DATA

# HISTOGRAM OF RAW SECCHI DATA
hist(WQSAV$secchi,# CALL DATAFRAME AND DESIRED COLUMN
     main = "Histogram of Secchi (m)",  # TITLE
     xlab = "Secchi (m)",  # X-AXIS LABEL
     ylab = "Frequency"  # Y-AXIS
     )

# DATA IS SKEWED, WILL NEED TO LOG AND/OR SCALE TO ACHIEVE NORMALITY

# STEP 2.2: INTERACTIONS BETWEEN SECCHI AND INDEPENDENT VARIABLES
# DEPTH OF LAKE AND SECCHI
ggplot(WQSAV, aes(x = d_s, y = secchi)) + # CALL DATAFRAME, X-AXIS DATA: DEPTH, Y-AXIS DATA: TP
  geom_boxplot() # SPECIFY BOXPLOT

## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# TIME OF SAMPLING AND SECCHI
ggplot(WQSAV, aes(x = season, y = secchi)) + # CALL DATAFRAME, X-AXIS DATA: TIME OF SAMPLING, Y-AXIS DATA: SECCHI
  geom_boxplot() # SPECIFY BOXPLOT

## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# TIME OF TREATMENT AND SECCHI
ggplot(WQSAV, aes(x = period, y = secchi)) + # CALL DATAFRAME, X-AXIS DATA: TIME OF SAMPLING, Y-AXIS DATA: SECCHI
  geom_boxplot() # SPECIFY BOXPLOT

## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# STEP 2.3: INTERACTIONS BETWEEN SECCHI AND MULTIPLE INDEPENDENT VARIABLES
# DEPTH OF LAKE, TIME OF SAMPLING, AND SECCHI
ggplot(WQSAV, aes(x = d_s, y = secchi, fill = season)) + # CALL DATAFRAME, X-AXIS DATA: DEPTH, Y-AXIS DATA: SECCHI, FILL OF BOXPLOT: TIME OF SAMPLING
  geom_boxplot() # SPECIFY BOXPLOT

## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# DEPTH OF LAKE, TIME OF TREATMENT, AND SECCHI
ggplot(WQSAV, aes(x = d_s, y = secchi, fill = period)) + # CALL OF DATAFRAME, X-AXIS DATA: DEPTH, Y-AXIS DATA: SECCHI, FILL OF BOXPLOT: TIME OF TREATMENT
  geom_boxplot() # SPECIFY BOXPLOT

## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# TIME OF YEAR, TIME OF TREATMENT, AND SECCHI
ggplot(WQSAV, aes(x = season, y = secchi, fill = period)) + # CALL DATAFRAME, X-AXIS DATA: TIME OF SAMPLING, Y-AXIS DATA: SECCHI, FILL OF BOXPLOT: TIME OF TREATMENT
  geom_boxplot() # SPECIFY BOXPLOT

## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# TIME OF SAMPLING, TIME OF TREATMENT, DEPTH OF LAKE, AND SECCHI
ggplot(WQSAV, aes(x = season, y = secchi, fill = period)) + # CALL DATAFRAME, X-AXIS DATA: TIME OF SAMPLING, Y-AXIS DATA: SECCHI, FILL OF BOXPLOT: TIME OF TREATMENT
  geom_boxplot() + # SPECIFY BOXPLOT
  facet_wrap(~d_s) # FACET WRAP BY DEPTH TO ACHIEVE ALL THREE PARAMETERS ON THE GRAPH

## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# STEP 3: MODELING
# STEP 3.1: INITIAL MODEL WITH A 3-WAY INTERACTION
secmodel1 <- glmer(scale(secchi + 1, FALSE, TRUE) ~ period * d_s * season # SCALE SECCHI (MODEL WILL LOG), DEPENDENT VARIABLE ~ INDEPENDENT VARIABLES
                   + (1|lake_name) + (1|year), # ASSIGN RANDOM EFFECTS
                   control = glmerControl(optimizer = 'bobyqa', optCtrl = list(maxfun = 1e07)), # SET MODEL 'SETTINGS'
                   family = "Gamma", # SPECIFY FAMILY TO PROCESS WITH
                   data = WQSAV) # SPECIFY DATAFRAME

# REVIEW MODEL RESULTS
summary(secmodel1) # SPECIFY MODEL

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( inverse )
## Formula: scale(secchi + 1, FALSE, TRUE) ~ period * d_s * season + (1 |  
##     lake_name) + (1 | year)
##    Data: WQSAV
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+07))
## 
##      AIC      BIC   logLik deviance df.resid 
##   -278.1   -203.2    154.0   -308.1     1073 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2649 -0.6374 -0.1782  0.4834  6.4639 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  year      (Intercept) 0.001241 0.03523 
##  lake_name (Intercept) 0.005631 0.07504 
##  Residual              0.069245 0.26314 
## Number of obs: 1088, groups:  year, 13; lake_name, 7
## 
## Fixed effects:
##                              Estimate Std. Error t value Pr(>|z|)    
## (Intercept)                   0.90768    0.10588   8.573  < 2e-16 ***
## periodduring                 -0.10640    0.03355  -3.171  0.00152 ** 
## periodpost                   -0.20521    0.03989  -5.144 2.69e-07 ***
## d_sS                          0.28283    0.14716   1.922  0.05462 .  
## seasonlate                    0.71989    0.03226  22.319  < 2e-16 ***
## periodduring:d_sS             0.07054    0.06072   1.162  0.24537    
## periodpost:d_sS               0.04346    0.06693   0.649  0.51613    
## periodduring:seasonlate      -0.34792    0.04948  -7.032 2.04e-12 ***
## periodpost:seasonlate        -0.41671    0.04493  -9.275  < 2e-16 ***
## d_sS:seasonlate              -0.10653    0.06352  -1.677  0.09353 .  
## periodduring:d_sS:seasonlate  0.04843    0.08596   0.563  0.57314    
## periodpost:d_sS:seasonlate    0.05902    0.09697   0.609  0.54274    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) prddrn prdpst d_sS   sesnlt prdd:_S prdp:_S prddr: prdps:
## perioddurng -0.090                                                          
## periodpost  -0.091  0.389                                                   
## d_sS        -0.661  0.050  0.026                                            
## seasonlate  -0.087  0.245  0.197  0.064                                     
## prddrng:d_S  0.044 -0.446 -0.071 -0.186 -0.140                              
## prdpst:d_sS  0.035 -0.150 -0.249 -0.148 -0.126  0.397                       
## prddrng:ssn  0.053 -0.461 -0.107 -0.042 -0.648  0.261   0.089               
## prdpst:ssnl  0.062 -0.176 -0.364 -0.046 -0.717  0.099   0.224   0.465       
## d_sS:sesnlt  0.044 -0.131 -0.102 -0.154 -0.508  0.363   0.332   0.330  0.364
## prddrng:_S: -0.031  0.274  0.071  0.114  0.373 -0.543  -0.249  -0.576 -0.268
## prdpst:d_S: -0.029  0.085  0.170  0.101  0.332 -0.238  -0.654  -0.216 -0.463
##             d_sS:s prdd:_S:
## perioddurng                
## periodpost                 
## d_sS                       
## seasonlate                 
## prddrng:d_S                
## prdpst:d_sS                
## prddrng:ssn                
## prdpst:ssnl                
## d_sS:sesnlt                
## prddrng:_S: -0.739         
## prdpst:d_S: -0.655  0.484

# EXPLORATORY GRAPHS FOR MODEL 
check_model(secmodel1) # SPECIFY MODEL

hist(resid(secmodel1)) # SPECIFY MODEL

# ASSESS OUTLIERS WITH COOKS DISTANCE
cooksd <- cooks.distance(secmodel1) # SPECIFY MODEL

## Warning in hatvalues.merMod(model): the hat matrix may not make sense for GLMMs

which(cooksd > 4*mean(cooksd)) # SPECIFY DATAFRAME

## 146 153 281 282 290 292 347 349 545 553 554 581 598 602 657 680 701 726 796 854 
## 146 153 280 281 289 291 346 348 536 544 545 572 589 593 647 670 691 715 784 841 
## 879 883 918 919 940 955 956 957 958 
## 866 870 902 903 923 938 939 940 941

which(cooksd == max(cooksd)) # SPECIFY DATAFRAME

## 282 
## 281

# STEP 3.2: MODEL WITHOUT POTENTIAL INFLUENTIAL POINTS
# REMOVE POTENTIAL INFLUENTIAL POINTS/OUTLIERS
# ANY SIGN CHANGES/CHANGES IN SIGNIFICANCE?
# REPEAT UNTIL ALL HAVE BEEN TESTED
secmodel3 <- glmer(
  scale(secchi + 1, FALSE, TRUE) ~ period * d_s * season + # SCALE SECCHI (MODEL WILL LOG), DEPENDENT VARIABLE ~ INDEPENDENT VARIABLES
    (1|lake_name) + (1|year), # ASSIGN RANDOM EFFECTS
  control = glmerControl(optimizer = 'bobyqa', optCtrl = list(maxfun = 1e07)),
  family = "Gamma", # SPECIFY FAMILY TO PROCESS WITH
  data = WQSAV[-339,] # SPECIFY DATAFRAME AND SPECIFY POINT TO REMOVE
)

# REVIEW MODEL AND COMPARE WITH ORIGINAL MODEL RESULTS
summary(secmodel3) # SPECIFY MODEL

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( inverse )
## Formula: scale(secchi + 1, FALSE, TRUE) ~ period * d_s * season + (1 |  
##     lake_name) + (1 | year)
##    Data: WQSAV[-339, ]
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+07))
## 
##      AIC      BIC   logLik deviance df.resid 
##   -276.7   -201.9    153.4   -306.7     1072 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2642 -0.6378 -0.1790  0.4821  6.4627 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  year      (Intercept) 0.001240 0.03521 
##  lake_name (Intercept) 0.005641 0.07510 
##  Residual              0.069288 0.26323 
## Number of obs: 1087, groups:  year, 13; lake_name, 7
## 
## Fixed effects:
##                              Estimate Std. Error t value Pr(>|z|)    
## (Intercept)                   0.90791    0.10596   8.569  < 2e-16 ***
## periodduring                 -0.10631    0.03358  -3.166  0.00154 ** 
## periodpost                   -0.20518    0.03990  -5.143 2.71e-07 ***
## d_sS                          0.28303    0.14727   1.922  0.05463 .  
## seasonlate                    0.72013    0.03228  22.309  < 2e-16 ***
## periodduring:d_sS             0.07047    0.06076   1.160  0.24618    
## periodpost:d_sS               0.04340    0.06698   0.648  0.51697    
## periodduring:seasonlate      -0.34799    0.04952  -7.028 2.10e-12 ***
## periodpost:seasonlate        -0.41685    0.04496  -9.271  < 2e-16 ***
## d_sS:seasonlate              -0.10808    0.06370  -1.697  0.08974 .  
## periodduring:d_sS:seasonlate  0.04993    0.08611   0.580  0.56206    
## periodpost:d_sS:seasonlate    0.06056    0.09712   0.624  0.53293    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) prddrn prdpst d_sS   sesnlt prdd:_S prdp:_S prddr: prdps:
## perioddurng -0.090                                                          
## periodpost  -0.091  0.389                                                   
## d_sS        -0.661  0.051  0.026                                            
## seasonlate  -0.087  0.245  0.197  0.064                                     
## prddrng:d_S  0.044 -0.446 -0.071 -0.186 -0.140                              
## prdpst:d_sS  0.035 -0.150 -0.250 -0.148 -0.126  0.397                       
## prddrng:ssn  0.053 -0.461 -0.107 -0.042 -0.648  0.261   0.089               
## prdpst:ssnl  0.062 -0.176 -0.364 -0.046 -0.717  0.099   0.224   0.465       
## d_sS:sesnlt  0.044 -0.131 -0.102 -0.154 -0.507  0.362   0.332   0.329  0.363
## prddrng:_S: -0.031  0.275  0.071  0.114  0.373 -0.542  -0.249  -0.575 -0.268
## prdpst:d_S: -0.029  0.086  0.170  0.101  0.332 -0.238  -0.653  -0.216 -0.463
##             d_sS:s prdd:_S:
## perioddurng                
## periodpost                 
## d_sS                       
## seasonlate                 
## prddrng:d_S                
## prdpst:d_sS                
## prddrng:ssn                
## prdpst:ssnl                
## d_sS:sesnlt                
## prddrng:_S: -0.739         
## prdpst:d_S: -0.656  0.485

# NONE OF THE POTENITAL INFLUENTIAL POINTS WERE FOUND TO BE INFLUENTIAL ON THE MODEL

# STEP 3.3: REMOVE 3-WAY INTERACTION DUE TO LACK OF SIGNIFICANCE
secmodel2 <- glmer(scale(secchi + 1, FALSE, TRUE) ~ period * d_s + d_s * season + period * season + # SCALE SECCHI (MODEL WILL LOG), DEPENDENT VARIABLE ~ INDEPENDENT VARIABLES, REMOVE 3-WAY INTERACTION
                     (1|lake_name) + (1|year), # ASSIGN RANDOM EFFECTS
                   control = glmerControl(optimizer = 'bobyqa', optCtrl = list(maxfun = 1e07)), # SET MODEL 'SETTINGS'
                   family = "Gamma", # SPECIFY FAMILY TO PROCESS WITH
                   data = WQSAV) # SPECIFY DATAFRAME

# REVIEW MODEL RESULTS
summary(secmodel2) # SPECIFY MODEL

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Gamma  ( inverse )
## Formula: scale(secchi + 1, FALSE, TRUE) ~ period * d_s + d_s * season +  
##     period * season + (1 | lake_name) + (1 | year)
##    Data: WQSAV
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+07))
## 
##      AIC      BIC   logLik deviance df.resid 
##   -281.6   -216.7    153.8   -307.6     1075 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2577 -0.6276 -0.1713  0.4892  6.4325 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  year      (Intercept) 0.001239 0.03520 
##  lake_name (Intercept) 0.005633 0.07505 
##  Residual              0.069362 0.26337 
## Number of obs: 1088, groups:  year, 13; lake_name, 7
## 
## Fixed effects:
##                         Estimate Std. Error t value Pr(>|z|)    
## (Intercept)              0.91020    0.10571   8.610  < 2e-16 ***
## periodduring            -0.11089    0.03220  -3.444 0.000574 ***
## periodpost              -0.20916    0.03928  -5.325 1.01e-07 ***
## d_sS                     0.27051    0.14586   1.855 0.063656 .  
## seasonlate               0.71099    0.02936  24.214  < 2e-16 ***
## periodduring:d_sS        0.08838    0.05097   1.734 0.082959 .  
## periodpost:d_sS          0.06852    0.05045   1.358 0.174446    
## d_sS:seasonlate         -0.07180    0.03704  -1.939 0.052544 .  
## periodduring:seasonlate -0.33326    0.04033  -8.263  < 2e-16 ***
## periodpost:seasonlate   -0.40339    0.03976 -10.145  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) prddrn prdpst d_sS   sesnlt prdd:_S prdp:_S d_sS:s prddr:
## perioddurng -0.086                                                          
## periodpost  -0.088  0.401                                                   
## d_sS        -0.662  0.024  0.010                                            
## seasonlate  -0.081  0.176  0.162  0.015                                     
## prddrng:d_S  0.033 -0.367 -0.046 -0.151  0.071                              
## prdpst:d_sS  0.023 -0.161 -0.185 -0.116  0.102  0.440                       
## d_sS:sesnlt  0.027  0.094  0.004 -0.090 -0.325 -0.063  -0.144               
## prddrng:ssn  0.045 -0.383 -0.097  0.024 -0.600 -0.075  -0.012  -0.152       
## prdpst:ssnl  0.055 -0.146 -0.328  0.004 -0.682 -0.044  -0.114   0.070  0.468

# EXPLORATORY GRAPHS FOR MODEL  
check_model(secmodel2) # SPECIFY MODEL

hist(resid(secmodel2)) # SPECIFY MODEL

# ASSESS OUTLIERS WITH COOKS DISTANCE
cooksd <- cooks.distance(secmodel2) # SPECIFY MODEL

## Warning in hatvalues.merMod(model): the hat matrix may not make sense for GLMMs

which(cooksd > 4*mean(cooksd)) # REFERENCE DATAFRAME

##  97 146 153 281 282 290 292 347 349 545 553 554 581 598 602 657 680 701 726 796 
##  97 146 153 280 281 289 291 346 348 536 544 545 572 589 593 647 670 691 715 784 
## 854 879 883 918 919 940 955 956 957 958 
## 841 866 870 902 903 923 938 939 940 941

which(cooksd == max(cooksd)) # REFERENCE DATAFRAME

## 282 
## 281

# FOLLOWING CODE IS USED TO EXTRACT P-VALUES TO SET UP ADDRESSING FALSE POSITIVES
# EXTRACTING DETAILED SUMMARY WITH P-VALUES
summary_secmodel2 <- tidy(secmodel2, effects = "fixed")

# PRINT THE SUMMARY FOR THE MODEL
print(summary_secmodel2)

## # A tibble: 10 × 6
##    effect term                    estimate std.error statistic   p.value
##    <chr>  <chr>                      <dbl>     <dbl>     <dbl>     <dbl>
##  1 fixed  (Intercept)               0.910     0.106       8.61 7.30e- 18
##  2 fixed  periodduring             -0.111     0.0322     -3.44 5.74e-  4
##  3 fixed  periodpost               -0.209     0.0393     -5.33 1.01e-  7
##  4 fixed  d_sS                      0.271     0.146       1.85 6.37e-  2
##  5 fixed  seasonlate                0.711     0.0294     24.2  1.60e-129
##  6 fixed  periodduring:d_sS         0.0884    0.0510      1.73 8.30e-  2
##  7 fixed  periodpost:d_sS           0.0685    0.0505      1.36 1.74e-  1
##  8 fixed  d_sS:seasonlate          -0.0718    0.0370     -1.94 5.25e-  2
##  9 fixed  periodduring:seasonlate  -0.333     0.0403     -8.26 1.42e- 16
## 10 fixed  periodpost:seasonlate    -0.403     0.0398    -10.1  3.50e- 24

# STEP 4: EXTRACT MODEL INFORMATION
# STEP 4.1: EXTRACT SIGNIFICANCE LETTERS FOR FULL MODEL
emmsec <- emmeans(secmodel2, specs = ~ period + season + d_s, method = "tukey") # EXTRACT MEANS, SPECIFY MODEL, SPECIFY DEPENDENT ~ INDEPENDENT VARIABLES
summary(emmsec)

##  period season d_s emmean    SE  df asymp.LCL asymp.UCL
##  pre    early  D    0.910 0.106 Inf     0.703     1.117
##  during early  D    0.799 0.108 Inf     0.588     1.011
##  post   early  D    0.701 0.109 Inf     0.486     0.916
##  pre    late   D    1.621 0.107 Inf     1.411     1.832
##  during late   D    1.177 0.109 Inf     0.963     1.391
##  post   late   D    1.009 0.110 Inf     0.793     1.225
##  pre    early  S    1.181 0.110 Inf     0.966     1.396
##  during early  S    1.158 0.110 Inf     0.942     1.375
##  post   early  S    1.040 0.115 Inf     0.814     1.266
##  pre    late   S    1.820 0.111 Inf     1.601     2.038
##  during late   S    1.464 0.111 Inf     1.247     1.681
##  post   late   S    1.276 0.114 Inf     1.052     1.500
## 
## Results are given on the inverse (not the response) scale. 
## Confidence level used: 0.95

post_hoc_results2 <- pairs(emmsec) # CREATE POST-HOC RESULTS FOR SIG
summary(post_hoc_results2)

##  contrast                        estimate     SE  df z.ratio p.value
##  pre early D - during early D     0.11089 0.0322 Inf   3.444  0.0285
##  pre early D - post early D       0.20916 0.0393 Inf   5.325  <.0001
##  pre early D - pre late D        -0.71099 0.0294 Inf -24.214  <.0001
##  pre early D - during late D     -0.26685 0.0380 Inf  -7.024  <.0001
##  pre early D - post late D       -0.09844 0.0417 Inf  -2.360  0.4338
##  pre early D - pre early S       -0.27051 0.1459 Inf  -1.855  0.7873
##  pre early D - during early S    -0.24800 0.1473 Inf  -1.684  0.8759
##  pre early D - post early S      -0.12987 0.1518 Inf  -0.856  0.9995
##  pre early D - pre late S        -0.90970 0.1482 Inf  -6.138  <.0001
##  pre early D - during late S     -0.55393 0.1471 Inf  -3.766  0.0091
##  pre early D - post late S       -0.36567 0.1505 Inf  -2.430  0.3852
##  during early D - post early D    0.09828 0.0396 Inf   2.483  0.3504
##  during early D - pre late D     -0.82188 0.0396 Inf -20.767  <.0001
##  during early D - during late D  -0.37774 0.0327 Inf -11.555  <.0001
##  during early D - post late D    -0.20932 0.0425 Inf  -4.926  0.0001
##  during early D - pre early S    -0.38140 0.1486 Inf  -2.566  0.2990
##  during early D - during early S -0.35889 0.1471 Inf  -2.440  0.3787
##  during early D - post early S   -0.24076 0.1528 Inf  -1.575  0.9182
##  during early D - pre late S     -1.02059 0.1491 Inf  -6.847  <.0001
##  during early D - during late S  -0.66482 0.1484 Inf  -4.480  0.0005
##  during early D - post late S    -0.47655 0.1509 Inf  -3.157  0.0697
##  post early D - pre late D       -0.92015 0.0451 Inf -20.412  <.0001
##  post early D - during late D    -0.47601 0.0437 Inf -10.898  <.0001
##  post early D - post late D      -0.30760 0.0291 Inf -10.553  <.0001
##  post early D - pre early S      -0.47967 0.1507 Inf  -3.184  0.0645
##  post early D - during early S   -0.45716 0.1493 Inf  -3.062  0.0914
##  post early D - post early S     -0.33903 0.1487 Inf  -2.280  0.4909
##  post early D - pre late S       -1.11886 0.1517 Inf  -7.377  <.0001
##  post early D - during late S    -0.76309 0.1489 Inf  -5.126  <.0001
##  post early D - post late S      -0.57483 0.1495 Inf  -3.845  0.0067
##  pre late D - during late D       0.44414 0.0408 Inf  10.874  <.0001
##  pre late D - post late D         0.61255 0.0458 Inf  13.371  <.0001
##  pre late D - pre early S         0.44048 0.1483 Inf   2.969  0.1176
##  pre late D - during early S      0.46299 0.1479 Inf   3.130  0.0754
##  pre late D - post early S        0.58112 0.1520 Inf   3.824  0.0073
##  pre late D - pre late S         -0.19871 0.1472 Inf  -1.350  0.9725
##  pre late D - during late S       0.15706 0.1491 Inf   1.053  0.9964
##  pre late D - post late S         0.34533 0.1525 Inf   2.264  0.5028
##  during late D - post late D      0.16841 0.0440 Inf   3.829  0.0072
##  during late D - pre early S     -0.00366 0.1486 Inf  -0.025  1.0000
##  during late D - during early S   0.01885 0.1496 Inf   0.126  1.0000
##  during late D - post early S     0.13698 0.1518 Inf   0.902  0.9991
##  during late D - pre late S      -0.64285 0.1519 Inf  -4.232  0.0014
##  during late D - during late S   -0.28708 0.1476 Inf  -1.944  0.7311
##  during late D - post late S     -0.09882 0.1530 Inf  -0.646  1.0000
##  post late D - pre early S       -0.17207 0.1507 Inf  -1.142  0.9928
##  post late D - during early S    -0.14956 0.1494 Inf  -1.001  0.9977
##  post late D - post early S      -0.03143 0.1514 Inf  -0.208  1.0000
##  post late D - pre late S        -0.81126 0.1529 Inf  -5.304  <.0001
##  post late D - during late S     -0.45550 0.1499 Inf  -3.039  0.0975
##  post late D - post late S       -0.26723 0.1482 Inf  -1.803  0.8170
##  pre early S - during early S     0.02251 0.0493 Inf   0.457  1.0000
##  pre early S - post early S       0.14064 0.0579 Inf   2.428  0.3869
##  pre early S - pre late S        -0.63919 0.0391 Inf -16.359  <.0001
##  pre early S - during late S     -0.28342 0.0542 Inf  -5.230  <.0001
##  pre early S - post late S       -0.09515 0.0612 Inf  -1.555  0.9249
##  during early S - post early S    0.11813 0.0567 Inf   2.083  0.6350
##  during early S - pre late S     -0.66170 0.0585 Inf -11.310  <.0001
##  during early S - during late S  -0.30593 0.0358 Inf  -8.549  <.0001
##  during early S - post late S    -0.11766 0.0602 Inf  -1.955  0.7244
##  post early S - pre late S       -0.77983 0.0688 Inf -11.338  <.0001
##  post early S - during late S    -0.42406 0.0627 Inf  -6.763  <.0001
##  post early S - post late S      -0.23580 0.0415 Inf  -5.684  <.0001
##  pre late S - during late S       0.35577 0.0524 Inf   6.784  <.0001
##  pre late S - post late S         0.54403 0.0588 Inf   9.257  <.0001
##  during late S - post late S      0.18827 0.0552 Inf   3.412  0.0316
## 
## Note: contrasts are still on the inverse scale 
## Results are given on the scale(0, 3.09) (not the response) scale. 
## P value adjustment: tukey method for comparing a family of 12 estimates

significance_letters2 <- cld(emmsec) # EXTRACT SIGNIFICANCE LETTERS UTILIZING POST-HOC RESULTS
print(significance_letters2) # PRINT OUTPUT

##  period season d_s emmean    SE  df asymp.LCL asymp.UCL .group      
##  post   early  D    0.701 0.109 Inf     0.486     0.916  1          
##  during early  D    0.799 0.108 Inf     0.588     1.011  12         
##  pre    early  D    0.910 0.106 Inf     0.703     1.117    34       
##  post   late   D    1.009 0.110 Inf     0.793     1.225    345      
##  post   early  S    1.040 0.115 Inf     0.814     1.266  1 3  6     
##  during early  S    1.158 0.110 Inf     0.942     1.375  1234 678   
##  during late   D    1.177 0.109 Inf     0.963     1.391       67 9  
##  pre    early  S    1.181 0.110 Inf     0.966     1.396  1234 678   
##  post   late   S    1.276 0.114 Inf     1.052     1.500   2 4  78   
##  during late   S    1.464 0.111 Inf     1.247     1.681      5   90 
##  pre    late   D    1.621 0.107 Inf     1.411     1.832         8 0A
##  pre    late   S    1.820 0.111 Inf     1.601     2.038            A
## 
## Results are given on the inverse (not the response) scale. 
## Confidence level used: 0.95 
## Note: contrasts are still on the inverse scale 
## Results are given on the scale(0, 3.09) (not the response) scale. 
## P value adjustment: tukey method for comparing a family of 12 estimates 
## significance level used: alpha = 0.05 
## NOTE: If two or more means share the same grouping symbol,
##       then we cannot show them to be different.
##       But we also did not show them to be the same.

# STEP 4.2: MEANS FROM RAW DATA
raw_means_season <- aggregate(secchi ~ period + season + d_s , data = WQSAV, FUN = mean) # SPECIFY DEPENDENT ~ INDEPENDENT VARIABLES, SPECIFY DATA, SPECIFY OUTPUT
print(raw_means_season) # PRINT OUTPUT

##    period season d_s    secchi
## 1     pre  early   D 2.4947031
## 2  during  early   D 3.7897059
## 3    post  early   D 3.6665531
## 4     pre   late   D 0.9078858
## 5  during   late   D 1.9438028
## 6    post   late   D 2.2440440
## 7     pre  early   S 1.6087500
## 8  during  early   S 2.0256343
## 9    post  early   S 1.8908929
## 10    pre   late   S 0.7127358
## 11 during   late   S 1.3223050
## 12   post   late   S 1.3345833

# STEP 4.3: MANUALLY ASSIGN SIGNIFICANCE LETTERS AND RAW MEANS
letters2 <- data.frame(
  period = factor(c("pre", "during", "post",
                    "pre", "during", "post", 
                    "pre", "during", "post", 
                    "pre", "during", "post")), # ASSIGN COLUMN VALES FOR EACH
  x = c(1, 2, 3,
        1, 2, 3,
        1, 2, 3,
        1, 2, 3),
  season = factor(c("early", "early", "early",
                    "early", "early", "early",
                    "late", "late", "late",
                    "late", "late", "late")),
  d_s = factor(c("D", "D", "D",
                 "S", "S", "S",
                 "D", "D", "D",
                 "S", "S", "S")),
  y = rep(0, 12),
  value = c("A", "B", "B",
            "A", "A", "A",
            "A", "B", "C", 
            "A", "B", "C") # ASSIGN SIGNIFICANCE LETTERS GIVEN COLUMN
)
                     
diamonds_raw2 <- data.frame(
  period = factor(c("pre", "during", "post", 
                    "pre", "during", "post", 
                    "pre", "during", "post", 
                    "pre", "during", "post")), # ASSIGN COLUMN VALUES FOR EACH
  season = factor(c("early", "early", "early",
                    "early", "early", "early",
                    "late", "late", "late",
                    "late", "late", "late")),
  d_s = factor(c("D", "D", "D",
                 "S", "S", "S",
                 "D", "D", "D",
                 "S", "S", "S")),
  x = c(1, 2, 3,
        1, 2, 3,
        1, 2, 3,
        1, 2, 3),
  y = c(2.5, 3.2, 3.2,
        1.1, 1.4, 1.7,
        0.9, 2.1, 1.6,
        0.5, 1.0, 1.2) # ASSIGN MEANS GIVEN COLUMN VALUES
)

# STEP 4.4: MEANS FROM MODEL
# MODEL MEANS
smeans <- emmeans(secmodel2, ~ period * d_s * season, type = "response") # MODEL, SPECIFY DEPENDENT ~ INDEPENDENT VARIABLES
summary(smeans)

##  period d_s season response    SE  df asymp.LCL asymp.UCL
##  pre    D   early      3.40 0.395 Inf      2.77      4.40
##  during D   early      3.87 0.522 Inf      3.06      5.26
##  post   D   early      4.41 0.689 Inf      3.38      6.36
##  pre    S   early      2.62 0.243 Inf      2.22      3.20
##  during S   early      2.67 0.255 Inf      2.25      3.28
##  post   S   early      2.97 0.330 Inf      2.44      3.80
##  pre    D   late       1.91 0.126 Inf      1.69      2.19
##  during D   late       2.63 0.244 Inf      2.22      3.21
##  post   D   late       3.07 0.335 Inf      2.53      3.90
##  pre    S   late       1.70 0.104 Inf      1.52      1.93
##  during S   late       2.11 0.160 Inf      1.84      2.48
##  post   S   late       2.42 0.217 Inf      2.06      2.94
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the scale(0, 3.09)[inverse] scale

smeans_df <- as.data.frame(smeans) # PUT VALUES INTO DATAFRAME

print(smeans_df)

##  period d_s season response        SE  df asymp.LCL asymp.UCL
##  pre    D   early  3.397221 0.3945631 Inf  2.767287  4.398471
##  during D   early  3.868506 0.5218431 Inf  3.059583  5.258908
##  post   D   early  4.410817 0.6888077 Inf  3.377156  6.356326
##  pre    S   early  2.618886 0.2432056 Inf  2.215613  3.201628
##  during S   early  2.669785 0.2546984 Inf  2.249223  3.283793
##  post   S   early  2.973019 0.3295956 Inf  2.442334  3.798347
##  pre    D   late   1.907330 0.1263616 Inf  1.688129  2.191953
##  during D   late   2.627035 0.2436314 Inf  2.222972  3.210620
##  post   D   late   3.065670 0.3348905 Inf  2.525046  3.900861
##  pre    S   late   1.699076 0.1040624 Inf  1.516977  1.930858
##  during S   late   2.111932 0.1596847 Inf  1.839351  2.479359
##  post   S   late   2.423569 0.2171558 Inf  2.061531  2.939854
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the scale(0, 3.09)[inverse] scale

sback_tran_df <- data.frame(smeans_df) # ENSURE NO OVERWRITING

# TREATMENT SIGNIFICANCE LETTERS
emm <- emmeans(secmodel2, specs = ~ period, method = "tukey") # MODEL, SPECIFY DEPENDENT ~ INDEPENDENT VARIABLES

## NOTE: Results may be misleading due to involvement in interactions

summary(emm)

##  period emmean     SE  df asymp.LCL asymp.UCL
##  pre      1.38 0.0788 Inf     1.229      1.54
##  during   1.15 0.0798 Inf     0.993      1.31
##  post     1.01 0.0829 Inf     0.844      1.17
## 
## Results are averaged over the levels of: d_s, season 
## Results are given on the inverse (not the response) scale. 
## Confidence level used: 0.95

post_hoc_results <- pairs(emm) # CALCULATE POST-HOC
summary(post_hoc_results)

##  contrast      estimate     SE  df z.ratio p.value
##  pre - during     0.233 0.0302 Inf   7.714  <.0001
##  pre - post       0.377 0.0398 Inf   9.471  <.0001
##  during - post    0.143 0.0360 Inf   3.983  0.0002
## 
## Results are averaged over the levels of: d_s, season 
## Note: contrasts are still on the inverse scale 
## Results are given on the scale(0, 3.09) (not the response) scale. 
## P value adjustment: tukey method for comparing a family of 3 estimates

significance_letters <- cld(emm) # EXTRACT SIGNIFICANCE LETTERS FROM POST-HOC
print(significance_letters)

##  period emmean     SE  df asymp.LCL asymp.UCL .group
##  post     1.01 0.0829 Inf     0.844      1.17  1    
##  during   1.15 0.0798 Inf     0.993      1.31   2   
##  pre      1.38 0.0788 Inf     1.229      1.54    3  
## 
## Results are averaged over the levels of: d_s, season 
## Results are given on the inverse (not the response) scale. 
## Confidence level used: 0.95 
## Note: contrasts are still on the inverse scale 
## Results are given on the scale(0, 3.09) (not the response) scale. 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## significance level used: alpha = 0.05 
## NOTE: If two or more means share the same grouping symbol,
##       then we cannot show them to be different.
##       But we also did not show them to be the same.

letters5 <- data.frame( # MANUALLY ASSIGN SIG LETTERS FOR TREATMENT
  period = factor(c("pre", "during", "post")),
  x = c(1, 2, 3),
  y = rep(0),
  value = c("A", "B", "C")
)

# TREATMENT MEANS
smeans2 <- emmeans(secmodel2, ~ period, type = "response") # MODEL, SPECIFY DEPENDENT ~ INDEPENDENT VARIABLES

## NOTE: Results may be misleading due to involvement in interactions

summary(smeans2)

##  period response    SE  df asymp.LCL asymp.UCL
##  pre        2.24 0.127 Inf      2.01      2.52
##  during     2.69 0.187 Inf      2.37      3.11
##  post       3.07 0.253 Inf      2.65      3.66
## 
## Results are averaged over the levels of: d_s, season 
## Confidence level used: 0.95 
## Intervals are back-transformed from the scale(0, 3.09)[inverse] scale

smeans_df2 <- as.data.frame(smeans2) # PLACE VALUES IN DATAFRAME
print(smeans_df2)

##  period response        SE  df asymp.LCL asymp.UCL
##  pre    2.235826 0.1273686 Inf  2.011262  2.516840
##  during 2.689587 0.1866343 Inf  2.367585  3.112965
##  post   3.072476 0.2531537 Inf  2.645290  3.664207
## 
## Results are averaged over the levels of: d_s, season 
## Confidence level used: 0.95 
## Intervals are back-transformed from the scale(0, 3.09)[inverse] scale

sback_transformed_df2 <- data.frame(smeans_df2) # ENSURE NO OVERWRITING

# SCATTER PLOT FULL MODEL SIG LETTERS
letters4 <- data.frame( 
  period = factor(c("pre", "during", "post",
                    "pre", "during", "post",
                    "pre", "during", "post", 
                    "pre", "during", "post")), # ASSIGN COLUMN VALUES
  x = c(0.75, 1, 1.25,
  0.75, 1, 1.25,
  1.75, 2, 2.25,
  1.75, 2, 2.25),
  season = factor(c("early", "early", "early",
                    "late", "late", "late", 
                    "early", "early", "early",
                    "late", "late", "late")),
  d_s = factor(c("D", "D", "D",
                 "D", "D", "D",
                 "S", "S", "S", 
                 "S", "S", "S")),
  y = rep(0, 12),
  value = c("A", "B", "B",
            "A", "B", "C",
            "A", "A", "A", 
            "A", "B", "C") # ASSIGN SIG LETTERS
)

# STEP 5: GRAPH MODEL
# STEP 5.1: GRAPH MODEL WITH RAW DATA MEANS AND SIGNIFICANCE LETTERS
ggplot(WQSAV, aes(x = period, y = secchi, fill = period)) + # CALL DATAFRAME, X-AXIS DATA: TIME OF TREATMENT, Y-AXIS DATA: SECCHI, FILL FOR BOXPLOT: TIME OF TREATMENT
  geom_boxplot(outlier.shape = NA, position = position_dodge2(padding = 0.1)) + # SPECIFY BOXPLOT, NO OUTLIER VISUALIZATIONS, AND OFFSET EACH BOXPLOT
  geom_text(data = letters2, aes(x = x, y = y, label = value), vjust = -0.5) + # ASSIGN 'LETTERS' TO DISPLAY SIG LETTERS
  geom_point(data = diamonds_raw2, aes(x = x, y = y), fill = "white", color = "black", shape = 23, size = 3, stroke = 1.05) + # ASSIGN RAW DATA AND SPECIFY RAW DATA FILL, SHAPE, SIZE, AND OUTLINE WIDTH
  labs(
    x = "", # NO X-AXIS LABEL
    y = "Secchi (m)\n" # Y-AXIS LABEL
  ) +
  scale_fill_manual(values = c("#fdd8d1", "#a5c9da", "#99cbac"), labels = c("Pre", "During", "Post")) +  # ASSIGN COLORS AND LABELS
  theme(legend.title = element_blank()) + # SPECIFY NO TITLE
  coord_cartesian(ylim = c(0, 6.5)) + # SET Y-AXIS LIMIT TO CUT DOWN ON WHITE SPACE
  scale_x_discrete(labels = c("D" = "Deep", "S" = "Shallow"))  + # SPECIFY LABELS ON X-AXIS
  facet_grid(d_s ~ season, scales = "free", space = "free", # FACET WRAP BY DEPTH AND TIME OF SAMPLING
             labeller = labeller(season = c("early" = "Early", "late" = "Late"), d_s = c("D" = "Deep", "S" = "Shallow"))) # SPECIFY LABELS FOR DEPTH AND SEASON

## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# STEP 5.2: GRAPH MODEL WITH MODEL MEANS AND SIGNIFICANCE LETTERS
ggplot(WQSAV, aes(x = period, y = secchi, fill = period)) + # CALL DATAFRAME, X-AXIS DATA: TIME OF TREATMENT, Y-AXIS DATA: SECCHI, FILL FOR BOXPLOT: TIME OF TREATMENT
  geom_boxplot(outlier.shape = NA, position = position_dodge2(padding = 0.1)) + # SPECIFY BOXPLOT, NO OUTLIER VISUALIZATION, AND OFFSET BOXPLOTS
  geom_text(data = letters2, aes(x = x, y = y, label = value), vjust = -0.5) + # DISPLAY SIGNIFICANCE LETTERS
  geom_point(data = sback_tran_df, aes(y = response, color = period), position = position_dodge(width = 0.8), size = 9, shape = 21, fill = "transparent", stroke = 1.5) + # DISPLAY RAW MEANS / RAW MEANS AESTHETICS
  labs(
    x = "", # NO X-AXIS LABEL
    y = "Secchi (m)\n" # Y-AXIS LABEL
  ) +
  scale_fill_manual(values = c("#fdd8d1", "#a5c9da", "#99cbac"), # COLORS FOR FILL
                    labels = c("Pre", "During", "Post")) +  # LABELS FOR FILL
  theme(legend.title = element_blank(), # SPECIFY NO TITLE NAME
        axis.text.y = element_text(size = 12),  # ADJUST FONT SIZES
        axis.title.y = element_text(size = 14), 
        legend.text = element_text(size = 12),  
        strip.text = element_text(size = 14),   
        text = element_text(size = 12)) +       
  coord_cartesian(ylim = c(0, 6.5)) +
  facet_grid(d_s ~ season, scales = "free", space = "free", # FACET WRAP BY DEPTH AND TIME OF SAMPLING TO INCORPORATE INTO PLOT
             labeller = labeller(season = c("early" = "Early", "late" = "Late"), d_s = c("D" = "Deep", "S" = "Shallow"))) # SPECIFY LABELS

## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# STEP 5.3: SCATTERPLOT FULL MODEL RESULTS
ggplot(WQSAV, aes(x = d_s, y = secchi, color = period)) + # CALL DATAFRAME, X-AXIS DATA: DEPTH, Y-AXIS DATA: SECCHI, COLOR OF POINTS: TIME OF TREATMENT
  geom_point(position = position_dodge(width = 0.8), size = 5, alpha = 0.3) + # SPECIFY SCATTERPLOT, OFFSET POSITION OF POINTS
  geom_text(data = letters4, aes(x = x, y = y, label = value), size = 6, vjust = -0.5, color = "black") + # ASSIGN SIGNIFICANCE LETTERS
  labs(
    x = "", # NO X-AXIS LABEL
    y = "Secchi (m)\n", # Y-AXIS LABEL
    color = ""  # NO LABEL FOR COLOR OF POINTS
  ) +
  scale_x_discrete(labels = c("D" = "Deep", "S" = "Shallow")) + # SPECIFY DEPTH LABELS
  scale_color_manual(values = c("pre" = "#217631", "during" = "#217631", "post" = "#217631"), # SPECIFY COLORS FOR POINTS
                     labels = c("Pre", "During", "Post")) +  # LABELS FOR COLORS
  facet_wrap(~ season, labeller = labeller(season = c("early" = "Early", "late" = "Late"))) + # FACET WRAP BY TIME OF SAMPLING TO INCORPORATE INTO GRAPH
  theme_minimal() + # SPECIFY THEME OF GRAPH
  theme(
    axis.title = element_text(size = 25),  # ADJUST TEXT SIZE
    axis.text = element_text(size = 15, color = "black"),   
    legend.title = element_text(size = 25),
    legend.text = element_text(size = 20),
    strip.text = element_text(size = 15, color = "black"), 
    axis.line.y = element_line(color = "black"), # ADD Y-AXIS
    legend.position = "none", # REMOVE LEGEND
    panel.grid.major.x = element_blank(),# REMOVE MAJOR GRID LINE
  ) + 
  geom_vline(xintercept = 2.5, color = "grey40") + # SPECIFY MAJOR GRID LINE
  geom_point(data = sback_tran_df, aes(y = response, color = period), # SPECIFY MODEL MEANS AND THEIR COLOR
             position = position_dodge(width = 0.8), size = 9, shape = 21, fill = "transparent", stroke = 1.5) + # AESTHETICS OF MODEL MEANS
  scale_y_reverse() # INVERT Y-AXIS

## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

# STEP 5.4: SCATTERPLOT WITH TREATMENT FOCUS
# FINAL MODEL RESULTS WITH JUST TREATMENT
ggplot(WQSAV, aes(x = period, y = secchi, color = period)) + # CALL DATAFRAME, X-AXIS LABEL: TIME OF TREATMENT, Y-AXIS LABEL: SECCHI, COLOR OF POINTS: TIME OF TREATMENT
  geom_text(data = letters5, aes(x = x, y = y, label = value), size = 6, vjust = -0.5, color = "black") + # ASSIGN SIGNIFICANCE LETTERS
  geom_point(position = position_dodge(width = 0.8), size = 5, alpha = 0.3) +  # SPECIFY SCATTERPLOT AND POINT AESTHETICS
  labs(
    x = "", # NO X-AXIS LABEL
    y = "Secchi (m)\n", # Y-AXIS LABEL
    color = ""  # NO LABEL FOR COLOR OF POINTS
  ) +
  scale_x_discrete(labels = c("D" = "Deep", "S" = "Shallow")) + # SPECIFY LABELS FOR DEPTH
  scale_color_manual(values = c("pre" = "#217631", "during" = "#217631", "post" = "#217631")) +  # SPECIFY COLORS FOR POINTS
  theme_minimal() + # THEME OF GRAPH
  theme(
    axis.title = element_text(size = 25),  # ADJUST TEXT SIZE
    axis.text = element_text(size = 15, color = "black"),   
    legend.title = element_text(size = 25),
    legend.text = element_text(size = 20),
    strip.text = element_text(size = 15, color = "black"), 
    axis.line.y = element_line(color = "black"), # ADD Y-AXIS
    legend.position = "none", # REMOVE LEGEND
    panel.grid.major.x = element_blank(),# REMOVE MAJOR GRID LINE
  ) +
  geom_point(data = sback_transformed_df2, aes(y = response, color = period), # SPECIFY MODEL MEANS AND AESTHETICS
             position = position_dodge(width = 0.8), size = 9, shape = 21, fill = "transparent", stroke = 1.5) + # MEAN AESTHETICS
  scale_x_discrete(labels = c("Pre", "During", "Post")) + # LABEL FOR X-AXIS
scale_y_reverse() # INVERT Y-AXIS

## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).

sessionInfo()

## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] broom.mixed_0.2.9.5 boot_1.3-30         multcomp_1.4-25    
##  [4] TH.data_1.1-2       MASS_7.3-58.1       survival_3.4-0     
##  [7] mvtnorm_1.2-4       emmeans_1.10.0      performance_0.10.9 
## [10] lmerTest_3.1-3      lme4_1.1-35.1       Matrix_1.6-5       
## [13] readxl_1.4.3        lubridate_1.9.3     forcats_1.0.0      
## [16] stringr_1.5.1       dplyr_1.1.4         purrr_1.0.2        
## [19] readr_2.1.5         tidyr_1.3.1         tibble_3.2.1       
## [22] ggplot2_3.5.0       tidyverse_2.0.0    
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.9          jsonlite_1.8.8      splines_4.2.2      
##  [4] bslib_0.6.1         datawizard_0.9.1    highr_0.10         
##  [7] cellranger_1.1.0    yaml_2.3.8          globals_0.16.3     
## [10] numDeriv_2016.8-1.1 pillar_1.9.0        backports_1.4.1    
## [13] lattice_0.20-45     glue_1.7.0          digest_0.6.35      
## [16] minqa_1.2.6         colorspace_2.1-0    sandwich_3.1-0     
## [19] htmltools_0.5.7     pkgconfig_2.0.3     broom_1.0.5        
## [22] listenv_0.9.1       xtable_1.8-4        patchwork_1.2.0    
## [25] scales_1.3.0        tzdb_0.4.0          timechange_0.3.0   
## [28] mgcv_1.8-41         farver_2.1.1        generics_0.1.3     
## [31] cachem_1.0.8        withr_3.0.0         furrr_0.3.1        
## [34] cli_3.6.2           magrittr_2.0.3      estimability_1.5   
## [37] evaluate_0.23       parallelly_1.37.1   fansi_1.0.6        
## [40] future_1.33.1       nlme_3.1-160        tools_4.2.2        
## [43] hms_1.1.3           see_0.8.2           lifecycle_1.0.4    
## [46] munsell_0.5.0       compiler_4.2.2      jquerylib_0.1.4    
## [49] multcompView_0.1-10 rlang_1.1.3         grid_4.2.2         
## [52] nloptr_2.0.3        rstudioapi_0.15.0   labeling_0.4.3     
## [55] rmarkdown_2.26      gtable_0.3.4        codetools_0.2-18   
## [58] R6_2.5.1            zoo_1.8-12          knitr_1.45         
## [61] fastmap_1.1.1       utf8_1.2.4          insight_0.19.9     
## [64] stringi_1.8.3       parallel_4.2.2      Rcpp_1.0.12        
## [67] vctrs_0.6.5         tidyselect_1.2.1    xfun_0.42          
## [70] coda_0.19-4.1

Secchi_Model_Code

Katie Hembre

2024-05-10