Primary programmer: John Fieberg | Secondary programmer: Althea A. ArchMiller
Date: 20160805
Complete code and analysis for simulation examples in: Fieberg et al. Species distribution models: Predictive snipers or shots in the dark?
Non-linear example. Simulate data for example with the probability of selection proportional to \(\exp(2x_3 - x_3^2)\), where \(x_3\) is temperature.
Set seed
set.seed(1021983)
Load libraries
library(ezknitr)
library(MASS)
library(ggplot2)
library(KernSmooth)
uhcplots functions from GitHub
library(devtools)
install_github("aaarchmiller/uhcplots")
Skipping install of 'uhcplots' from a github remote, the SHA1 (1f479193) has not changed since last install.
Use `force = TRUE` to force installation
library(uhcplots)
#################################################################
Global settings
nused_train <- nused_test <- 100
navail_train <- navail_test <- 10000
ntemp <- 1000000
nsims <- 1000 # M for step 3 of UHC method
Example-specific settings
example <- "non-linear"
set.seed(1000)
betas <- c(2, -1)
Scenario-specific settings
scenario <- "T"
outfig1 <- "uhcplots/graphics/Boyce2.pdf"
outfig2 <- "uhcplots/graphics/crosssim2.pdf"
#################################################################
Training data
traindat <- uhcdatasimulator(nused = nused_train,
navail = navail_train,
betas = betas,
corx = corx.train,
ntemp = ntemp,
example = example)
Test data
testdat <- uhcdatasimulator(nused = nused_test,
navail = navail_test,
betas = betas,
corx = corx.test,
ntemp = ntemp,
example = example)
#################################################################
Fit 2 models
# correct model
train.correct <- glm(y~temp + I(temp^2), family=binomial(), data=traindat)
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(train.correct)
Call:
glm(formula = y ~ temp + I(temp^2), family = binomial(), data = traindat)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.2607 -0.1966 -0.0726 -0.0064 3.9314
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.5281 0.1938 -23.368 < 2e-16 ***
temp 2.2082 0.3497 6.314 2.71e-10 ***
I(temp^2) -1.0479 0.1667 -6.284 3.29e-10 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1122.03 on 10099 degrees of freedom
Residual deviance: 966.78 on 10097 degrees of freedom
AIC: 972.78
Number of Fisher Scoring iterations: 11
# incorrect model
train.misspec <- glm(y~temp, family=binomial(), data=traindat)
summary(train.misspec)
Call:
glm(formula = y ~ temp, family = binomial(), data = traindat)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.3270 -0.1566 -0.1326 -0.1124 3.1247
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.72155 0.11152 -42.338 < 2e-16 ***
temp 0.24330 0.05113 4.759 1.95e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1122.0 on 10099 degrees of freedom
Residual deviance: 1099.2 on 10098 degrees of freedom
AIC: 1103.2
Number of Fisher Scoring iterations: 7
Save regression models (to make a table later)
save(train.correct, train.misspec,
file=paste0("examples/Regmods/Regmods_", scenario, ".R"))
#################################################################
source("examples/functions_not_in_uhcplots/ecalcrsf.R")
Steps 1 & 2: Incorrect model.
# [1.]
wx.hat.misspec <- exp(predict(train.misspec, newdata=testdat))
# [2.]
cal.misspec <- ecalrsf(preds = wx.hat.misspec,
y_test = testdat$y,
nbins = 10,
do_plot = FALSE)
# Relationship between observed (nx) and expected (Nx) number of pts per bin
lmfit.misspec <- lm(nx~Nx, data=cal.misspec)
# Spearman correlation coefficient
sp.mcor <- round(cor(cal.misspec$Nx, cal.misspec$nx, method="spearman"),2)
# Confidence intervals
pmisspec <- predict(lmfit.misspec, interval="confidence")
Steps 1 & 2: Correct model.
# [1.]
wx.hat.correct <- exp(predict(train.correct, newdata=testdat))
# [2.]
cal.correct <- ecalrsf(preds = wx.hat.correct,
y_test = testdat$y,
nbins = 10,
do_plot = FALSE)
# Relationship between observed (nx) and expected (Nx) number of pts per bin
lmfit.correct <- lm(nx~Nx, data=cal.correct)
# Spearman correlation coefficient
sp.ccor <- round(cor(cal.correct$Nx, cal.correct$nx, method="spearman"),2)
# Confidence intervals
pcorrect <- predict(lmfit.correct, interval="confidence")
[3.] Produce a calibration plot formatted for manuscript
par(mfrow=c(1,2),mar=c(4,2,2,2), oma=c(1, 4, 7, 0), bty="L")
# Incorrect Model
with(cal.misspec, plot(Nx, nx, xlab=" ", ylab=" ", pch=16, xlim=c(0, max(Nx))))
abline(0,1, lty=1, col="gray", lwd=2)
abline(lmfit.correct)
lines(cal.misspec$Nx, pmisspec[,2], lty=2)
lines(cal.misspec$Nx, pmisspec[,3], lty=2)
mtext(side=3, line=1, "A)", cex=1.2, ad=0)
r2 <- round(summary(lmfit.misspec)$r.square,2)
b0 <- round(coef(lmfit.misspec)[1],2)
b1 <- round(coef(lmfit.misspec)[2],2)
ax.lims <- par("usr")
delt.lims <- c(ax.lims[2]-ax.lims[1],ax.lims[4]-ax.lims[3])
text.x.1 <- ax.lims[1] + 0.078*delt.lims[1]
text.x.2 <- ax.lims[1] + 0.14*delt.lims[1]
text.y.1 <- ax.lims[3] + 0.9*delt.lims[2]
text.y.2 <- ax.lims[3] + 0.82*delt.lims[2]
text.y.3 <- ax.lims[3] + 0.74*delt.lims[2]
text.y.4 <- ax.lims[3] + 0.6*delt.lims[2]
text.y.5 <- ax.lims[3] + 0.52*delt.lims[2]
text(text.x.1,text.y.1, "Regression equation", adj=0, cex=1.2)
text(text.x.2,text.y.2, bquote(~y == .(b0) + .(b1)*x), adj=0, cex=1.2)
text(text.x.2,text.y.3, bquote(~R^2 == .(r2)), adj=0, cex=1.2)
text(text.x.1,text.y.4, "Spearman Correlation", adj=0, cex=1.2)
text(text.x.2,text.y.5, bquote( .(sp.mcor)), adj=0, cex=1.2)
# Correct model
with(cal.correct, plot(Nx, nx, xlab=" ", ylab=" ", pch=16, xlim=c(0, max(Nx))))
abline(lmfit.correct)
abline(0,1, lty=1, col="gray", lwd=2)
lines(cal.correct$Nx, pcorrect[,2], lty=2)
lines(cal.correct$Nx, pcorrect[,3], lty=2)
mtext(side=3, line=1, "B)", cex=1.2, ad=0)
r2 <- round(summary(lmfit.correct)$r.square,2)
b0 <- round(coef(lmfit.correct)[1],2)
b1 <- round(coef(lmfit.correct)[2],2)
ax.lims <- par("usr")
delt.lims <- c(ax.lims[2]-ax.lims[1],ax.lims[4]-ax.lims[3])
text.x.1 <- ax.lims[1] + 0.078*delt.lims[1]
text.x.2 <- ax.lims[1] + 0.14*delt.lims[1]
text.y.1 <- ax.lims[3] + 0.9*delt.lims[2]
text.y.2 <- ax.lims[3] + 0.82*delt.lims[2]
text.y.3 <- ax.lims[3] + 0.74*delt.lims[2]
text.y.4 <- ax.lims[3] + 0.6*delt.lims[2]
text.y.5 <- ax.lims[3] + 0.52*delt.lims[2]
text(text.x.1,text.y.1, "Regression equation", adj=0, cex=1.2)
text(text.x.2,text.y.2, bquote(~y == .(b0) + .(b1)*x), adj=0, cex=1.2)
text(text.x.2,text.y.3, bquote(~R^2 == .(r2)), adj=0, cex=1.2)
text(text.x.1,text.y.4, "Spearman Correlation", adj=0, cex=1.2)
text(text.x.2,text.y.5, bquote( .(sp.ccor)), adj=0, cex=1.2)
mtext(outer=T, side=2, line=1,
"Observed number of used points in each bin", cex=1.3)
mtext(outer=T, side=1, line = 0,
"Expected number of used points in each bin", cex=1.3)
mtext(outer=T, side=3, line=-1, adj=0.25,
expression(y %~% temp), cex=1.3)
mtext(outer=T, side=3.5, line=-1, adj=0.75,
expression(y %~% temp + temp^2), cex=1.3)
plot.calib <- recordPlot()
Error in pdf(file = outfig1, width = 12, height = 6): cannot open file 'uhcplots/graphics/Boyce2.pdf'
RStudioGD
2
#################################################################
Incorrect model
# uhcsim function
xhat1.missspec <- uhcsim(nsims = 1000,
nused_test = nused_test,
xmat = as.matrix(testdat[, c("temp")]),
fit_rsf = train.misspec,
z = as.matrix(testdat[, c("temp")]))
# uhcdenscalc function
denshats.temp.mod1 <- uhcdenscalc(rand_sims = xhat1.missspec[,,1],
dat = subset(testdat, y==1, select="temp"),
avail = subset(testdat, y==0, select="temp"))
Correct model
# uhcsim function
xhat1.correct <- uhcsim(nsims=1000,
nused_test=nused_test,
xmat=model.matrix(y~temp + I(temp^2),
data=testdat)[,-1],
fit_rsf=train.correct,
z=as.matrix(testdat[, c("temp")]))
# uhcdenscalc function for both predictors
denshats.temp.mod2 <- uhcdenscalc(rand_sims = xhat1.correct[,,1],
dat = subset(testdat, y==1, select="temp"),
avail = subset(testdat, y==0, select="temp"))
Uses uhcdensplot function
par(mfcol=c(1,2), mar=c(3,2,2,3), oma=c(3, 4, 2, 0), bty="L")
# First, the density plots
uhcdensplot(densdat = denshats.temp.mod1$densdat,
densrand = denshats.temp.mod1$densrand,
includeAvail = TRUE,
densavail = denshats.temp.mod1$densavail,
xl = c(-5,5),
yl = c(0,0.7))
mtext(side=3, line=1, "A)", cex=1.2, ad=0)
mtext(outer=F, side=2, line=3, "Density")
mtext(side=3, line=1, expression(y %~% temp), adj=0.5)
mtext(outer=F, side=1, line=4, expression(Temperature))
uhcdensplot(densdat = denshats.temp.mod2$densdat,
densrand = denshats.temp.mod2$densrand,
includeAvail = TRUE,
densavail = denshats.temp.mod2$densavail,
xl = c(-5,5),
yl = c(0,0.7))
mtext(side=3, line=1, "B)", cex=1.2, ad=0)
mtext(outer=F, side=2, line=3, "Density")
mtext(side=3, line=1, expression(y %~% temp + temp^2), adj=0.5)
mtext(outer=F, side=1, line=4, expression(Temperature))
uhc.plot <- recordPlot()
Error in pdf(file = outfig2, width = 8, height = 4): cannot open file 'uhcplots/graphics/crosssim2.pdf'
RStudioGD
2
#################################################################
\(f^u(z)-\hat{f}^u(z)_i\)
Uses uhcdiffdensplot function
par(mfrow=c(1,2), mar=c(2,2,2,2), oma=c(3, 0, 5, 0), bty="L")
# Incorrect model (missing predictor)
uhcdiffdensplot(densdat = denshats.temp.mod1$densdat,
densrand = denshats.temp.mod1$densrand,
xl = c(-5,9))
mtext(side=3, line=1, "A)", cex=1.2, ad=0)
mtext(outer=F, side=2, line=3, "Density")
mtext(side=3, line=1, expression(y %~% temp), adj=0.5)
mtext(outer=F, side=1, line=4, expression(Temperature))
uhcdiffdensplot(densdat = denshats.temp.mod2$densdat,
densrand = denshats.temp.mod2$densrand,
xl = c(-5,9))
mtext(side=3, line=1, "B)", cex=1.2, ad=0)
mtext(outer=F, side=2, line=3, "Density")
mtext(side=3, line=1, expression(y %~% temp + temp^2), adj=0.5)
mtext(outer=F, side=1, line=4, expression(Temperature))
References:
Boyce, M.S., Vernier, P.R., Nielsen, S.E. & Schmiegelow, F.K. (2002). Evaluating resource selection functions. Ecological Modelling, 157, 281–300.
Johnson, C.J., Nielsen, S.E., Merrill, E.H., McDonald, T.L. & Boyce, M.S. (2006). Resource selection functions based on use-availability data: Theoretical motivation and evaluation methods. Journal of Wildlife Management, 70, 347-357.
Session Information
sessionInfo()
R version 3.3.1 (2016-06-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows >= 8 x64 (build 9200)
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] uhcplots_0.1.0 devtools_1.12.0 KernSmooth_2.23-15
[4] ggplot2_2.1.0 MASS_7.3-45 knitr_1.14
[7] ezknitr_0.6
loaded via a namespace (and not attached):
[1] Rcpp_0.12.7 magrittr_1.5 munsell_0.4.3
[4] colorspace_1.2-6 R6_2.2.0 stringr_1.1.0
[7] httr_1.2.1 plyr_1.8.4 tools_3.3.1
[10] grid_3.3.1 gtable_0.2.0 R.oo_1.20.0
[13] git2r_0.15.0 withr_1.0.2 htmltools_0.3.5
[16] yaml_2.1.13 digest_0.6.10 formatR_1.4
[19] R.utils_2.4.0 curl_2.1 mime_0.5
[22] memoise_1.0.0 evaluate_0.9 rmarkdown_1.0
[25] stringi_1.1.2 scales_0.4.0 R.methodsS3_1.7.1
[28] markdown_0.7.7
Spun with: ezspin(file = “examples/example_2_temp.R”, out_dir =“examples/output”, fig_dir = “figures”, keep_md = FALSE)