Primary programmer: John Fieberg | Secondary programmer: Althea A. ArchMiller
Date: 20160805
Complete code and analysis for simulation examples in: Fieberg et al. Species distribution models: Predictive snipers or shots in the dark?
Non-linear example. Simulate data for example with the probability of selection proportional to \(\exp(2x_3 - x_3^2)\), where \(x_3\) is temperature.
Set seed
set.seed(1021983)
Load libraries
library(ezknitr)
library(MASS)
library(ggplot2)
library(KernSmooth)
################################################
uhcdatasimulator
Function to simulate the data used in the manuscript.
source("uhcplots/functions/uhcdatasimulator.R")
ecalcrsf
Function that creates the calibration plots following Boyce et al. (2002) and Johnson et al. (2006)
source("uhcplots/functions/ecalcrsf.R")
uhcsim
Function that samples, randomly, locations from non-stratified test data. Will return an array of dimension nsims x nused_test x p (where p is the number of predictors to be validated)
source("uhcplots/functions/uhcsim.R")
uhcdenscalc
Function to calculate density estimates for the environmental characteristics associated with the observed locations in the test data and also associated with the randomly chosen locations generated by the uhcsim or uhcsimstrat functions.
source("uhcplots/functions/uhcdenscalc.R")
uhcdensplot
Function to plot the density of the environmental characteristics at the observed locations in the test data set, \(f^u(z)\), along with a simulation envelope for \(f^U(z)\) created by randomly choosing locations in the test data using the uhcsim or uhcsimstrat functions.
source("uhcplots/functions/uhcdensplot.R")
uhcdiffdensplot
Function for plotting a simulation envelope for \(f^U(z)- \hat{f}^u(z)\)
source("uhcplots/functions/uhcdiffdensplot.R")
#################################################################
Global settings
nused_train <- nused_test <- 100
navail.train <- navail.test <- 10000
ntemp <- 1000000
nsims <- 1000 # M for step 3 of UHC method
Example-specific settings
example <- "non-linear"
set.seed(1000)
betas <- c(2, -1)
Scenario-specific settings
scenario <- "T"
outfig1 <- "uhcplots/graphics/Boyce2.pdf"
outfig2 <- "uhcplots/graphics/crosssim2.pdf"
#################################################################
Training data
traindat <- uhcdatasimulator(nused = nused_train,
navail = navail.train,
betas = betas,
corx = corx.train,
ntemp = ntemp,
scenario = scenario,
example = example)
Test data
testdat <- uhcdatasimulator(nused = nused_test,
navail = navail.test,
betas = betas,
corx = corx.test,
ntemp = ntemp,
scenario = scenario,
example = example)
#################################################################
Fit 2 models
# correct model
train.correct <- glm(y~temp + I(temp^2), family=binomial(), data=traindat)
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(train.correct)
Call:
glm(formula = y ~ temp + I(temp^2), family = binomial(), data = traindat)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.2607 -0.1966 -0.0726 -0.0064 3.9314
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.5281 0.1938 -23.368 < 2e-16 ***
temp 2.2082 0.3497 6.314 2.71e-10 ***
I(temp^2) -1.0479 0.1667 -6.284 3.29e-10 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1122.03 on 10099 degrees of freedom
Residual deviance: 966.78 on 10097 degrees of freedom
AIC: 972.78
Number of Fisher Scoring iterations: 11
# incorrect model
train.misspec <- glm(y~temp, family=binomial(), data=traindat)
summary(train.misspec)
Call:
glm(formula = y ~ temp, family = binomial(), data = traindat)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.3270 -0.1566 -0.1326 -0.1124 3.1247
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.72155 0.11152 -42.338 < 2e-16 ***
temp 0.24330 0.05113 4.759 1.95e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1122.0 on 10099 degrees of freedom
Residual deviance: 1099.2 on 10098 degrees of freedom
AIC: 1103.2
Number of Fisher Scoring iterations: 7
Save regression models (to make a table later)
save(train.correct, train.misspec,
file=paste0("uhcplots/Regmods/Regmods_", scenario, ".R"))
#################################################################
Steps 1 & 2: Incorrect model.
# [1.]
wx.hat.misspec <- exp(predict(train.misspec, newdata=testdat))
# [2.]
cal.misspec <- ecalrsf(preds = wx.hat.misspec,
y_test = testdat$y,
nbins = 10,
do_plot = FALSE)
# Relationship between observed (nx) and expected (Nx) number of pts per bin
lmfit.misspec <- lm(nx~Nx, data=cal.misspec)
# Spearman correlation coefficient
sp.mcor <- round(cor(cal.misspec$Nx, cal.misspec$nx, method="spearman"),2)
# Confidence intervals
pmisspec <- predict(lmfit.misspec, interval="confidence")
Steps 1 & 2: Correct model.
# [1.]
wx.hat.correct <- exp(predict(train.correct, newdata=testdat))
# [2.]
cal.correct <- ecalrsf(preds = wx.hat.correct,
y_test = testdat$y,
nbins = 10,
do_plot = FALSE)
# Relationship between observed (nx) and expected (Nx) number of pts per bin
lmfit.correct <- lm(nx~Nx, data=cal.correct)
# Spearman correlation coefficient
sp.ccor <- round(cor(cal.correct$Nx, cal.correct$nx, method="spearman"),2)
# Confidence intervals
pcorrect <- predict(lmfit.correct, interval="confidence")
[3.] Produce a calibration plot formatted for manuscript
par(mfrow=c(1,2),mar=c(4,2,2,2), oma=c(1, 4, 7, 0), bty="L")
# Incorrect Model
with(cal.misspec, plot(Nx, nx, xlab=" ", ylab=" ", pch=16))
abline(0,1, lty=1, col="gray", lwd=2)
abline(lmfit.correct)
lines(cal.misspec$Nx, pmisspec[,2], lty=2)
lines(cal.misspec$Nx, pmisspec[,3], lty=2)
mtext(side=3, line=1, "A)", cex=1.2, ad=0)
r2 <- round(summary(lmfit.misspec)$r.square,2)
b0 <- round(coef(lmfit.misspec)[1],2)
b1 <- round(coef(lmfit.misspec)[2],2)
ax.lims <- par("usr")
delt.lims <- c(ax.lims[2]-ax.lims[1],ax.lims[4]-ax.lims[3])
text.x.1 <- ax.lims[1] + 0.078*delt.lims[1]
text.x.2 <- ax.lims[1] + 0.14*delt.lims[1]
text.y.1 <- ax.lims[3] + 0.9*delt.lims[2]
text.y.2 <- ax.lims[3] + 0.82*delt.lims[2]
text.y.3 <- ax.lims[3] + 0.74*delt.lims[2]
text.y.4 <- ax.lims[3] + 0.6*delt.lims[2]
text.y.5 <- ax.lims[3] + 0.52*delt.lims[2]
text(text.x.1,text.y.1, "Regression equation", adj=0, cex=1.2)
text(text.x.2,text.y.2, bquote(~y == .(b0) + .(b1)*x), adj=0, cex=1.2)
text(text.x.2,text.y.3, bquote(~R^2 == .(r2)), adj=0, cex=1.2)
text(text.x.1,text.y.4, "Spearman Correlation", adj=0, cex=1.2)
text(text.x.2,text.y.5, bquote( .(sp.mcor)), adj=0, cex=1.2)
# Correct model
with(cal.correct, plot(Nx, nx, xlab=" ", ylab=" ", pch=16))
abline(lmfit.correct)
abline(0,1, lty=1, col="gray", lwd=2)
lines(cal.correct$Nx, pcorrect[,2], lty=2)
lines(cal.correct$Nx, pcorrect[,3], lty=2)
mtext(side=3, line=1, "B)", cex=1.2, ad=0)
r2 <- round(summary(lmfit.correct)$r.square,2)
b0 <- round(coef(lmfit.correct)[1],2)
b1 <- round(coef(lmfit.correct)[2],2)
ax.lims <- par("usr")
delt.lims <- c(ax.lims[2]-ax.lims[1],ax.lims[4]-ax.lims[3])
text.x.1 <- ax.lims[1] + 0.078*delt.lims[1]
text.x.2 <- ax.lims[1] + 0.14*delt.lims[1]
text.y.1 <- ax.lims[3] + 0.9*delt.lims[2]
text.y.2 <- ax.lims[3] + 0.82*delt.lims[2]
text.y.3 <- ax.lims[3] + 0.74*delt.lims[2]
text.y.4 <- ax.lims[3] + 0.6*delt.lims[2]
text.y.5 <- ax.lims[3] + 0.52*delt.lims[2]
text(text.x.1,text.y.1, "Regression equation", adj=0, cex=1.2)
text(text.x.2,text.y.2, bquote(~y == .(b0) + .(b1)*x), adj=0, cex=1.2)
text(text.x.2,text.y.3, bquote(~R^2 == .(r2)), adj=0, cex=1.2)
text(text.x.1,text.y.4, "Spearman Correlation", adj=0, cex=1.2)
text(text.x.2,text.y.5, bquote( .(sp.ccor)), adj=0, cex=1.2)
mtext(outer=T, side=2, line=1,
"Observed number of used points in each bin", cex=1.3)
mtext(outer=T, side=1, line = 0,
"Expected number of used points in each bin", cex=1.3)
mtext(outer=T, side=3, line=-1, adj=0.25,
expression(y %~% temp), cex=1.3)
mtext(outer=T, side=3.5, line=-1, adj=0.75,
expression(y %~% temp + temp^2), cex=1.3)
plot.calib <- recordPlot()
quartz_off_screen
2
#################################################################
Incorrect model
# uhcsim function
xhat1.missspec <- uhcsim(nsims = 1000,
nused_test = nused_test,
xmat = as.matrix(testdat[, c("temp")]),
fit_rsf = train.misspec,
z = as.matrix(testdat[, c("temp")]))
# uhcdenscalc function
denshats.temp.mod1 <- uhcdenscalc(sims = xhat1.missspec[,,1],
dat = subset(testdat, y==1, select="temp"),
avail = subset(testdat, y==0, select="temp"))
Correct model
# uhcsim function
xhat1.correct <- uhcsim(nsims=1000,
nused_test=nused_test,
xmat=model.matrix(y~temp + I(temp^2),
data=testdat)[,-1],
fit_rsf=train.correct,
z=as.matrix(testdat[, c("temp")]))
# uhcdenscalc function for both predictors
denshats.temp.mod2 <- uhcdenscalc(sims = xhat1.correct[,,1],
dat = subset(testdat, y==1, select="temp"),
avail = subset(testdat, y==0, select="temp"))
Uses uhcdensplot function
par(mfcol=c(1,2), mar=c(3,2,2,3), oma=c(3, 4, 2, 0), bty="L")
# First, the density plots
uhcdensplot(densdat = denshats.temp.mod1$densdat,
densrand = denshats.temp.mod1$densrand,
includeAvail = TRUE,
densavail = denshats.temp.mod1$densavail,
xl = c(-5,5),
yl = c(0,0.7))
mtext(side=3, line=1, "A)", cex=1.2, ad=0)
mtext(outer=F, side=2, line=3, "Density")
mtext(side=3, line=1, expression(y %~% temp), adj=0.5)
mtext(outer=F, side=1, line=4, expression(Temperature))
uhcdensplot(densdat = denshats.temp.mod2$densdat,
densrand = denshats.temp.mod2$densrand,
includeAvail = TRUE,
densavail = denshats.temp.mod2$densavail,
xl = c(-5,5),
yl = c(0,0.7))
mtext(side=3, line=1, "B)", cex=1.2, ad=0)
mtext(outer=F, side=2, line=3, "Density")
mtext(side=3, line=1, expression(y %~% temp + temp^2), adj=0.5)
mtext(outer=F, side=1, line=4, expression(Temperature))
uhc.plot <- recordPlot()
quartz_off_screen
2
#################################################################
\(f^u(z)-\hat{f}^u(z)_i\)
Uses uhcdiffdensplot function
par(mfrow=c(1,2), mar=c(2,2,2,2), oma=c(3, 0, 5, 0), bty="L")
# Incorrect model (missing predictor)
uhcdiffdensplot(densdat = denshats.temp.mod1$densdat,
densrand = denshats.temp.mod1$densrand,
xl = c(-5,9))
mtext(side=3, line=1, "A)", cex=1.2, ad=0)
mtext(outer=F, side=2, line=3, "Density")
mtext(side=3, line=1, expression(y %~% temp), adj=0.5)
mtext(outer=F, side=1, line=4, expression(Temperature))
uhcdiffdensplot(densdat = denshats.temp.mod2$densdat,
densrand = denshats.temp.mod2$densrand,
xl = c(-5,9))
mtext(side=3, line=1, "B)", cex=1.2, ad=0)
mtext(outer=F, side=2, line=3, "Density")
mtext(side=3, line=1, expression(y %~% temp + temp^2), adj=0.5)
mtext(outer=F, side=1, line=4, expression(Temperature))
References:
Boyce, M.S., Vernier, P.R., Nielsen, S.E. & Schmiegelow, F.K. (2002). Evaluating resource selection functions. Ecological Modelling, 157, 281–300.
Johnson, C.J., Nielsen, S.E., Merrill, E.H., McDonald, T.L. & Boyce, M.S. (2006). Resource selection functions based on use-availability data: Theoretical motivation and evaluation methods. Journal of Wildlife Management, 70, 347-357.
Session Information
sessionInfo()
R version 3.3.1 (2016-06-21)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: OS X 10.10.5 (Yosemite)
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] xtable_1.8-2 dplyr_0.5.0 survival_2.39-5
[4] KernSmooth_2.23-15 ggplot2_2.1.0 MASS_7.3-45
[7] knitr_1.13 ezknitr_0.4
loaded via a namespace (and not attached):
[1] Rcpp_0.12.6 magrittr_1.5 splines_3.3.1
[4] munsell_0.4.3 colorspace_1.2-6 lattice_0.20-33
[7] R6_2.1.2 stringr_1.0.0 plyr_1.8.4
[10] tools_3.3.1 grid_3.3.1 gtable_0.2.0
[13] R.oo_1.20.0 DBI_0.4-1 stargazer_5.2
[16] lazyeval_0.2.0 assertthat_0.1 tibble_1.1
[19] Matrix_1.2-6 formatR_1.4 R.utils_2.3.0
[22] evaluate_0.9 mime_0.5 stringi_1.1.1
[25] scales_0.4.0 R.methodsS3_1.7.1 markdown_0.7.7
Spun with: ezspin(file = “uhcplots/example_2_temp.R”, out_dir =“uhcplots/output”, fig_dir = “figures”, keep_md = FALSE)