Primary programmer: John Fieberg | Secondary programmer: Althea A. ArchMiller
Date: 20160805
Complete code and analysis for simulation examples in: Fieberg et al. Species distribution models: Predictive snipers or shots in the dark?
Missing Predictor example. Simulate data for example with the probability of selection proportional to \(\exp(0.5x_1 - x_2)\), where \(x_1\) is elevation and \(x_2\) is precipitation.
These data were generated under the following scenario:
var(\(x_1\)) = var(\(x_2\)) = 4 and cor(\(x_1,x_2\)) = 0.3 for the training data set and -0.3 for the test data set
Set seed
set.seed(1000)
Load libraries
library(ezknitr)
library(MASS)
library(ggplot2)
library(KernSmooth)
uhcplots functions from GitHub
library(devtools)
install_github("aaarchmiller/uhcplots")
Skipping install of 'uhcplots' from a github remote, the SHA1 (1f479193) has not changed since last install.
Use `force = TRUE` to force installation
library(uhcplots)
#################################################################
Global settings
nused_train <- nused_test <- 100
navail_train <- navail_test <- 10000
ntemp <- 1000000
nsims <- 1000 # M for step 3 of UHC method
Example-specific settings
example <- "missing predictor"
set.seed(1000)
betas <- c(0.5, -1)
xlabs <- c("Elevation", "Precipitation")
Scenario-specific settings
corx.train <- 0.3
corx.test <- -0.3
scenario <- "corrPN"
labcor1 <- expression(rho[list(x[1],x[2])]==0.3)
labcor2 <- expression(rho[list(x[1],x[2])]==-0.3)
textplot="Expected number of used points in each bin"
panlabs1 <- c("E)","F)")
panlabs2 <- c("I)","J)", "K)", "L)")
top <- 0
outfig1 <- "Paper/graphics/Boyce1c.pdf"
outfig2 <- "Paper/graphics/crosssim1c.pdf"
#################################################################
Training data
traindat <- uhcdatasimulator(nused = nused_train,
navail = navail_train,
betas = betas,
corx = corx.train,
ntemp = ntemp,
example = example)
Test data
testdat <- uhcdatasimulator(nused = nused_test,
navail = navail_test,
betas = betas,
corx = corx.test,
ntemp = ntemp,
example = example)
#################################################################
Fit 2 models
# correct model
train.correct <- glm(y~elev+precip, family=binomial(), data=traindat)
summary(train.correct)
Call:
glm(formula = y ~ elev + precip, family = binomial(), data = traindat)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.3180 -0.1060 -0.0551 -0.0288 4.3442
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.46173 0.23424 -27.587 <2e-16 ***
elev 0.56580 0.05953 9.504 <2e-16 ***
precip -0.97025 0.06258 -15.505 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1122.03 on 10099 degrees of freedom
Residual deviance: 769.29 on 10097 degrees of freedom
AIC: 775.29
Number of Fisher Scoring iterations: 9
# missing precip
train.missingprecip <- glm(y~elev, family=binomial(), data=traindat)
summary(train.missingprecip)
Call:
glm(formula = y ~ elev, family = binomial(), data = traindat)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.3237 -0.1573 -0.1309 -0.1087 3.4470
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.75289 0.11436 -41.561 < 2e-16 ***
elev 0.27247 0.05109 5.334 9.63e-08 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1122.0 on 10099 degrees of freedom
Residual deviance: 1093.3 on 10098 degrees of freedom
AIC: 1097.3
Number of Fisher Scoring iterations: 8
Save regression models (to make a table later)
save(train.correct, train.missingprecip,
file=paste0("examples/Regmods/Regmods_", scenario, ".R"))
#################################################################
source("examples/functions_not_in_uhcplots/ecalcrsf.R")
Steps 1 & 2: Incorrect model.
# [1.]
wx.hat.missingprecip <- exp(predict(train.missingprecip, newdata=testdat))
# [2.]
cal.missingprecip <- ecalrsf(preds = wx.hat.missingprecip,
y_test = testdat$y,
nbins = 10,
do_plot = F)
# Relationship between observed (nx) and expected (Nx) number of pts per bin
lmfit.missingprecip <- lm(nx~Nx, data=cal.missingprecip)
# Spearman correlation coefficient
sp.mcor <- round(cor(cal.missingprecip$Nx, cal.missingprecip$nx,
method="spearman"),2)
# Confidence intervals
pmiss <- predict(lmfit.missingprecip, interval="confidence")
Steps 1 & 2: Correct model.
# [1.]
wx.hat.correct <- exp(predict(train.correct, newdata=testdat))
# [2.]
cal.correct <- ecalrsf(preds = wx.hat.correct,
y_test = testdat$y,
nbins = 10,
do_plot=FALSE)
# Relationship between observed (nx) and expected (Nx) number of pts per bin
lmfit.correct <- lm(nx~Nx, data=cal.correct)
# Spearman correlation coefficient
sp.ccor <- round(cor(cal.correct$Nx, cal.correct$nx, method="spearman"),2)
# Confidence intervals
pcorrect <- predict(lmfit.correct, interval="confidence")
[3.] Produce a calibration plot formatted for manuscript
par(mar=c(4,2,2,2), oma=c(1, 4, 7, 0), bty="L")
layout(matrix(c(1,2,2,3,3), nrow=1))
# Use the first plot for text
plot(1, type="n", xlab="", ylab="", xlim=c(0, 1), ylim=c(0, 1), xaxt='n',
yaxt='n',bty='n')
text(0.000001, 0.9, "Training Data", cex=3, adj=0)
text(0.1, 0.78, labcor1 , cex=3,adj=0)
text(0.000001, 0.62, "Test Data", cex=3, adj=0)
text(0.1, 0.50, labcor2 , cex=3, adj=0)
# modify top plots that don't require x-axis label
if(nchar(textplot)< 5){par(mar=c(2,2,2,2))}
# Incorrect Model
with(cal.missingprecip, plot(Nx, nx, xlab="", ylab=" ", pch=16, xlim=c(0, max(Nx))))
abline(lmfit.missingprecip)
abline(0,1, lty=1, col="gray", lwd=2)
lines(cal.missingprecip$Nx, pmiss[,2], lty=2)
lines(cal.missingprecip$Nx, pmiss[,3], lty=2)
mtext(side=3, line=1, panlabs1[1], cex=1.2, ad=0)
r2 <- round(summary(lmfit.missingprecip)$r.square,2)
b0 <- round(coef(lmfit.missingprecip)[1],2)
b1 <- round(coef(lmfit.missingprecip)[2],2)
# Add text with placement depending on axis limits
ax.lims <- par("usr")
delt.lims <- c(ax.lims[2]-ax.lims[1],ax.lims[4]-ax.lims[3])
text.x.1 <- ax.lims[1] + 0.078*delt.lims[1]
text.x.2 <- ax.lims[1] + 0.14*delt.lims[1]
text.y.1 <- ax.lims[3] + 0.9*delt.lims[2]
text.y.2 <- ax.lims[3] + 0.82*delt.lims[2]
text.y.3 <- ax.lims[3] + 0.74*delt.lims[2]
text.y.4 <- ax.lims[3] + 0.6*delt.lims[2]
text.y.5 <- ax.lims[3] + 0.52*delt.lims[2]
text(text.x.1,text.y.1, "Regression equation", adj=0, cex=2)
text(text.x.2,text.y.2, bquote(~y == .(b0) + .(b1)*x), adj=0, cex=2)
text(text.x.2,text.y.3, bquote(~R^2 == .(r2)), adj=0, cex=2)
text(text.x.1,text.y.4, "Spearman Correlation", adj=0, cex=2)
text(text.x.2,text.y.5, bquote( .(sp.mcor)), adj=0, cex=2)
mtext(outer=F, side=2, line=3, "Observed number of used points in each bin",
cex=1.6)
# Correct model
with(cal.correct, plot(Nx, nx, xlab=" ", ylab=" ", pch=16, xlim=c(0, max(Nx))))
abline(lmfit.correct)
lines(cal.correct$Nx, pcorrect[,2], lty=2)
lines(cal.correct$Nx, pcorrect[,3], lty=2)
abline(0,1, lty=1, col="gray", lwd=2)
mtext(side=3, line=1, panlabs1[2], cex=1.2, ad=0)
r2 <- round(summary(lmfit.correct)$r.square,2)
b0 <- round(coef(lmfit.correct)[1],2)
b1 <- round(coef(lmfit.correct)[2],2)
# Add text with placement depending on axis limits
ax.lims <- par("usr")
delt.lims <- c(ax.lims[2]-ax.lims[1],ax.lims[4]-ax.lims[3])
text.x.1 <- ax.lims[1] + 0.078*delt.lims[1]
text.x.2 <- ax.lims[1] + 0.14*delt.lims[1]
text.y.1 <- ax.lims[3] + 0.9*delt.lims[2]
text.y.2 <- ax.lims[3] + 0.82*delt.lims[2]
text.y.3 <- ax.lims[3] + 0.74*delt.lims[2]
text.y.4 <- ax.lims[3] + 0.6*delt.lims[2]
text.y.5 <- ax.lims[3] + 0.52*delt.lims[2]
text(text.x.1,text.y.1, "Regression equation", adj=0, cex=2)
text(text.x.2,text.y.2, bquote(~y == .(b0) + .(b1)*x), adj=0, cex=2)
text(text.x.2,text.y.3, bquote(~R^2 == .(r2)), adj=0, cex=2)
text(text.x.1,text.y.4, "Spearman Correlation", adj=0, cex=2)
text(text.x.2,text.y.5, bquote( .(sp.ccor)), adj=0, cex=2)
mtext(outer=T, side=1, line=0, textplot, adj=0.6, cex=1.6)
if(top==1){
mtext(outer=T, side=3.5, line=4, adj=0.80,
expression(y %~% elev+precip), cex=2.5)
mtext(outer=T, side=3.5, line=4, adj=0.4,
expression(y %~% elev), cex=2.5)
}
plot.calib <- recordPlot()
RStudioGD
2
#################################################################
Incorrect model
# uhcsim function
xhat.missingprecip <- uhcsim(nsims = nsims,
nused_test = nused_test,
xmat = as.matrix(testdat[, c("elev")]),
fit_rsf = train.missingprecip,
z = testdat[, c("elev","precip")])
# uhcdenscalc function for both predictors
denshats.elev.missingprecip <- uhcdenscalc(rand_sims = xhat.missingprecip[,,1],
dat = subset(testdat,
y==1, select="elev"),
avail = subset(testdat,
y==0, select="elev"))
denshats.precip.missingprecip <- uhcdenscalc(rand_sims = xhat.missingprecip[,,2],
dat = subset(testdat,
y==1, select="precip"),
avail = subset(testdat,
y==0, select="precip"))
Correct model
# uhcsim function
xhat.correct <- uhcsim(nsims = 1000,
nused_test = nused_test,
xmat = testdat[, c("elev","precip")],
fit_rsf = train.correct,
z = testdat[, c("elev","precip")])
# uhcdenscalc function for both predictors
denshats.elev.correct <- uhcdenscalc(rand_sims = xhat.correct[,,1],
dat = subset(testdat, y==1, select="elev"),
avail = subset(testdat, y==0, select="elev"))
denshats.precip.correct <- uhcdenscalc(rand_sims = xhat.correct[,,2],
dat = subset(testdat, y==1, select="precip"),
avail = subset(testdat,
y==0, select="precip"))
Uses uhcdensplot function
par(mfrow=c(1,5), mar=c(2,2,2,2), oma=c(3, 0, 5, 0), bty="L")
# Use the first plot for text
plot(1, type="n", xlab="", ylab="",
xlim=c(0, 1), ylim=c(0, 1),
xaxt='n',yaxt='n',bty='n')
text(0.01, 0.9, "Training Data", cex=3, adj=0)
text(0.2, 0.72, labcor1 , cex=3,adj=0)
text(0.01, 0.52, "Test Data", cex=3, adj=0)
text(0.2, 0.40, labcor2 , cex=3, adj=0)
# Incorrect model (missing predictor)
uhcdensplot(densdat = denshats.elev.missingprecip$densdat,
densrand = denshats.elev.missingprecip$densrand,
xl = c(-4,9),
includeAvail = TRUE,
densavail = denshats.elev.missingprecip$densavail,
includeLegend = T)
mtext(outer=F, side=2, line=3, "Density", cex=1.3)
mtext(side=3, line=1, panlabs2[1], cex=1.2, ad=0)
mtext(outer=F, side=1, line=4, xlabs[1], cex=1.6)
uhcdensplot(densdat = denshats.precip.missingprecip$densdat,
densrand = denshats.precip.missingprecip$densrand,
xl=c(-9,6),
includeAvail = TRUE,
densavail = denshats.precip.missingprecip$densavail,
includeLegend = F)
mtext(outer=F, side=1, line=4, xlabs[2], cex=1.6)
mtext(side=3, line=1, panlabs2[2], cex=1.2, ad=0)
# Correct model (both predictors)
uhcdensplot(densdat = denshats.elev.correct$densdat,
densrand = denshats.elev.correct$densrand,
xl = c(-5,9),
includeAvail = TRUE,
densavail = denshats.elev.correct$densavail,
includeLegend = F)
mtext(side=3, line=1, panlabs2[3], cex=1.2, ad=0)
mtext(outer=F, side=1, line=4, xlabs[1], cex=1.6)
uhcdensplot(densdat = denshats.precip.correct$densdat,
densrand = denshats.precip.correct$densrand,
xl = c(-9,3.5),
includeAvail = TRUE,
densavail = denshats.precip.correct$densavail,
includeLegend = F)
mtext(side=3, line=1, panlabs2[4], cex=1.2, ad=0)
mtext(outer=F, side=1, line=4, xlabs[2], cex=1.6)
if(top==1){
mtext(outer=T, side=3.5, line=2, adj=0.80,
expression(y %~% elev+precip), cex=2.5)
mtext(outer=T, side=3.5, line=2, adj=0.40,
expression(y %~% elev), cex=2.5)
}
uhc.plot <- recordPlot()
RStudioGD
2
#################################################################
\(f^u(z)-\hat{f}^u(z)_i\)
Uses uhcdiffdensplot function
par(mfrow=c(1,5), mar=c(2,2,2,2), oma=c(3, 0, 5, 0), bty="L")
# Use the first plot for text
plot(1, type="n", xlab="", ylab="",
xlim=c(0, 1), ylim=c(0, 1),
xaxt='n',yaxt='n',bty='n')
text(0.01, 0.9, "Training Data", cex=3, adj=0)
text(0.2, 0.72, labcor1 , cex=3,adj=0)
text(0.01, 0.52, "Test Data", cex=3, adj=0)
text(0.2, 0.40, labcor2 , cex=3, adj=0)
# Incorrect model (missing predictor)
uhcdiffdensplot(densdat = denshats.elev.missingprecip$densdat,
densrand = denshats.elev.missingprecip$densrand,
xl = c(-5,9))
mtext(outer=F, side=2, line=3, "Density", cex=1.3)
mtext(side=3, line=1, panlabs2[1], cex=1.2, ad=0)
mtext(outer=F, side=1, line=4, xlabs[1], cex=1.6)
uhcdiffdensplot(densdat = denshats.precip.missingprecip$densdat,
densrand = denshats.precip.missingprecip$densrand,
xl=c(-9,3.5))
mtext(outer=F, side=1, line=4, xlabs[2], cex=1.6)
mtext(side=3, line=1, panlabs2[2], cex=1.2, ad=0)
# Correct model (both predictors)
uhcdiffdensplot(densdat = denshats.elev.correct$densdat,
densrand = denshats.elev.correct$densrand,
xl = c(-4,9))
mtext(side=3, line=1, panlabs2[3], cex=1.2, ad=0)
mtext(outer=F, side=1, line=4, xlabs[1], cex=1.6)
uhcdiffdensplot(densdat = denshats.precip.correct$densdat,
densrand = denshats.precip.correct$densrand,
xl = c(-9,6))
mtext(side=3, line=1, panlabs2[4], cex=1.2, ad=0)
mtext(outer=F, side=1, line=4, xlabs[2], cex=1.6)
if(top==1){
mtext(outer=T, side=3.5, line=2, adj=0.80,
expression(y %~% elev+precip), cex=2.5)
mtext(outer=T, side=3.5, line=2, adj=0.40,
expression(y %~% elev), cex=2.5)
}
References:
Boyce, M.S., Vernier, P.R., Nielsen, S.E. & Schmiegelow, F.K. (2002). Evaluating resource selection functions. Ecological Modelling, 157, 281–300.
Johnson, C.J., Nielsen, S.E., Merrill, E.H., McDonald, T.L. & Boyce, M.S. (2006). Resource selection functions based on use-availability data: Theoretical motivation and evaluation methods. Journal of Wildlife Management, 70, 347-357.
Session Information
sessionInfo()
R version 3.3.1 (2016-06-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows >= 8 x64 (build 9200)
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] uhcplots_0.1.0 devtools_1.12.0 KernSmooth_2.23-15
[4] ggplot2_2.1.0 MASS_7.3-45 knitr_1.14
[7] ezknitr_0.6
loaded via a namespace (and not attached):
[1] Rcpp_0.12.7 magrittr_1.5 munsell_0.4.3
[4] colorspace_1.2-6 R6_2.2.0 stringr_1.1.0
[7] httr_1.2.1 plyr_1.8.4 tools_3.3.1
[10] grid_3.3.1 gtable_0.2.0 R.oo_1.20.0
[13] git2r_0.15.0 withr_1.0.2 digest_0.6.10
[16] formatR_1.4 R.utils_2.4.0 curl_2.1
[19] mime_0.5 memoise_1.0.0 evaluate_0.9
[22] stringi_1.1.2 scales_0.4.0 R.methodsS3_1.7.1
[25] markdown_0.7.7
Spun with: ezspin(file = “examples/example_1_corrPN.R”, out_dir =“examples/output”, fig_dir = “figures”, keep_md = FALSE)