library(lubridate)
library(dplyr)
setwd("C:/Users/bills/OneDrive/Desktop/MS Colt Survival")
rdata<-read.csv("ColtSurvData.csv")
#' ## Create data set
# Create data set with the following variables
# - ID = animal identifier
# - cens = 1 (if dead, 0 if censored)
# - r = date discovered dead (missing if the bird was still alive at 100 days)
# - l = last known date alive
# - ed = date of capture
# - bd = birth date
# - starta = age when captured
# - enda = age at last observation
rdata$cens = 1- rdata$Surv
rdata$ed <- mdy(rdata$capture.date)
rdata$bd <- rdata$ed - rdata$Cap.age.days
rdata$l <- mdy(rdata$lastdateknownalive)
rdata$r <- mdy(rdata$firstdateknowndead)
rdata$ID<-rdata$colt_ID
rdata$starta<-as.numeric((rdata$ed-rdata$bd))
rdata$enda<-as.numeric((rdata$r-rdata$bd))
rdata<-rdata %>% filter(colt_ID!="2015-31") %>%
select(ID, bd, ed, l, r, starta, enda, cens)
rdata<-rdata %>% filter(ID!="2015-6") %>%
select(ID, bd, ed, l, r, starta, enda, cens)
#’ Function for interval censored survival data #’ - daily survival probability = exp(-exp(xbeta)) #’ - daily probability of death = 1-exp(-exp(xbeta)) ###
mlefun<-function(par,dat){
beta<-par
nd<-nrow(dat)
ui<-unique(dat$ID)
count<-1
logL<-matrix(NA,length(ui),1)
for(i in ui){ # loop over individuals
tdat<-dat[dat$ID==i,]
starta<-as.numeric((tdat$ed-tdat$bd))
enda<-as.numeric((tdat$r-tdat$bd))
if(tdat$cens==1 & tdat$r==tdat$l){# known date of death
aget<-seq(starta, enda, by=1)
nl<-length(aget)
# Calculate the log daily survival probability from day of entry to left endpoint
logdsurv <- -exp(cbind(rep(1,length(aget)), aget)%*%beta)
# likelihood contribution survive all but last day, then dead on day nl
logL[count]<- sum(logdsurv[-nl])+log(1-exp(logdsurv[nl]))
}
if(tdat$cens==1 & tdat$r!=tdat$l){# date of death occurs between l and r
aget<-seq(starta, enda, by=1)
nl<-length(aget)
nll<-(tdat$r-tdat$l) # days in last interval
# Calculate the daily hazard from day of entry to left endpoint
logdsurv <- -exp(cbind(rep(1,length(aget)),aget)%*%beta)
# likelihood contribution survive up to last interval, then die last interval
logL[count]<- sum(logdsurv[1:(nl-nll)])+log(1-exp(sum(logdsurv[-c(1:(nl-nll))])))
}
if(tdat$cens==0){#right censored
aget<-seq(starta, enda, by=1)
# Calculate the daily hazard from day of entry to left endpoint
logdsurv <- -exp(cbind(rep(1,length(aget)), aget)%*%beta)
# if cens==0, then likelihood contribution is exp(-sum(dayhaz | dates above))
logL[count]<- sum(logdsurv)
}
count<-count+1
}
logl<--sum(logL)
}
#' ## Estimate parameters using interval censored likelihood
# Estimate parameters (intercept and beta for age)
fit.noint2<- optim(c(-3.7400,-0.0297), mlefun, gr = NULL, method = c("BFGS"), dat=rdata, hessian = T)
fit.noint2
## $par
## [1] -3.19216559 -0.03324578
##
## $value
## [1] 35.63115
##
## $counts
## function gradient
## 30 6
##
## $convergence
## [1] 0
##
## $message
## NULL
##
## $hessian
## [,1] [,2]
## [1,] 8.98532 532.3365
## [2,] 532.33651 35681.4234
fit.noint2$par[1]
## [1] -3.192166
# SE for intercept and beta[age]
sqrt(diag(solve(fit.noint2$hessian)))
## [1] 0.97902305 0.01553598
#Plots ###
set.seed(1)
nsims = 1000
Shat <- matrix(NA, nrow = 102, ncol = nsims)
uid <- unique(rdata$ID)
nID <- length(uid)
for(ii in 1:nsims){
bootIDs <- data.frame(ID = sample(x = uid, size = nID, replace = TRUE))
bootDat <- merge(bootIDs, rdata)
bootDat$ID<-as.factor(1:nrow(bootDat))
ages<-seq(19, 120, by = 1)
fit.noint2<- optim(c(-3.7400,-0.0297), mlefun, gr = NULL, method = c("BFGS"), dat=bootDat, hessian = T)
est.eta<-cbind(1, ages)%*%fit.noint2$par
dailyprobs<-exp(-exp(est.eta))
Shat[,ii]<-cumprod(dailyprobs)
}
#' Calculate 90% confidence intervals for Shat (CIs)
CIs<-apply(as.matrix(Shat), 1, function(x){quantile(x,probs=c(0.05,0.95))})
ages<-seq(19, 120, by = 1)
est.eta<-cbind(1, ages)%*%fit.noint2$par
se.eta<-sqrt(diag(cbind(1, ages)%*%solve(fit.noint2$hessian)%*%t(cbind(1, ages))))
dailyprobs<-exp(-exp(est.eta))
lci.dailyprobs<-exp(-exp(est.eta - 1.645*se.eta))#90%CI
uci.dailyprobs<-exp(-exp(est.eta + 1.645*se.eta))#90%CI
par(mfrow=c(1,2), bty="L")
par(mar=c(5,6,4,1)+0.1)
plot(ages, 1-dailyprobs, type="l", lwd=2,ylim=c(0, 0.03), ylab="Daily mortality probability",xlab = "Age (days)")
lines(ages, 1-lci.dailyprobs, lty=2)
lines(ages, 1-uci.dailyprobs, lty=2)
legend("topright", legend=c("A"),bty="n")
plot(ages, cumprod(dailyprobs), type="l", lwd=2,ylab=expression(paste(hat(italic(S)),"(t) | ",italic(S),"(19)=1")),
xlab = "Age (days)", ylim = c(0,1))
lines(ages, CIs[1,], lty=3, lwd=2)
lines(ages, CIs[2,], lty=3, lwd=2)
legend("topright", legend=c("B"),bty="n")
#' Store results in Shat (for survival curve) and hhat for hazard curve
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22000)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] dplyr_1.0.7 lubridate_1.8.0
##
## loaded via a namespace (and not attached):
## [1] knitr_1.37 magrittr_2.0.1 tidyselect_1.1.1 R6_2.5.1
## [5] rlang_0.4.12 fastmap_1.1.0 fansi_1.0.2 highr_0.9
## [9] stringr_1.4.0 tools_4.1.2 xfun_0.29 utf8_1.2.2
## [13] DBI_1.1.2 jquerylib_0.1.4 htmltools_0.5.2 ellipsis_0.3.2
## [17] assertthat_0.2.1 yaml_2.2.1 digest_0.6.29 tibble_3.1.6
## [21] lifecycle_1.0.1 crayon_1.4.2 purrr_0.3.4 sass_0.4.0
## [25] vctrs_0.3.8 glue_1.6.0 evaluate_0.14 rmarkdown_2.11
## [29] stringi_1.7.6 compiler_4.1.2 bslib_0.3.1 pillar_1.7.0
## [33] generics_0.1.2 jsonlite_1.7.3 pkgconfig_2.0.3