library(lubridate)
library(dplyr)
setwd("C:/Users/bills/OneDrive/Desktop/MS Colt Survival")
rdata<-read.csv("ColtSurvData.csv")
#' ## Create data set
# Create data set with the following variables
# - ID = animal identifier
# - cens = 1 (if dead, 0 if censored)
# - r = date discovered dead (missing if the bird was still alive at 100 days)
# - l = last known date alive
# - ed = date of capture
# - bd = birth date
# - starta = age when captured
# - enda = age at last observation
rdata$cens = 1- rdata$Surv
rdata$ed <- mdy(rdata$capture.date)
rdata$bd <- rdata$ed - rdata$Cap.age.days
rdata$l <- mdy(rdata$lastdateknownalive)
rdata$r <- mdy(rdata$firstdateknowndead)
rdata$ID<-rdata$colt_ID
rdata$starta<-as.numeric((rdata$ed-rdata$bd))
rdata$enda<-as.numeric((rdata$r-rdata$bd))
rdata<-rdata %>%  filter(colt_ID!="2015-31") %>%
     select(ID, bd, ed, l, r, starta, enda, cens) 
rdata<-rdata %>%  filter(ID!="2015-6") %>%
     select(ID, bd, ed, l, r, starta, enda, cens) 

#’ Function for interval censored survival data #’ - daily survival probability = exp(-exp(xbeta)) #’ - daily probability of death = 1-exp(-exp(xbeta)) ###

 mlefun<-function(par,dat){
    beta<-par
    nd<-nrow(dat)
    ui<-unique(dat$ID)
    count<-1
    logL<-matrix(NA,length(ui),1)
    for(i in ui){  # loop over individuals
       tdat<-dat[dat$ID==i,]  
       starta<-as.numeric((tdat$ed-tdat$bd))
       enda<-as.numeric((tdat$r-tdat$bd))
       if(tdat$cens==1 & tdat$r==tdat$l){# known date of death
          aget<-seq(starta, enda, by=1)
          nl<-length(aget)
          
          # Calculate the log daily survival probability from day of entry to left endpoint
          logdsurv <- -exp(cbind(rep(1,length(aget)), aget)%*%beta)
          
          # likelihood contribution survive all but last day, then dead on day nl     
          logL[count]<- sum(logdsurv[-nl])+log(1-exp(logdsurv[nl]))
       }
       if(tdat$cens==1 & tdat$r!=tdat$l){# date of death occurs between l and r
          aget<-seq(starta, enda, by=1) 
          nl<-length(aget)
          nll<-(tdat$r-tdat$l) # days in last interval
          
          # Calculate the daily hazard from day of entry to left endpoint
          logdsurv <- -exp(cbind(rep(1,length(aget)),aget)%*%beta)
          
          #  likelihood contribution survive up to last interval, then die last interval    
          logL[count]<- sum(logdsurv[1:(nl-nll)])+log(1-exp(sum(logdsurv[-c(1:(nl-nll))])))
       }
       if(tdat$cens==0){#right censored
          aget<-seq(starta, enda, by=1)
          
          # Calculate the daily hazard from day of entry to left endpoint
          logdsurv <- -exp(cbind(rep(1,length(aget)), aget)%*%beta)
          
          # if cens==0, then likelihood contribution is exp(-sum(dayhaz | dates above))    
          logL[count]<- sum(logdsurv)
       }
       count<-count+1
    }
    logl<--sum(logL)
 }
 
#' ## Estimate parameters using interval censored likelihood
# Estimate parameters (intercept and beta for age)
 fit.noint2<- optim(c(-3.7400,-0.0297), mlefun, gr = NULL, method = c("BFGS"), dat=rdata, hessian = T)
 fit.noint2
## $par
## [1] -3.19216559 -0.03324578
## 
## $value
## [1] 35.63115
## 
## $counts
## function gradient 
##       30        6 
## 
## $convergence
## [1] 0
## 
## $message
## NULL
## 
## $hessian
##           [,1]       [,2]
## [1,]   8.98532   532.3365
## [2,] 532.33651 35681.4234
 fit.noint2$par[1]
## [1] -3.192166
 # SE for intercept and beta[age]
 sqrt(diag(solve(fit.noint2$hessian)))
## [1] 0.97902305 0.01553598

#Plots ###

set.seed(1)
 
 nsims = 1000
 Shat <- matrix(NA, nrow = 102, ncol = nsims) 
 uid <- unique(rdata$ID) 
 nID <- length(uid) 
 for(ii in 1:nsims){
   bootIDs <- data.frame(ID = sample(x = uid, size = nID, replace = TRUE))
   bootDat <- merge(bootIDs, rdata)
   bootDat$ID<-as.factor(1:nrow(bootDat))
   ages<-seq(19, 120, by = 1)
   fit.noint2<- optim(c(-3.7400,-0.0297), mlefun, gr = NULL, method = c("BFGS"), dat=bootDat, hessian = T)
   est.eta<-cbind(1, ages)%*%fit.noint2$par
   dailyprobs<-exp(-exp(est.eta))
   Shat[,ii]<-cumprod(dailyprobs)
 }  
 
 #' Calculate 90% confidence intervals for Shat (CIs)

 CIs<-apply(as.matrix(Shat), 1, function(x){quantile(x,probs=c(0.05,0.95))})
 
 ages<-seq(19, 120, by = 1)
 est.eta<-cbind(1, ages)%*%fit.noint2$par
 se.eta<-sqrt(diag(cbind(1, ages)%*%solve(fit.noint2$hessian)%*%t(cbind(1, ages))))
 dailyprobs<-exp(-exp(est.eta))
 lci.dailyprobs<-exp(-exp(est.eta - 1.645*se.eta))#90%CI
 uci.dailyprobs<-exp(-exp(est.eta + 1.645*se.eta))#90%CI
 par(mfrow=c(1,2), bty="L")
 par(mar=c(5,6,4,1)+0.1)
  plot(ages, 1-dailyprobs, type="l", lwd=2,ylim=c(0, 0.03), ylab="Daily mortality probability",xlab = "Age (days)")
 lines(ages, 1-lci.dailyprobs, lty=2)
 lines(ages, 1-uci.dailyprobs, lty=2)
 legend("topright", legend=c("A"),bty="n")
 plot(ages, cumprod(dailyprobs), type="l", lwd=2,ylab=expression(paste(hat(italic(S)),"(t) | ",italic(S),"(19)=1")),
         xlab = "Age (days)", ylim = c(0,1))
 lines(ages, CIs[1,], lty=3, lwd=2)
 lines(ages, CIs[2,], lty=3, lwd=2)
 legend("topright", legend=c("B"),bty="n")

 #' Store results in Shat (for survival curve) and hhat for hazard curve
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22000)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] dplyr_1.0.7     lubridate_1.8.0
## 
## loaded via a namespace (and not attached):
##  [1] knitr_1.37       magrittr_2.0.1   tidyselect_1.1.1 R6_2.5.1        
##  [5] rlang_0.4.12     fastmap_1.1.0    fansi_1.0.2      highr_0.9       
##  [9] stringr_1.4.0    tools_4.1.2      xfun_0.29        utf8_1.2.2      
## [13] DBI_1.1.2        jquerylib_0.1.4  htmltools_0.5.2  ellipsis_0.3.2  
## [17] assertthat_0.2.1 yaml_2.2.1       digest_0.6.29    tibble_3.1.6    
## [21] lifecycle_1.0.1  crayon_1.4.2     purrr_0.3.4      sass_0.4.0      
## [25] vctrs_0.3.8      glue_1.6.0       evaluate_0.14    rmarkdown_2.11  
## [29] stringi_1.7.6    compiler_4.1.2   bslib_0.3.1      pillar_1.7.0    
## [33] generics_0.1.2   jsonlite_1.7.3   pkgconfig_2.0.3