library(pROC)
library(car)
library(moments)
setwd("C:/Users/bills/OneDrive/Desktop/VPM postdoc/GPMooseCaptureData/ROC")

ROC8_19<-read.csv("ROCFinal.csv", header = TRUE)

glm.fit=glm(ROC8_19$Pregnant ~ ROC8_19$Prog, family=binomial, data=ROC8_19)

#roc <- roc(ROC8_19$Pregnant, glm.fit$fitted.values, plot = TRUE)
roc <-roc(ROC8_19$Pregnant, ROC8_19$Prog, plot = TRUE)

roc(ROC8_19$Pregnant, glm.fit$fitted.values, plot=TRUE, legacy.axes=TRUE,
  percent=TRUE, xlab="False positive (%)", ylab="True postive (%)", col="#377eb8", lwd=4,
  ylim = c(0,100), xlim= c(100,0))

## 
## Call:
## roc.default(response = ROC8_19$Pregnant, predictor = glm.fit$fitted.values,     percent = TRUE, plot = TRUE, legacy.axes = TRUE, xlab = "False positive (%)",     ylab = "True postive (%)", col = "#377eb8", lwd = 4, ylim = c(0,         100), xlim = c(100, 0))
## 
## Data: glm.fit$fitted.values in 29 controls (ROC8_19$Pregnant 0) < 58 cases (ROC8_19$Pregnant 1).
## Area under the curve: 99.02%
roc.info <- roc(ROC8_19$Pregnant, glm.fit$fitted.values, legacy.axes=TRUE)

roc.df <- data.frame(
  tpp=roc.info$sensitivities*100,
  fpp=(1 - roc.info$specificities)*100,
  thresholds=roc.info$thresholds, ci = TRUE)

threshold<-coords(roc,x="best", input = "threshold", best.method = "closest.topleft")

threshold
##   threshold specificity sensitivity
## 1     1.115   0.9655172   0.9827586
ci.thresholds(roc, threshold = threshold$threshold)
## 95% CI (2000 stratified bootstrap replicates):
##  thresholds sp.low sp.median sp.high se.low se.median se.high
##       1.115 0.8966    0.9655       1 0.9483    0.9828       1

####BOXPLOTS###

number.calves <- subset(ROC8_19, Number==0 | Number==1 | Number==2)
  
boxplot(number.calves$Prog~number.calves$Number, ylab = "Serum progesterone concentration (ng/ml)", xlab = "Number of calves", col=c("red","blue","green"))
text(1,8, "a")
text(2,8, "b")
text(3,8, "b")

###Test for normality
agostino.test(number.calves$Prog)  
## 
##  D'Agostino skewness test
## 
## data:  number.calves$Prog
## skew = 0.79146, z = 2.88437, p-value = 0.003922
## alternative hypothesis: data have a skewness
#Progesterone not normally distributed.  Log-transformed (1+progesterone values so no zeroes)
logprog <- log(1+number.calves$Prog)
agostino.test(logprog)
## 
##  D'Agostino skewness test
## 
## data:  logprog
## skew = -0.24316, z = -0.96698, p-value = 0.3336
## alternative hypothesis: data have a skewness
ks.test(number.calves$Prog,pnorm(100, mean = 5, sd = 3))
## Warning in ks.test(number.calves$Prog, pnorm(100, mean = 5, sd = 3)): cannot
## compute exact p-value with ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  number.calves$Prog and pnorm(100, mean = 5, sd = 3)
## D = 0.65476, p-value = 0.7906
## alternative hypothesis: two-sided
ks.test(logprog,pnorm(100, mean = 5, sd = 3))
## Warning in ks.test(logprog, pnorm(100, mean = 5, sd = 3)): cannot compute exact
## p-value with ties
## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  logprog and pnorm(100, mean = 5, sd = 3)
## D = 0.61905, p-value = 0.8433
## alternative hypothesis: two-sided
ANOVA <- aov(logprog~number.calves$Number)
summary(ANOVA)
##                      Df Sum Sq Mean Sq F value Pr(>F)    
## number.calves$Number  2 27.274  13.637     163 <2e-16 ***
## Residuals            81  6.776   0.084                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(ANOVA)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = logprog ~ number.calves$Number)
## 
## $`number.calves$Number`
##          diff         lwr       upr     p adj
## 1-0 1.1337937  0.96253490 1.3050524 0.0000000
## 2-0 1.3070099  1.09980765 1.5142121 0.0000000
## 2-1 0.1732162 -0.02522185 0.3716542 0.0995064
###Supplemental density plots

###Pregnant vs non-pregnant
Pregnancy.status<-as.factor(ROC8_19$Pregnant)
densityPlot(ROC8_19$Prog, Pregnancy.status, lwd=2,xlab = "Serum progesterone (ng/ml)")
abline(v=threshold$threshold,col="red",lwd=2)

####Singleton vs twin
littersize <- subset(ROC8_19, Number ==1 | Number ==2)
littersize$twin <- ifelse(littersize$Number == 1, 0, 1) #changes Number from 1 and 2, to 0 = single, 1 = twin
twin.glm.fit=glm(littersize$twin ~ littersize$Prog, family=binomial, data=littersize)

twin.roc <- roc(littersize$twin, twin.glm.fit$fitted.values, plot = TRUE)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

roc(littersize$twin, twin.glm.fit$fitted.values, plot=TRUE, legacy.axes=TRUE,
  percent=TRUE, xlab="False positive (%)", ylab="True postive (%)", col="#377eb8", lwd=4,
  ylim = c(0,100), xlim= c(100,0))
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## 
## Call:
## roc.default(response = littersize$twin, predictor = twin.glm.fit$fitted.values,     percent = TRUE, plot = TRUE, legacy.axes = TRUE, xlab = "False positive (%)",     ylab = "True postive (%)", col = "#377eb8", lwd = 4, ylim = c(0,         100), xlim = c(100, 0))
## 
## Data: twin.glm.fit$fitted.values in 37 controls (littersize$twin 0) < 18 cases (littersize$twin 1).
## Area under the curve: 63.81%
twin.roc.info <- roc(littersize$twin, twin.glm.fit$fitted.values, legacy.axes=TRUE)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
twin.roc.df <- data.frame(
  tpp=twin.roc.info$sensitivities*100,
  fpp=(1 - twin.roc.info$specificities)*100,
  thresholds=twin.roc.info$thresholds, ci = TRUE)

twin.threshold<-coords(twin.roc,x="best", input = "threshold", best.method = "closest.topleft")

twin.threshold
##   threshold specificity sensitivity
## 1 0.2990772   0.5675676   0.6666667
ci.thresholds(twin.roc, threshold = twin.threshold$threshold)
## 95% CI (2000 stratified bootstrap replicates):
##  thresholds sp.low sp.median sp.high se.low se.median se.high
##   0.2990772 0.4054    0.5676  0.7297 0.4444    0.6667  0.8889
littersize$Number <- as.factor(littersize$Number)
Number.of.calves<-littersize$Number
densityPlot(littersize$Prog, Number.of.calves, lwd=2,xlab = "Serum progesterone concentration (ng/ml)")
abline(v=twin.threshold$threshold,col="red",lwd=2)

sessionInfo() 
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22000)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] moments_0.14  car_3.0-12    carData_3.0-5 pROC_1.18.0  
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.8      digest_0.6.29   plyr_1.8.6      R6_2.5.1       
##  [5] jsonlite_1.7.3  magrittr_2.0.1  evaluate_0.14   highr_0.9      
##  [9] rlang_0.4.12    stringi_1.7.6   jquerylib_0.1.4 bslib_0.3.1    
## [13] rmarkdown_2.11  tools_4.1.2     stringr_1.4.0   abind_1.4-5    
## [17] xfun_0.29       yaml_2.2.1      fastmap_1.1.0   compiler_4.1.2 
## [21] htmltools_0.5.2 knitr_1.37      sass_0.4.0