pacman::p_load(tidyverse, RStata, lmtest, sandwich, prais)
setwd("/Users/shanchao/Documents/BCR Project/Data")
options("RStata.StataPath" = "/Applications/Stata/StataSE.app/Contents/MacOS/stata-se")
options("RStata.StataVersion" = 17)

# output plots names
residual.plot = "../plots/ResidualPlot_WI_std.png"
residual.plot.multi = "../plots/ResidualPlot_WI_std_multi.png"
ACF.plot  = "../plots/ACF_WI_std.png"
PACF.plot = "../plots/PACF_WI_std.png"
# output data names
res.white.test = "Processed Data/v4/white_tests_WI_std.csv"
res.bp.test = "Processed Data/v4/bp_tests_WI_std.csv"
res.dw.test = "Processed Data/v4/dw_tests_WI_std.csv"
res.bg.test = "Processed Data/v4/bg_tests_WI_std.csv"
res.robust.ols = "Processed Data/v4/res_robust_ols_WI_std.csv"
res.co.reg = "Processed Data/v4/res_co_reg_WI_std.csv"
res.pw.reg = "Processed Data/v4/res_pw_reg_WI_std.csv"
res.dols.reg  = "Processed Data/v4/res_dols_reg_WI_std.csv"
res.dols.test.wald = "Processed Data/v4/res_dols_test_wald.csv"
res.dols.test.pp   = "Processed Data/v4/res_dols_test_pp.csv"
res.dols.sw.reg = "Processed Data/v4/res_dols_sw_reg_WI_std.csv"
res.dgls.reg  = "Processed Data/v4/res_dgls_reg_WI_std.csv"
# obtain the model that pass time series tests
load("Processed Data/v4/TimeSeriesResults.RData")
model.names = rownames(table_output[[1]])
# load data for regressions, mydata
load("Processed Data/v4/Distribution.RData")
# only keep selected models (index already standardized)
model.data = mydata %>% select(YEAR, MFP, INDEX, TREND, any_of(model.names))
model.data$BLOOMLINEAR_K = model.data$BLOOMLINEAR

# plot the preferred model. Our first gamma model
model.res = lapply(model.names[model.names != "BLOOMLINEAR_K"], 
                   function(x) lm(log(MFP) ~ log(STOCK) + INDEX + TREND, 
                                  data = model.data %>% 
                                    select(YEAR, MFP, INDEX, TREND, all_of(x)) %>% 
                                    rename(STOCK = all_of(x))))
temp = lm(log(MFP) ~ STOCK + INDEX + TREND, 
          data = model.data %>% select(YEAR, MFP, INDEX, TREND, BLOOMLINEAR_K) %>% rename(STOCK = BLOOMLINEAR_K)) %>% list()
model.res = c(model.res, temp)
rm(temp)
cbind(YEAR = model.data$YEAR, model.res[[1]]$model, RESIDUALS = model.res[[1]]$residuals) %>% 
      ggplot(aes(x = YEAR, y = RESIDUALS)) + 
      geom_line() +
      labs(x = "year", y = "residual")
ggsave(residual.plot, height = 12, width = 16, units = "cm")

# plot residuals for gamma models.
data.frame(YEAR  = rep(model.data$YEAR, times = 3),
           Model = rep(model.names[1:3], each = length(model.data$YEAR)), 
           RESIDUALS = c(model.res[[1]]$residuals, model.res[[2]]$residuals, model.res[[3]]$residuals)) %>%
  write.csv(file = "Processed Data/v4/Figure5Residuals.csv")

data.frame(YEAR  = rep(model.data$YEAR, times = 3),
           Model = rep(model.names[1:3], each = length(model.data$YEAR)), 
           RESIDUALS = c(model.res[[1]]$residuals, model.res[[2]]$residuals, model.res[[3]]$residuals)) %>%
  ggplot(aes(x = YEAR, y = RESIDUALS)) + 
  geom_line(aes(linetype=Model)) +
  labs(x = "year", y = "residuals")
ggsave(residual.plot.multi, height = 12, width = 16, units = "cm")

# white test for all models
white_test = function(reg){
      regdata = data.frame(RESIDUAL = reg$residuals, FITTED = reg$fitted.values)
      res     = lm(RESIDUAL^2 ~ FITTED + I(FITTED^2), data = regdata)
      lm.stat = nrow(regdata)*summary(res)$r.squared
      p.value = pchisq(lm.stat, df = 2, lower.tail = F)
      res = c(lm.stat, p.value)
      names(res) = c("lm.stat", "p.value")
      return(res)
}
temp = sapply(model.res, white_test) %>% t() %>% as.data.frame() 
rownames(temp) = model.names
write.csv(temp, file = res.white.test)

# also perform the BP test
bp_test = function(x){
  temp_data = cbind(x$model, FITTED = x$fitted.values)
  colnames(temp_data) = c("log_MFP", "log_STOCK", "INDEX", "TREND", "FITTED")
  temp = bptest(log_MFP ~ log_STOCK + INDEX + TREND, ~ FITTED, data = temp_data) 
  res  = c(temp$statistic, temp$p.value)
  names(res) = c("BP", "p.value")
  return(res)
}
temp = sapply(model.res, bp_test) %>% t() %>% as.data.frame() # cannot reject homoskedasticity assumption for model 1 at any significance level
rownames(temp) = model.names
write.csv(temp, file = res.bp.test)

# produce robust standard error for OLS: using Newey-West estimators (small sample adjustment) 
robust_res = function(res){
      temp = coeftest(res, vcov = NeweyWest(res, adjust = TRUE))[,-3] %>% t()
      out  = matrix(NA, nrow = nrow(temp), ncol = ncol(temp)*2) %>% as.data.frame()
      rownames(out) = rownames(temp)
      colnames(out)[seq(1,(ncol(out)-1), by = 2)] = colnames(temp)
      if("STOCK" %in% colnames(temp)){
        colnames(temp)[colnames(temp) == "STOCK"] = "log(STOCK)"
        out  = out %>% rename('log(STOCK)' = 'STOCK')
      }
      colnames(out)[seq(2,(ncol(out)), by = 2)]   = paste0("sig_",colnames(temp))
      for(i in 1:nrow(out)){
        out[i,] = rbind(temp[i,], NA) %>% c()
      }
      for(i in seq(1,(ncol(out)-1), by = 2)){
        if(out[3,i] < 0.01){
          out[1, i+1] = "***"
        }else if(out[3, i] < 0.05){
          out[1, i+1] = "**"
        }else if(out[3, i] < 0.1){
          out[1, i+1] = "*"
        }else{
          ""
        }
      }
      out[is.na(out)] = ""
      out = out[-nrow(out),]
      return(out)
}
HAC_lags = sapply(model.res, function(x) {floor(bwNeweyWest(x))})
robust_reg_res = lapply(model.res, robust_res) %>% bind_rows()
robust_reg_res$model = rbind(model.names, "") %>% c()
robust_reg_res = robust_reg_res %>% select(model, everything())

# include 95% confidence interval for elasticity 
temp.confin = lapply(model.res[model.names != "BLOOMLINEAR_K"], function(x) {coefci(x, vcov = NeweyWest(x, adjust = TRUE))["log(STOCK)",]}) %>% bind_rows() %>% as.data.frame()
temp = lapply(model.res[model.names == "BLOOMLINEAR_K"], function(x) {coefci(x, vcov = NeweyWest(x, adjust = TRUE))["STOCK",]}) %>% bind_rows() %>% as.data.frame()
temp.confin = rbind(temp.confin, temp)
robust_reg_res$'lower' = c(rbind(temp.confin[,1], NA))
robust_reg_res$'upper' = c(rbind(temp.confin[,2], NA))
robust_reg_res$lower[robust_reg_res$model == "BLOOMLINEAR_K"] = robust_reg_res$lower[robust_reg_res$model == "BLOOMLINEAR_K"]*median(model.data$BLOOMLINEAR_K)
robust_reg_res$upper[robust_reg_res$model == "BLOOMLINEAR_K"] = robust_reg_res$upper[robust_reg_res$model == "BLOOMLINEAR_K"]*median(model.data$BLOOMLINEAR_K)

robust_reg_res$elasticity = robust_reg_res$`log(STOCK)`
robust_reg_res$elasticity[which(robust_reg_res$model == "")] = NA
robust_reg_res$elasticity[robust_reg_res$model == "BLOOMLINEAR_K"] = robust_reg_res$elasticity[robust_reg_res$model == "BLOOMLINEAR_K"]*median(model.data$BLOOMLINEAR_K)

write.csv(robust_reg_res, res.robust.ols, row.names = F, na="")
rm(temp, temp.confin)
# test for auto correlation
# Durbin-Watson Test
dw_test = function(x){
  temp = dwtest(x, alternative = "greater")
  out  = c(temp$statistic, temp$p.value)
  names(out) = c("DW", "p.value")
  return(out)
}
temp = lapply(model.res, dw_test) %>% bind_rows() %>% as.data.frame()
rownames(temp) = model.names
write.csv(temp, res.dw.test)

# Breusch-Godfrey Tests
bg_test = function(x){
  for (i in 1:3) {
    assign(paste0("order_", i), bgtest(x, order = i, type = 'Chisq', fill = NA)[c(1,4)] %>% bind_cols())
  }
  out = rbind(rbind(order_1, order_2),order_3)
  out$order = 1:3
  return(select(out, order, everything()))
}
temp = lapply(model.res, bg_test) %>% bind_rows() %>% as.data.frame()
temp$model = rep(model.names, each = 3)
temp = temp %>% select(model, everything())
write.csv(temp, res.bg.test, row.names = F)
# time series plots
png(ACF.plot, width = 600, height = 400)
acf(model.res[[1]]$residuals, xlim=c(1,10), main = "")
dev.off()
png(PACF.plot, width = 600, height = 400)
pacf(model.res[[1]]$residuals, lag.max = 10, main = "")
dev.off()

# adjust for autocorrelation: Cochrane-Orcutt
CO_res = function(x){
      temp_data = x$model
      colnames(temp_data) = c("log_MFP", "log_STOCK", "INDEX", "TREND")
      temp_data = stata("tsset TREND
                        prais  log_MFP log_STOCK TREND INDEX, corc r ssesearch 
                        gen intercept = e(b)[1,4]
                        gen log_stock = e(b)[1,1]
                        gen index     = e(b)[1,3]
                        gen trend     = e(b)[1,2]
                        gen intercept_vcov = e(V)[4,4]
                        gen log_stock_vcov = e(V)[1,1]
                        gen index_vcov     = e(V)[3,3]
                        gen trend_vcov     = e(V)[2,2]
                        keep if _n == 1
                        drop log_MFP log_STOCK TREND INDEX", 
                        data.in  = temp_data,
                        data.out = T,
                        stata.echo = F)
      
      out  = matrix(c(rbind(as.numeric(temp_data[1,]), NA)), nrow = 2, byrow = T) %>% as.data.frame()
      colnames(out)[seq(1, ncol(temp_data), by = 2)] = colnames(temp_data)[1:(ncol(temp_data)/2)]
      out[2,] = sqrt(out[2,])
      
      #calculate p values
      df_residual = x$df.residual - 1
      out = rbind(out, sapply(out[1,]/out[2,], function(x) pt(abs(x), df = df_residual, lower.tail = F))*2)
      
      # fill in stars
      for(i in seq(1,(ncol(out)-1), by = 2)){
        if(out[3,i] < 0.01){
          out[1, i+1] = "***"
        }else if(out[3, i] < 0.05){
          out[1, i+1] = "**"
        }else if(out[3, i] < 0.1){
          out[1, i+1] = "*"
        }else{
          ""
        }
      }
      out = out[1:2,]
      out$lower = out$log_stock[1] - out$log_stock[2] * qt(0.975, df = df_residual)
      out$lower[2] = NA
      out$upper = out$log_stock[1] + out$log_stock[2] * qt(0.975, df = df_residual)
      out$upper[2] = NA
      out[is.na(out)] = "" 
      return(out)
}

CO_reg_res = lapply(model.res, CO_res) %>% bind_rows()
CO_reg_res$model = rbind(model.names, "") %>% c()
CO_reg_res = CO_reg_res %>% select(model, everything())
# calculate elasticity for Bloom models K and CI
CO_reg_res$elasticity = CO_reg_res$log_stock
CO_reg_res$elasticity[which(CO_reg_res$model == "")] = NA
CO_reg_res$elasticity[CO_reg_res$model == "BLOOMLINEAR_K"] = CO_reg_res$elasticity[CO_reg_res$model == "BLOOMLINEAR_K"]*median(model.data$BLOOMLINEAR_K)
CO_reg_res$lower[CO_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(CO_reg_res$lower[CO_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K)
CO_reg_res$upper[CO_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(CO_reg_res$upper[CO_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K)
write.csv(CO_reg_res, file = res.co.reg, row.names = F, na = "")

# adjust for autocorrelation: Prais–Winsten
PW_res = function(x){
  temp_data = x$model
  colnames(temp_data) = c("log_MFP", "log_STOCK", "INDEX", "TREND")
  temp_data = stata("tsset TREND
                        prais  log_MFP log_STOCK TREND INDEX, r ssesearch 
                        gen intercept = e(b)[1,4]
                        gen log_stock = e(b)[1,1]
                        gen index     = e(b)[1,3]
                        gen trend     = e(b)[1,2]
                        gen intercept_vcov = e(V)[4,4]
                        gen log_stock_vcov = e(V)[1,1]
                        gen index_vcov     = e(V)[3,3]
                        gen trend_vcov     = e(V)[2,2]
                        keep if _n == 1
                        drop log_MFP log_STOCK TREND INDEX", 
                    data.in  = temp_data,
                    data.out = T,
                    stata.echo = F)
  
  out  = matrix(c(rbind(as.numeric(temp_data[1,]), NA)), nrow = 2, byrow = T) %>% as.data.frame()
  colnames(out)[seq(1, ncol(temp_data), by = 2)] = colnames(temp_data)[1:(ncol(temp_data)/2)]
  out[2,] = sqrt(out[2,])
  
  # calculate p values
  df_residual = x$df.residual
  out = rbind(out, sapply(out[1,]/out[2,], function(x) pt(abs(x), df = df_residual, lower.tail = F))*2)
  
  # fill in stars
  for(i in seq(1,(ncol(out)-1), by = 2)){
    if(out[3,i] < 0.01){
      out[1, i+1] = "***"
    }else if(out[3, i] < 0.05){
      out[1, i+1] = "**"
    }else if(out[3, i] < 0.1){
      out[1, i+1] = "*"
    }else{
      ""
    }
  }
  out = out[1:2,]
  out$lower = out$log_stock[1] - out$log_stock[2] * qt(0.975, df = df_residual)
  out$lower[2] = NA
  out$upper = out$log_stock[1] + out$log_stock[2] * qt(0.975, df = df_residual)
  out$upper[2] = NA
  out[is.na(out)] = "" 
  return(out)
}

PW_reg_res = lapply(model.res, PW_res) %>% bind_rows()
PW_reg_res$model = rbind(model.names, "") %>% c()
PW_reg_res = PW_reg_res %>% select(model, everything())
# Bloom K elasticity and CI
PW_reg_res$elasticity = PW_reg_res$log_stock
PW_reg_res$elasticity[which(PW_reg_res$model == "")] = NA
PW_reg_res$elasticity[PW_reg_res$model == "BLOOMLINEAR_K"] = PW_reg_res$elasticity[PW_reg_res$model == "BLOOMLINEAR_K"]*median(model.data$BLOOMLINEAR_K)
PW_reg_res$lower[PW_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(PW_reg_res$lower[PW_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K)
PW_reg_res$upper[PW_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(PW_reg_res$upper[PW_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K)
write.csv(PW_reg_res, file = res.pw.reg, row.names = F, na = "")

# autocorrelation adjustment: dynamic OLS
library(dynlm)
library(aod)
dols = function(x){
  temp_data = x$model
  colnames(temp_data) = c("log_MFP", "log_STOCK", "INDEX", "TREND")
  # convert data to time series object
  temp_data = window(as.ts(temp_data, start = 1940, frequency = 1, end = 2007))
  
  # find the leads and lags window that we would search for
  # k = floor(nrow(temp_data)^(1/3))/2
  k = floor(4*(nrow(temp_data)/100)^(1/4)) # Choi and Kurozumi (2012)
  # calculate BIC after running DOLS 
  bic <- function (model){
    n <- df.residual(model) + length(variable.names(model))
    n*log(sum(resid(model)^2)/n) + length(variable.names(model)) * log(n)
  }
  
  # need to fix the start date of the model so that different model will use the same set of data for model selection purpose.
  # use the largest lead/lag 
  DOLS <- dynlm(log_MFP ~ log_STOCK + INDEX + L(diff(log_STOCK),-k:k) + TREND, data = temp_data)
  bics <- sapply(1:k, function(x) bic(dynlm(log_MFP ~ log_STOCK + INDEX + L(diff(log_STOCK),-x:x) + TREND, 
                                            data = temp_data, start = start(DOLS), end = end(DOLS))))
  k = which.min(bics)
  # optimal 
  DOLS <- dynlm(log_MFP ~ log_STOCK + INDEX + L(diff(log_STOCK),-k:k) + TREND, data = temp_data)
  # output
  out = robust_res(DOLS)
  # confidence interval
  CI = coefci(DOLS, vcov = NeweyWest(DOLS, adjust = TRUE))["log_STOCK",]
  CI = matrix(c(CI, NA, NA), byrow = T, nrow = 2) %>% as.data.frame()
  colnames(CI) = c("lower", "upper")
  
  out = cbind(out, CI)
  out$elasticity = c(out$log_STOCK[1], NA) 
  out$MSE = c(mean(DOLS$residuals^2), NA)
  return(out)
}

DOLS_reg_res = lapply(model.res, dols) %>% bind_rows()
DOLS_reg_res$model = rbind(model.names, "") %>% c()
DOLS_reg_res = DOLS_reg_res %>% select(model, everything())
# Bloom K elasticity and CI
DOLS_reg_res$elasticity[DOLS_reg_res$model == "BLOOMLINEAR_K"] = DOLS_reg_res$elasticity[DOLS_reg_res$model == "BLOOMLINEAR_K"]*median(model.data$BLOOMLINEAR_K)
DOLS_reg_res$lower[DOLS_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(DOLS_reg_res$lower[DOLS_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K)
DOLS_reg_res$upper[DOLS_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(DOLS_reg_res$upper[DOLS_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K)
write.csv(DOLS_reg_res, file = res.dols.reg, row.names = F, na = "")

# Stock and Watson (1993) estimator
# A reference explain kernel and bandwidth for long-run variance estimation
# https://stats.stackexchange.com/questions/153444/what-is-the-long-run variance
# version 1: using cointReg long-run variance
library(cointReg)
# dols_SW = function(x){
#   temp_data = x$model
#   colnames(temp_data) = c("log_MFP", "log_STOCK", "INDEX", "TREND")
#   bic <- function (model){
#     n <- sum(!is.na(model$residuals))
#     n*log(sum(model$residuals^2, na.rm = T)/n) + length(model$theta.all) * log(n)
#   }
#   leads.lags = expand_grid(lead = 0:3, lag = 0:3) %>% mutate(bic = NA)
#   for (i in 1:nrow(leads.lags)) {
#     leads.lags$bic[i] = cointRegD(x = temp_data %>% select(log_STOCK),
#                                   y = temp_data %>% select(log_MFP),
#                                   deter = temp_data %>% select(INDEX, TREND) %>% mutate(INTERCEPT = 1),
#                                   n.lead = leads.lags$lead[i], n.lag = leads.lags$lag[i],
#                                   bandwidth = "nw") %>% bic()
#   }
#   leads.lags = leads.lags %>% arrange(bic)
#   DOLS <- cointRegD(x = temp_data %>% select(log_STOCK),
#                     y = temp_data %>% select(log_MFP),
#                     deter = temp_data %>% select(INDEX, TREND) %>% mutate(INTERCEPT = 1),
#                     n.lead = leads.lags$lead[1], n.lag = leads.lags$lag[1],
#                     bandwidth = "nw")
#   # output
#   temp = rbind(DOLS$theta.all, sqrt(diag(DOLS$varmat))) %>% rbind(., NA)
#   df   = length(DOLS$residuals) - length(DOLS$theta)
#   temp[3,] = sapply(temp[1,]/temp[2, ], function(x){2 * pt(-abs(x), df = df)})
#   out  = matrix(NA, nrow = nrow(temp), ncol = ncol(temp)*2) %>% as.data.frame()
#   rownames(out) = rownames(temp)
#   colnames(out)[seq(1,(ncol(out)-1), by = 2)] = colnames(temp)
#   colnames(out)[seq(2,(ncol(out)), by = 2)]   = paste0("sig_",colnames(temp))
#   for(i in 1:nrow(out)){
#     out[i,] = rbind(temp[i,], NA) %>% c()
#   }
#   for(i in seq(1,(ncol(out)-1), by = 2)){
#     if(out[3,i] < 0.01){
#       out[1, i+1] = "***"
#     }else if(out[3, i] < 0.05){
#       out[1, i+1] = "**"
#     }else if(out[3, i] < 0.1){
#       out[1, i+1] = "*"
#     }else{
#       ""
#     }
#   }
#   out[is.na(out)] = ""
#   out = out[-nrow(out),]
#   out$elasticity = c(out$`all.trunclog_STOCK`[1], NA)
#   out$lower = out$`all.trunclog_STOCK`[1] - out$`all.trunclog_STOCK`[2] * qt(0.975, df = df)
#   out$lower[2] = NA
#   out$upper = out$`all.trunclog_STOCK`[1] + out$`all.trunclog_STOCK`[2] * qt(0.975, df = df)
#   out$upper[2] = NA
#   return(out)
# }
# exclude 0 leads and lags
dols_SW = function(x){
  temp_data = x$model
  colnames(temp_data) = c("log_MFP", "log_STOCK", "INDEX", "TREND")
  bic <- function (model){
    n <- sum(!is.na(model$residuals))
    n*log(sum(model$residuals^2, na.rm = T)/n) + length(model$theta.all) * log(n)
  }
  leads.lags = expand_grid(lead = 0:3, lag = 0:3) %>% mutate(bic = NA)
  for (i in 1:nrow(leads.lags)) {
    leads.lags$bic[i] = cointRegD(x = temp_data %>% select(log_STOCK),
                                  y = temp_data %>% select(log_MFP),
                                  deter = temp_data %>% select(INDEX, TREND) %>% mutate(INTERCEPT = 1),
                                  n.lead = leads.lags$lead[i], n.lag = leads.lags$lag[i],
                                  bandwidth = "nw") %>% bic()
  }
  leads.lags = leads.lags %>% filter(!(lead == 0 & lag == 0)) %>% arrange(bic)
  DOLS <- cointRegD(x = temp_data %>% select(log_STOCK),
                    y = temp_data %>% select(log_MFP),
                    deter = temp_data %>% select(INDEX, TREND) %>% mutate(INTERCEPT = 1),
                    n.lead = leads.lags$lead[1], n.lag = leads.lags$lag[1],
                    bandwidth = bwNeweyWest(x, adjust = T))
  temp = rbind(DOLS$theta.all, sqrt(diag(DOLS$varmat))) %>% rbind(., NA)
  df   = length(DOLS$residuals) - length(DOLS$theta)
  temp[3,] = sapply(temp[1,]/temp[2, ], function(x){2 * pt(-abs(x), df = df)})
  out  = matrix(NA, nrow = nrow(temp), ncol = ncol(temp)*2) %>% as.data.frame()
  rownames(out) = rownames(temp)
  colnames(out)[seq(1,(ncol(out)-1), by = 2)] = colnames(temp)
  colnames(out)[seq(2,(ncol(out)), by = 2)]   = paste0("sig_",colnames(temp))
  for(i in 1:nrow(out)){
    out[i,] = rbind(temp[i,], NA) %>% c()
  }
  for(i in seq(1,(ncol(out)-1), by = 2)){
    if(out[3,i] < 0.01){
      out[1, i+1] = "***"
    }else if(out[3, i] < 0.05){
      out[1, i+1] = "**"
    }else if(out[3, i] < 0.1){
      out[1, i+1] = "*"
    }else{
      ""
    }
  }
  out[is.na(out)] = ""
  out = out[-nrow(out),]
  out$elasticity = c(out$`all.trunclog_STOCK`[1], NA)
  out$lower = out$`all.trunclog_STOCK`[1] - out$`all.trunclog_STOCK`[2] * qt(0.975, df = df)
  out$lower[2] = NA
  out$upper = out$`all.trunclog_STOCK`[1] + out$`all.trunclog_STOCK`[2] * qt(0.975, df = df)
  out$upper[2] = NA
  return(out)
}

DOLS_SW_reg_res = lapply(model.res, dols_SW) %>% bind_rows()
DOLS_SW_reg_res$model = rbind(model.names, "") %>% c()
DOLS_SW_reg_res = DOLS_SW_reg_res %>% select(model, everything())
# Bloom K elasticity and CI
DOLS_SW_reg_res$elasticity[DOLS_SW_reg_res$model == "BLOOMLINEAR_K"] = DOLS_SW_reg_res$elasticity[DOLS_SW_reg_res$model == "BLOOMLINEAR_K"]*median(model.data$BLOOMLINEAR_K)
DOLS_SW_reg_res$lower[DOLS_SW_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(DOLS_SW_reg_res$lower[DOLS_SW_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K)
DOLS_SW_reg_res$upper[DOLS_SW_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(DOLS_SW_reg_res$upper[DOLS_SW_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K)
write.csv(DOLS_SW_reg_res, file = res.dols.sw.reg, na = "", row.names = F)

# # autocorrelation adjustment: dynamic GLS
# dgls = function(x){
#   temp_data = x$model
#   colnames(temp_data) = c("log_MFP", "log_STOCK", "INDEX", "TREND")
#   # convert data to time series object
#   temp_data = window(as.ts(temp_data, start = 1940, frequency = 1, end = 2007))
#   
#   # find the leads and lags window that we would search for
#   # k = floor(nrow(temp_data)^(1/3))/2
#   k = floor(4*(nrow(temp_data)/100)^(1/4))
#   # calculate BIC after running DOLS 
#   bic <- function (model){
#     n <- df.residual(model) + length(variable.names(model))
#     log(sum(resid(model)^2)/n) + length(variable.names(model)) * log(n)/n
#   }
#   
#   # need to fix the start date of the model so that different model will use the same set of data for model selection purpose.
#   # use the largest lead/lag 
#   DOLS <- dynlm(log_MFP ~ log_STOCK + INDEX + L(diff(log_STOCK),-k:k) + TREND, data = temp_data)
#   # bics <- sapply(1:k, function(x) {
#   #   temp = dynlm(log_MFP ~ log_STOCK + INDEX + L(diff(log_STOCK),-x:x) + TREND,
#   #                data = temp_data,
#   #                start = start(DOLS), end = end(DOLS))$model
#   #   bic(prais_winsten(log_MFP ~., data = temp, index = 1:nrow(temp)))
#   # })
#   bics <- sapply(1:k, function(x) bic(dynlm(log_MFP ~ log_STOCK + INDEX + L(diff(log_STOCK),-x:x) + TREND,
#                                             data = temp_data, start = start(DOLS), end = end(DOLS))))
#   k = which.min(bics)
#   # optimal 
#   DOLS <- dynlm(log_MFP ~ log_STOCK + INDEX + L(diff(log_STOCK),-k:k) + TREND, data = temp_data)
#   DGLS <- prais_winsten(log_MFP ~.,
#                         data = DOLS$model,
#                         index = 1:nrow(DOLS$model))
#   # robust standard error HC1
#   temp_data = c(DGLS$coefficients, sqrt(diag(vcovHC(DGLS, type = "HC1"))))
#   # output
#   out  = matrix(c(rbind(as.numeric(temp_data), NA)), nrow = 2, byrow = T) %>% as.data.frame()
#   colnames(out)[seq(1, length(temp_data), by = 2)] = names(temp_data)[1:(length(temp_data)/2)]
#   
#   # calculate p values
#   df_residual = DGLS$df.residual
#   out = rbind(out, sapply(out[1,]/out[2,], function(x) pt(abs(x), df = df_residual, lower.tail = F))*2)
#   
#   # fill in stars
#   for(i in seq(1,(ncol(out)-1), by = 2)){
#     if(out[3,i] < 0.01){
#       out[1, i+1] = "***"
#     }else if(out[3, i] < 0.05){
#       out[1, i+1] = "**"
#     }else if(out[3, i] < 0.1){
#       out[1, i+1] = "*"
#     }else{
#       ""
#     }
#   }
#   out = out[1:2,]
#   out$lower = out$log_STOCK[1] - out$log_STOCK[2] * qt(0.975, df = df_residual)
#   out$lower[2] = NA
#   out$upper = out$log_STOCK[1] + out$log_STOCK[2] * qt(0.975, df = df_residual)
#   out$upper[2] = NA
#   out$elasticity = c(out$log_STOCK[1], NA)
#   out[is.na(out)] = "" 
#   return(out)
# }
# 
# DGLS_reg_res = lapply(model.res, dgls) %>% bind_rows()
# DGLS_reg_res$model = rbind(model.names, "") %>% c()
# DGLS_reg_res = DGLS_reg_res %>% select(model, everything())
# # Bloom K elasticity and CI
# DGLS_reg_res$elasticity[DGLS_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(DGLS_reg_res$elasticity[DGLS_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K)
# DGLS_reg_res$lower[DGLS_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(DGLS_reg_res$lower[DGLS_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K)
# DGLS_reg_res$upper[DGLS_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(DGLS_reg_res$upper[DGLS_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K)
# write.csv(DGLS_reg_res, file = res.dgls.reg, row.names = F, na = "")