pacman::p_load(tidyverse, RStata, lmtest, sandwich, prais) setwd("/Users/shanchao/Documents/BCR Project/Data") options("RStata.StataPath" = "/Applications/Stata/StataSE.app/Contents/MacOS/stata-se") options("RStata.StataVersion" = 17) # output plots names residual.plot = "../plots/ResidualPlot_WI_std.png" residual.plot.multi = "../plots/ResidualPlot_WI_std_multi.png" ACF.plot = "../plots/ACF_WI_std.png" PACF.plot = "../plots/PACF_WI_std.png" # output data names res.white.test = "Processed Data/v4/white_tests_WI_std.csv" res.bp.test = "Processed Data/v4/bp_tests_WI_std.csv" res.dw.test = "Processed Data/v4/dw_tests_WI_std.csv" res.bg.test = "Processed Data/v4/bg_tests_WI_std.csv" res.robust.ols = "Processed Data/v4/res_robust_ols_WI_std.csv" res.co.reg = "Processed Data/v4/res_co_reg_WI_std.csv" res.pw.reg = "Processed Data/v4/res_pw_reg_WI_std.csv" res.dols.reg = "Processed Data/v4/res_dols_reg_WI_std.csv" res.dols.test.wald = "Processed Data/v4/res_dols_test_wald.csv" res.dols.test.pp = "Processed Data/v4/res_dols_test_pp.csv" res.dols.sw.reg = "Processed Data/v4/res_dols_sw_reg_WI_std.csv" res.dgls.reg = "Processed Data/v4/res_dgls_reg_WI_std.csv" # obtain the model that pass time series tests load("Processed Data/v4/TimeSeriesResults.RData") model.names = rownames(table_output[[1]]) # load data for regressions, mydata load("Processed Data/v4/Distribution.RData") # only keep selected models (index already standardized) model.data = mydata %>% select(YEAR, MFP, INDEX, TREND, any_of(model.names)) model.data$BLOOMLINEAR_K = model.data$BLOOMLINEAR # plot the preferred model. Our first gamma model model.res = lapply(model.names[model.names != "BLOOMLINEAR_K"], function(x) lm(log(MFP) ~ log(STOCK) + INDEX + TREND, data = model.data %>% select(YEAR, MFP, INDEX, TREND, all_of(x)) %>% rename(STOCK = all_of(x)))) temp = lm(log(MFP) ~ STOCK + INDEX + TREND, data = model.data %>% select(YEAR, MFP, INDEX, TREND, BLOOMLINEAR_K) %>% rename(STOCK = BLOOMLINEAR_K)) %>% list() model.res = c(model.res, temp) rm(temp) cbind(YEAR = model.data$YEAR, model.res[[1]]$model, RESIDUALS = model.res[[1]]$residuals) %>% ggplot(aes(x = YEAR, y = RESIDUALS)) + geom_line() + labs(x = "year", y = "residual") ggsave(residual.plot, height = 12, width = 16, units = "cm") # plot residuals for gamma models. data.frame(YEAR = rep(model.data$YEAR, times = 3), Model = rep(model.names[1:3], each = length(model.data$YEAR)), RESIDUALS = c(model.res[[1]]$residuals, model.res[[2]]$residuals, model.res[[3]]$residuals)) %>% write.csv(file = "Processed Data/v4/Figure5Residuals.csv") data.frame(YEAR = rep(model.data$YEAR, times = 3), Model = rep(model.names[1:3], each = length(model.data$YEAR)), RESIDUALS = c(model.res[[1]]$residuals, model.res[[2]]$residuals, model.res[[3]]$residuals)) %>% ggplot(aes(x = YEAR, y = RESIDUALS)) + geom_line(aes(linetype=Model)) + labs(x = "year", y = "residuals") ggsave(residual.plot.multi, height = 12, width = 16, units = "cm") # white test for all models white_test = function(reg){ regdata = data.frame(RESIDUAL = reg$residuals, FITTED = reg$fitted.values) res = lm(RESIDUAL^2 ~ FITTED + I(FITTED^2), data = regdata) lm.stat = nrow(regdata)*summary(res)$r.squared p.value = pchisq(lm.stat, df = 2, lower.tail = F) res = c(lm.stat, p.value) names(res) = c("lm.stat", "p.value") return(res) } temp = sapply(model.res, white_test) %>% t() %>% as.data.frame() rownames(temp) = model.names write.csv(temp, file = res.white.test) # also perform the BP test bp_test = function(x){ temp_data = cbind(x$model, FITTED = x$fitted.values) colnames(temp_data) = c("log_MFP", "log_STOCK", "INDEX", "TREND", "FITTED") temp = bptest(log_MFP ~ log_STOCK + INDEX + TREND, ~ FITTED, data = temp_data) res = c(temp$statistic, temp$p.value) names(res) = c("BP", "p.value") return(res) } temp = sapply(model.res, bp_test) %>% t() %>% as.data.frame() # cannot reject homoskedasticity assumption for model 1 at any significance level rownames(temp) = model.names write.csv(temp, file = res.bp.test) # produce robust standard error for OLS: using Newey-West estimators (small sample adjustment) robust_res = function(res){ temp = coeftest(res, vcov = NeweyWest(res, adjust = TRUE))[,-3] %>% t() out = matrix(NA, nrow = nrow(temp), ncol = ncol(temp)*2) %>% as.data.frame() rownames(out) = rownames(temp) colnames(out)[seq(1,(ncol(out)-1), by = 2)] = colnames(temp) if("STOCK" %in% colnames(temp)){ colnames(temp)[colnames(temp) == "STOCK"] = "log(STOCK)" out = out %>% rename('log(STOCK)' = 'STOCK') } colnames(out)[seq(2,(ncol(out)), by = 2)] = paste0("sig_",colnames(temp)) for(i in 1:nrow(out)){ out[i,] = rbind(temp[i,], NA) %>% c() } for(i in seq(1,(ncol(out)-1), by = 2)){ if(out[3,i] < 0.01){ out[1, i+1] = "***" }else if(out[3, i] < 0.05){ out[1, i+1] = "**" }else if(out[3, i] < 0.1){ out[1, i+1] = "*" }else{ "" } } out[is.na(out)] = "" out = out[-nrow(out),] return(out) } HAC_lags = sapply(model.res, function(x) {floor(bwNeweyWest(x))}) robust_reg_res = lapply(model.res, robust_res) %>% bind_rows() robust_reg_res$model = rbind(model.names, "") %>% c() robust_reg_res = robust_reg_res %>% select(model, everything()) # include 95% confidence interval for elasticity temp.confin = lapply(model.res[model.names != "BLOOMLINEAR_K"], function(x) {coefci(x, vcov = NeweyWest(x, adjust = TRUE))["log(STOCK)",]}) %>% bind_rows() %>% as.data.frame() temp = lapply(model.res[model.names == "BLOOMLINEAR_K"], function(x) {coefci(x, vcov = NeweyWest(x, adjust = TRUE))["STOCK",]}) %>% bind_rows() %>% as.data.frame() temp.confin = rbind(temp.confin, temp) robust_reg_res$'lower' = c(rbind(temp.confin[,1], NA)) robust_reg_res$'upper' = c(rbind(temp.confin[,2], NA)) robust_reg_res$lower[robust_reg_res$model == "BLOOMLINEAR_K"] = robust_reg_res$lower[robust_reg_res$model == "BLOOMLINEAR_K"]*median(model.data$BLOOMLINEAR_K) robust_reg_res$upper[robust_reg_res$model == "BLOOMLINEAR_K"] = robust_reg_res$upper[robust_reg_res$model == "BLOOMLINEAR_K"]*median(model.data$BLOOMLINEAR_K) robust_reg_res$elasticity = robust_reg_res$`log(STOCK)` robust_reg_res$elasticity[which(robust_reg_res$model == "")] = NA robust_reg_res$elasticity[robust_reg_res$model == "BLOOMLINEAR_K"] = robust_reg_res$elasticity[robust_reg_res$model == "BLOOMLINEAR_K"]*median(model.data$BLOOMLINEAR_K) write.csv(robust_reg_res, res.robust.ols, row.names = F, na="") rm(temp, temp.confin) # test for auto correlation # Durbin-Watson Test dw_test = function(x){ temp = dwtest(x, alternative = "greater") out = c(temp$statistic, temp$p.value) names(out) = c("DW", "p.value") return(out) } temp = lapply(model.res, dw_test) %>% bind_rows() %>% as.data.frame() rownames(temp) = model.names write.csv(temp, res.dw.test) # Breusch-Godfrey Tests bg_test = function(x){ for (i in 1:3) { assign(paste0("order_", i), bgtest(x, order = i, type = 'Chisq', fill = NA)[c(1,4)] %>% bind_cols()) } out = rbind(rbind(order_1, order_2),order_3) out$order = 1:3 return(select(out, order, everything())) } temp = lapply(model.res, bg_test) %>% bind_rows() %>% as.data.frame() temp$model = rep(model.names, each = 3) temp = temp %>% select(model, everything()) write.csv(temp, res.bg.test, row.names = F) # time series plots png(ACF.plot, width = 600, height = 400) acf(model.res[[1]]$residuals, xlim=c(1,10), main = "") dev.off() png(PACF.plot, width = 600, height = 400) pacf(model.res[[1]]$residuals, lag.max = 10, main = "") dev.off() # adjust for autocorrelation: Cochrane-Orcutt CO_res = function(x){ temp_data = x$model colnames(temp_data) = c("log_MFP", "log_STOCK", "INDEX", "TREND") temp_data = stata("tsset TREND prais log_MFP log_STOCK TREND INDEX, corc r ssesearch gen intercept = e(b)[1,4] gen log_stock = e(b)[1,1] gen index = e(b)[1,3] gen trend = e(b)[1,2] gen intercept_vcov = e(V)[4,4] gen log_stock_vcov = e(V)[1,1] gen index_vcov = e(V)[3,3] gen trend_vcov = e(V)[2,2] keep if _n == 1 drop log_MFP log_STOCK TREND INDEX", data.in = temp_data, data.out = T, stata.echo = F) out = matrix(c(rbind(as.numeric(temp_data[1,]), NA)), nrow = 2, byrow = T) %>% as.data.frame() colnames(out)[seq(1, ncol(temp_data), by = 2)] = colnames(temp_data)[1:(ncol(temp_data)/2)] out[2,] = sqrt(out[2,]) #calculate p values df_residual = x$df.residual - 1 out = rbind(out, sapply(out[1,]/out[2,], function(x) pt(abs(x), df = df_residual, lower.tail = F))*2) # fill in stars for(i in seq(1,(ncol(out)-1), by = 2)){ if(out[3,i] < 0.01){ out[1, i+1] = "***" }else if(out[3, i] < 0.05){ out[1, i+1] = "**" }else if(out[3, i] < 0.1){ out[1, i+1] = "*" }else{ "" } } out = out[1:2,] out$lower = out$log_stock[1] - out$log_stock[2] * qt(0.975, df = df_residual) out$lower[2] = NA out$upper = out$log_stock[1] + out$log_stock[2] * qt(0.975, df = df_residual) out$upper[2] = NA out[is.na(out)] = "" return(out) } CO_reg_res = lapply(model.res, CO_res) %>% bind_rows() CO_reg_res$model = rbind(model.names, "") %>% c() CO_reg_res = CO_reg_res %>% select(model, everything()) # calculate elasticity for Bloom models K and CI CO_reg_res$elasticity = CO_reg_res$log_stock CO_reg_res$elasticity[which(CO_reg_res$model == "")] = NA CO_reg_res$elasticity[CO_reg_res$model == "BLOOMLINEAR_K"] = CO_reg_res$elasticity[CO_reg_res$model == "BLOOMLINEAR_K"]*median(model.data$BLOOMLINEAR_K) CO_reg_res$lower[CO_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(CO_reg_res$lower[CO_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K) CO_reg_res$upper[CO_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(CO_reg_res$upper[CO_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K) write.csv(CO_reg_res, file = res.co.reg, row.names = F, na = "") # adjust for autocorrelation: Prais–Winsten PW_res = function(x){ temp_data = x$model colnames(temp_data) = c("log_MFP", "log_STOCK", "INDEX", "TREND") temp_data = stata("tsset TREND prais log_MFP log_STOCK TREND INDEX, r ssesearch gen intercept = e(b)[1,4] gen log_stock = e(b)[1,1] gen index = e(b)[1,3] gen trend = e(b)[1,2] gen intercept_vcov = e(V)[4,4] gen log_stock_vcov = e(V)[1,1] gen index_vcov = e(V)[3,3] gen trend_vcov = e(V)[2,2] keep if _n == 1 drop log_MFP log_STOCK TREND INDEX", data.in = temp_data, data.out = T, stata.echo = F) out = matrix(c(rbind(as.numeric(temp_data[1,]), NA)), nrow = 2, byrow = T) %>% as.data.frame() colnames(out)[seq(1, ncol(temp_data), by = 2)] = colnames(temp_data)[1:(ncol(temp_data)/2)] out[2,] = sqrt(out[2,]) # calculate p values df_residual = x$df.residual out = rbind(out, sapply(out[1,]/out[2,], function(x) pt(abs(x), df = df_residual, lower.tail = F))*2) # fill in stars for(i in seq(1,(ncol(out)-1), by = 2)){ if(out[3,i] < 0.01){ out[1, i+1] = "***" }else if(out[3, i] < 0.05){ out[1, i+1] = "**" }else if(out[3, i] < 0.1){ out[1, i+1] = "*" }else{ "" } } out = out[1:2,] out$lower = out$log_stock[1] - out$log_stock[2] * qt(0.975, df = df_residual) out$lower[2] = NA out$upper = out$log_stock[1] + out$log_stock[2] * qt(0.975, df = df_residual) out$upper[2] = NA out[is.na(out)] = "" return(out) } PW_reg_res = lapply(model.res, PW_res) %>% bind_rows() PW_reg_res$model = rbind(model.names, "") %>% c() PW_reg_res = PW_reg_res %>% select(model, everything()) # Bloom K elasticity and CI PW_reg_res$elasticity = PW_reg_res$log_stock PW_reg_res$elasticity[which(PW_reg_res$model == "")] = NA PW_reg_res$elasticity[PW_reg_res$model == "BLOOMLINEAR_K"] = PW_reg_res$elasticity[PW_reg_res$model == "BLOOMLINEAR_K"]*median(model.data$BLOOMLINEAR_K) PW_reg_res$lower[PW_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(PW_reg_res$lower[PW_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K) PW_reg_res$upper[PW_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(PW_reg_res$upper[PW_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K) write.csv(PW_reg_res, file = res.pw.reg, row.names = F, na = "") # autocorrelation adjustment: dynamic OLS library(dynlm) library(aod) dols = function(x){ temp_data = x$model colnames(temp_data) = c("log_MFP", "log_STOCK", "INDEX", "TREND") # convert data to time series object temp_data = window(as.ts(temp_data, start = 1940, frequency = 1, end = 2007)) # find the leads and lags window that we would search for # k = floor(nrow(temp_data)^(1/3))/2 k = floor(4*(nrow(temp_data)/100)^(1/4)) # Choi and Kurozumi (2012) # calculate BIC after running DOLS bic <- function (model){ n <- df.residual(model) + length(variable.names(model)) n*log(sum(resid(model)^2)/n) + length(variable.names(model)) * log(n) } # need to fix the start date of the model so that different model will use the same set of data for model selection purpose. # use the largest lead/lag DOLS <- dynlm(log_MFP ~ log_STOCK + INDEX + L(diff(log_STOCK),-k:k) + TREND, data = temp_data) bics <- sapply(1:k, function(x) bic(dynlm(log_MFP ~ log_STOCK + INDEX + L(diff(log_STOCK),-x:x) + TREND, data = temp_data, start = start(DOLS), end = end(DOLS)))) k = which.min(bics) # optimal DOLS <- dynlm(log_MFP ~ log_STOCK + INDEX + L(diff(log_STOCK),-k:k) + TREND, data = temp_data) # output out = robust_res(DOLS) # confidence interval CI = coefci(DOLS, vcov = NeweyWest(DOLS, adjust = TRUE))["log_STOCK",] CI = matrix(c(CI, NA, NA), byrow = T, nrow = 2) %>% as.data.frame() colnames(CI) = c("lower", "upper") out = cbind(out, CI) out$elasticity = c(out$log_STOCK[1], NA) out$MSE = c(mean(DOLS$residuals^2), NA) return(out) } DOLS_reg_res = lapply(model.res, dols) %>% bind_rows() DOLS_reg_res$model = rbind(model.names, "") %>% c() DOLS_reg_res = DOLS_reg_res %>% select(model, everything()) # Bloom K elasticity and CI DOLS_reg_res$elasticity[DOLS_reg_res$model == "BLOOMLINEAR_K"] = DOLS_reg_res$elasticity[DOLS_reg_res$model == "BLOOMLINEAR_K"]*median(model.data$BLOOMLINEAR_K) DOLS_reg_res$lower[DOLS_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(DOLS_reg_res$lower[DOLS_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K) DOLS_reg_res$upper[DOLS_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(DOLS_reg_res$upper[DOLS_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K) write.csv(DOLS_reg_res, file = res.dols.reg, row.names = F, na = "") # Stock and Watson (1993) estimator # A reference explain kernel and bandwidth for long-run variance estimation # https://stats.stackexchange.com/questions/153444/what-is-the-long-run variance # version 1: using cointReg long-run variance library(cointReg) # dols_SW = function(x){ # temp_data = x$model # colnames(temp_data) = c("log_MFP", "log_STOCK", "INDEX", "TREND") # bic <- function (model){ # n <- sum(!is.na(model$residuals)) # n*log(sum(model$residuals^2, na.rm = T)/n) + length(model$theta.all) * log(n) # } # leads.lags = expand_grid(lead = 0:3, lag = 0:3) %>% mutate(bic = NA) # for (i in 1:nrow(leads.lags)) { # leads.lags$bic[i] = cointRegD(x = temp_data %>% select(log_STOCK), # y = temp_data %>% select(log_MFP), # deter = temp_data %>% select(INDEX, TREND) %>% mutate(INTERCEPT = 1), # n.lead = leads.lags$lead[i], n.lag = leads.lags$lag[i], # bandwidth = "nw") %>% bic() # } # leads.lags = leads.lags %>% arrange(bic) # DOLS <- cointRegD(x = temp_data %>% select(log_STOCK), # y = temp_data %>% select(log_MFP), # deter = temp_data %>% select(INDEX, TREND) %>% mutate(INTERCEPT = 1), # n.lead = leads.lags$lead[1], n.lag = leads.lags$lag[1], # bandwidth = "nw") # # output # temp = rbind(DOLS$theta.all, sqrt(diag(DOLS$varmat))) %>% rbind(., NA) # df = length(DOLS$residuals) - length(DOLS$theta) # temp[3,] = sapply(temp[1,]/temp[2, ], function(x){2 * pt(-abs(x), df = df)}) # out = matrix(NA, nrow = nrow(temp), ncol = ncol(temp)*2) %>% as.data.frame() # rownames(out) = rownames(temp) # colnames(out)[seq(1,(ncol(out)-1), by = 2)] = colnames(temp) # colnames(out)[seq(2,(ncol(out)), by = 2)] = paste0("sig_",colnames(temp)) # for(i in 1:nrow(out)){ # out[i,] = rbind(temp[i,], NA) %>% c() # } # for(i in seq(1,(ncol(out)-1), by = 2)){ # if(out[3,i] < 0.01){ # out[1, i+1] = "***" # }else if(out[3, i] < 0.05){ # out[1, i+1] = "**" # }else if(out[3, i] < 0.1){ # out[1, i+1] = "*" # }else{ # "" # } # } # out[is.na(out)] = "" # out = out[-nrow(out),] # out$elasticity = c(out$`all.trunclog_STOCK`[1], NA) # out$lower = out$`all.trunclog_STOCK`[1] - out$`all.trunclog_STOCK`[2] * qt(0.975, df = df) # out$lower[2] = NA # out$upper = out$`all.trunclog_STOCK`[1] + out$`all.trunclog_STOCK`[2] * qt(0.975, df = df) # out$upper[2] = NA # return(out) # } # exclude 0 leads and lags dols_SW = function(x){ temp_data = x$model colnames(temp_data) = c("log_MFP", "log_STOCK", "INDEX", "TREND") bic <- function (model){ n <- sum(!is.na(model$residuals)) n*log(sum(model$residuals^2, na.rm = T)/n) + length(model$theta.all) * log(n) } leads.lags = expand_grid(lead = 0:3, lag = 0:3) %>% mutate(bic = NA) for (i in 1:nrow(leads.lags)) { leads.lags$bic[i] = cointRegD(x = temp_data %>% select(log_STOCK), y = temp_data %>% select(log_MFP), deter = temp_data %>% select(INDEX, TREND) %>% mutate(INTERCEPT = 1), n.lead = leads.lags$lead[i], n.lag = leads.lags$lag[i], bandwidth = "nw") %>% bic() } leads.lags = leads.lags %>% filter(!(lead == 0 & lag == 0)) %>% arrange(bic) DOLS <- cointRegD(x = temp_data %>% select(log_STOCK), y = temp_data %>% select(log_MFP), deter = temp_data %>% select(INDEX, TREND) %>% mutate(INTERCEPT = 1), n.lead = leads.lags$lead[1], n.lag = leads.lags$lag[1], bandwidth = bwNeweyWest(x, adjust = T)) temp = rbind(DOLS$theta.all, sqrt(diag(DOLS$varmat))) %>% rbind(., NA) df = length(DOLS$residuals) - length(DOLS$theta) temp[3,] = sapply(temp[1,]/temp[2, ], function(x){2 * pt(-abs(x), df = df)}) out = matrix(NA, nrow = nrow(temp), ncol = ncol(temp)*2) %>% as.data.frame() rownames(out) = rownames(temp) colnames(out)[seq(1,(ncol(out)-1), by = 2)] = colnames(temp) colnames(out)[seq(2,(ncol(out)), by = 2)] = paste0("sig_",colnames(temp)) for(i in 1:nrow(out)){ out[i,] = rbind(temp[i,], NA) %>% c() } for(i in seq(1,(ncol(out)-1), by = 2)){ if(out[3,i] < 0.01){ out[1, i+1] = "***" }else if(out[3, i] < 0.05){ out[1, i+1] = "**" }else if(out[3, i] < 0.1){ out[1, i+1] = "*" }else{ "" } } out[is.na(out)] = "" out = out[-nrow(out),] out$elasticity = c(out$`all.trunclog_STOCK`[1], NA) out$lower = out$`all.trunclog_STOCK`[1] - out$`all.trunclog_STOCK`[2] * qt(0.975, df = df) out$lower[2] = NA out$upper = out$`all.trunclog_STOCK`[1] + out$`all.trunclog_STOCK`[2] * qt(0.975, df = df) out$upper[2] = NA return(out) } DOLS_SW_reg_res = lapply(model.res, dols_SW) %>% bind_rows() DOLS_SW_reg_res$model = rbind(model.names, "") %>% c() DOLS_SW_reg_res = DOLS_SW_reg_res %>% select(model, everything()) # Bloom K elasticity and CI DOLS_SW_reg_res$elasticity[DOLS_SW_reg_res$model == "BLOOMLINEAR_K"] = DOLS_SW_reg_res$elasticity[DOLS_SW_reg_res$model == "BLOOMLINEAR_K"]*median(model.data$BLOOMLINEAR_K) DOLS_SW_reg_res$lower[DOLS_SW_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(DOLS_SW_reg_res$lower[DOLS_SW_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K) DOLS_SW_reg_res$upper[DOLS_SW_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(DOLS_SW_reg_res$upper[DOLS_SW_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K) write.csv(DOLS_SW_reg_res, file = res.dols.sw.reg, na = "", row.names = F) # # autocorrelation adjustment: dynamic GLS # dgls = function(x){ # temp_data = x$model # colnames(temp_data) = c("log_MFP", "log_STOCK", "INDEX", "TREND") # # convert data to time series object # temp_data = window(as.ts(temp_data, start = 1940, frequency = 1, end = 2007)) # # # find the leads and lags window that we would search for # # k = floor(nrow(temp_data)^(1/3))/2 # k = floor(4*(nrow(temp_data)/100)^(1/4)) # # calculate BIC after running DOLS # bic <- function (model){ # n <- df.residual(model) + length(variable.names(model)) # log(sum(resid(model)^2)/n) + length(variable.names(model)) * log(n)/n # } # # # need to fix the start date of the model so that different model will use the same set of data for model selection purpose. # # use the largest lead/lag # DOLS <- dynlm(log_MFP ~ log_STOCK + INDEX + L(diff(log_STOCK),-k:k) + TREND, data = temp_data) # # bics <- sapply(1:k, function(x) { # # temp = dynlm(log_MFP ~ log_STOCK + INDEX + L(diff(log_STOCK),-x:x) + TREND, # # data = temp_data, # # start = start(DOLS), end = end(DOLS))$model # # bic(prais_winsten(log_MFP ~., data = temp, index = 1:nrow(temp))) # # }) # bics <- sapply(1:k, function(x) bic(dynlm(log_MFP ~ log_STOCK + INDEX + L(diff(log_STOCK),-x:x) + TREND, # data = temp_data, start = start(DOLS), end = end(DOLS)))) # k = which.min(bics) # # optimal # DOLS <- dynlm(log_MFP ~ log_STOCK + INDEX + L(diff(log_STOCK),-k:k) + TREND, data = temp_data) # DGLS <- prais_winsten(log_MFP ~., # data = DOLS$model, # index = 1:nrow(DOLS$model)) # # robust standard error HC1 # temp_data = c(DGLS$coefficients, sqrt(diag(vcovHC(DGLS, type = "HC1")))) # # output # out = matrix(c(rbind(as.numeric(temp_data), NA)), nrow = 2, byrow = T) %>% as.data.frame() # colnames(out)[seq(1, length(temp_data), by = 2)] = names(temp_data)[1:(length(temp_data)/2)] # # # calculate p values # df_residual = DGLS$df.residual # out = rbind(out, sapply(out[1,]/out[2,], function(x) pt(abs(x), df = df_residual, lower.tail = F))*2) # # # fill in stars # for(i in seq(1,(ncol(out)-1), by = 2)){ # if(out[3,i] < 0.01){ # out[1, i+1] = "***" # }else if(out[3, i] < 0.05){ # out[1, i+1] = "**" # }else if(out[3, i] < 0.1){ # out[1, i+1] = "*" # }else{ # "" # } # } # out = out[1:2,] # out$lower = out$log_STOCK[1] - out$log_STOCK[2] * qt(0.975, df = df_residual) # out$lower[2] = NA # out$upper = out$log_STOCK[1] + out$log_STOCK[2] * qt(0.975, df = df_residual) # out$upper[2] = NA # out$elasticity = c(out$log_STOCK[1], NA) # out[is.na(out)] = "" # return(out) # } # # DGLS_reg_res = lapply(model.res, dgls) %>% bind_rows() # DGLS_reg_res$model = rbind(model.names, "") %>% c() # DGLS_reg_res = DGLS_reg_res %>% select(model, everything()) # # Bloom K elasticity and CI # DGLS_reg_res$elasticity[DGLS_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(DGLS_reg_res$elasticity[DGLS_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K) # DGLS_reg_res$lower[DGLS_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(DGLS_reg_res$lower[DGLS_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K) # DGLS_reg_res$upper[DGLS_reg_res$model == "BLOOMLINEAR_K"] = as.numeric(DGLS_reg_res$upper[DGLS_reg_res$model == "BLOOMLINEAR_K"])*median(model.data$BLOOMLINEAR_K) # write.csv(DGLS_reg_res, file = res.dgls.reg, row.names = F, na = "")