pacman::p_load(tidyverse, RStata, readxl, lmtest, sandwich) setwd("/Users/shanchaowang/Desktop/Submission Data/") # set to your local directory options("RStata.StataPath" = "/Applications/Stata/StataSE.app/Contents/MacOS/stata-se") options("RStata.StataVersion" = 17) # load data load("Distribution.RData") output.name = "TimeSeriesResults.RData" # reproduce table 2 of Anderson & Song, check stationarity of lnMFP and lnZ # plot of raw data of weather index and lnMFP ggplot(mydata, aes(x = YEAR, y = log(MFP))) + geom_line() + xlab("Year") + ylab("Natural Logs") table2 = stata("Code for Processed Data/2.Table2.do", data.in = mydata %>% select(YEAR, MFP, INDEX), data.out = TRUE, stata.echo = F) # output has five elements table2 = as.data.frame(matrix(table2[1,], ncol = 5, byrow = T)) table2 = cbind(data.frame(Variable = c("lnMFP", "lag_lnMFP", "index")), table2) colnames(table2) = c("Varaible", "SIC_lags", "Tau-stat", "1_cv", "5_cv", "10_cv") table2 # Do cointegration tests between lnMFP and lnStock # save model names model.names = c(colnames(mydata)[-(1:4)], "BLOOMLINEAR_K") # create data frame to store results nvar = 42 + 1 # number of variables that STATA return + SSE from each regression res = list() for(i in 1:length(model.names)){ # send data to STATA to process if(model.names[i] == "BLOOMLINEAR_K"){ temp = mydata %>% select("YEAR", "MFP", "INDEX", "TREND", "BLOOMLINEAR") %>% rename(STOCK = BLOOMLINEAR) res[[i]] = stata("2.TwoSeriesTests_Bloom.do", data.in = temp, data.out = T, stata.echo = F) res[[i]]$SSE = sum(lm(log(MFP) ~ STOCK + INDEX + TREND, data = temp)$residuals^2) }else{ temp = mydata %>% select("YEAR", "MFP", "INDEX", "TREND", model.names[i]) %>% rename(STOCK = model.names[i]) res[[i]] = stata("2.TwoSeriesTests.do", data.in = temp, data.out = T, stata.echo = F) res[[i]]$SSE = sum(lm(log(MFP) ~ log(STOCK) + INDEX + TREND, data = temp)$residuals^2) } } res = res %>% bind_rows() rownames(res) = model.names # make results interpretable # a function given test statistics and critical values, gives rejection results reject_null = function(data, levels = c(1, 5, 10)){ test_statistics = abs(data[1]) cv = abs(data[-1]) return(levels[test_statistics >= cv][1]) } out = res %>% select(SSE) %>% mutate(DFGLS_LEVEL = apply(res %>% select(DFGLS_LOG_STOCK, DFGLS_LOG_STOCK_1, DFGLS_LOG_STOCK_5, DFGLS_LOG_STOCK_10), 1, reject_null), DFGLS_LAG = apply(res %>% select(DFGLS_D_LOG_STOCK, DFGLS_D_LOG_STOCK_1, DFGLS_D_LOG_STOCK_5, DFGLS_D_LOG_STOCK_10), 1, reject_null), PPTEST = apply(res %>% select(PPTEST_T, PPTEST_T_1, PPTEST_T_5, PPTEST_T_10), 1, reject_null), JOHANSEN_AIC_M3 = (res$AIC_TRACE_0_M3 >= res$AIC_TRACE_CV_0_M3) & (res$AIC_TRACE_1_M3 < res$AIC_TRACE_CV_1_M3), JOHANSEN_AIC_M4 = (res$AIC_TRACE_0_M4 >= res$AIC_TRACE_CV_0_M4) & (res$AIC_TRACE_1_M4 < res$AIC_TRACE_CV_1_M4), JOHANSEN_HQIC_M3 = (res$HQIC_TRACE_0_M3 >= res$HQIC_TRACE_CV_0_M3) & (res$HQIC_TRACE_1_M3 < res$HQIC_TRACE_CV_1_M3), JOHANSEN_HQIC_M4 = (res$HQIC_TRACE_0_M4 >= res$HQIC_TRACE_CV_0_M4) & (res$HQIC_TRACE_1_M4 < res$HQIC_TRACE_CV_1_M4), JOHANSEN_SBIC_M3 = (res$SBIC_TRACE_0_M3 >= res$SBIC_TRACE_CV_0_M3) & (res$SBIC_TRACE_1_M3 < res$SBIC_TRACE_CV_1_M3), JOHANSEN_SBIC_M4 = (res$SBIC_TRACE_0_M4 >= res$SBIC_TRACE_CV_0_M4) & (res$SBIC_TRACE_1_M4 < res$SBIC_TRACE_CV_1_M4)) out$JOHANSEN = apply(out %>% select(JOHANSEN_HQIC_M3, JOHANSEN_HQIC_M4, JOHANSEN_AIC_M3, JOHANSEN_AIC_M4, JOHANSEN_SBIC_M3, JOHANSEN_SBIC_M4), 1, any) # how many gamma models pass the first round of test: I(1) out %>% filter(is.na(DFGLS_LEVEL), !is.na(DFGLS_LAG)) %>% nrow(.) # pick gammas that passed tests table_output = list() table_output[[1]] = rbind(out[startsWith(model.names, "D"),] %>% filter(is.na(DFGLS_LEVEL) & !is.na(DFGLS_LAG)) %>% filter((!is.na(PPTEST)) & JOHANSEN) %>% arrange(SSE), out["Delta=0.9, Lambda=0.7",], out[!startsWith(model.names, "D"),]) table_output[[2]] = res[rownames(table_output[[1]]),] # store outcome save(table_output, file = output.name)