#Set working directory to where replication datasets are located.
#NOTE: this is an absolute, machine-specific path; point it at your own copy
#of the replication data before running the script.

setwd(dir = "C:/Users/cdmyers/Box/Current Projects/COVID-19/COVID-19 Content Analysis/Data")

library(tidyverse)

#Read local dataset and national dataset.
#Both file names are resolved relative to the working directory set above.

localData <- read.csv(
  file = "covidLocalNewsCoverageCombinedForReplication05062024.csv")
nationalData <- read.csv(
  file = "covidNationalNewsCoverageCombinedForReplication04292024.csv")

#Sample sizes for each dataset

#National articles: every row, then only rows where paper is "wapo"
nrow(nationalData)
sum(nationalData$paper == "wapo", na.rm = TRUE)

#Local articles: every row, then each circulation tier in turn
nrow(localData)
sum(localData$tier == 1, na.rm = TRUE)
sum(localData$tier == 2, na.rm = TRUE)
sum(localData$tier == 3, na.rm = TRUE)


##Row-wise proportions of the conflict coding within each paper and each tier.
##These are reported in Tables 2, 3, and A1.
##Each cross-tab is built first and then normalised so every row sums to 1.

#Local papers: one row per publication
localPaperByConflict <- table(localData$paper, localData$conflict)
prop.table(localPaperByConflict, margin = 1)

#Local papers: one row per circulation tier
localTierByConflict <- table(localData$tier, localData$conflict)
prop.table(localTierByConflict, margin = 1)

#National papers: one row per publication
nationalPaperByConflict <- table(nationalData$paper, nationalData$conflict)
prop.table(nationalPaperByConflict, margin = 1)

#Generating Table A.1

#Helper shared by every panel of the table.
#Takes an article-level data frame (optionally grouped with group_by()) that
#has `conflict` and `game` columns and returns one row per group (or a single
#row when ungrouped) with:
#  n             - number of articles
#  perc_conflict - mean of `conflict` times 100, rounded to 1 decimal
#  perc_game     - mean of `game` times 100, rounded to 1 decimal
summarise_frames <- function(data) {
  data %>%
    summarise(
      n = n(),
      perc_conflict = round(mean(conflict) * 100, 1),
      perc_game = round(mean(game) * 100, 1))
}

#One row per local paper
local_papers <- localData %>%
  group_by(paper) %>%
  summarise_frames() %>%
  rename("category" = paper) %>%
  mutate(category = as.character(category))

#One row per circulation tier; the tier id is turned into a character label
#so it can share the `category` column with paper names in bind_rows()
local_tiers <- localData %>%
  group_by(tier) %>%
  summarise_frames() %>%
  rename("category" = tier) %>%
  mutate(category = as.character(category))

#All local articles pooled
local_all <- localData %>%
  summarise_frames() %>%
  mutate(category = "all_local")

#One row per national paper
nationals <- nationalData %>%
  group_by(paper) %>%
  summarise_frames() %>%
  rename("category" = paper)

#All national articles pooled
national_all <- nationalData %>%
  summarise_frames() %>%
  mutate(category = "all_national")

#Stack the panels; bind_rows() matches columns by name
table_A1 <- bind_rows(
  local_papers, local_tiers, local_all, nationals, national_all)

#Generating Table 2

#`nationals` and `national_all` are the per-paper and pooled national
#summaries already computed in the Table A.1 section with exactly the same
#pipeline, and `local_tiers` / `local_all` come from there as well, so the
#table is assembled from those objects instead of recomputing them.
table_2 <- bind_rows(
  local_tiers, nationals, local_all, national_all)

##Tests of equal proportions of conflict frames reported in Table 3

#Counts reused by every test below. Indexing is positional: column 2 of each
#cross-tab (and element 2 of each one-way table) is used as the number of
#articles coded as containing a conflict frame.
conflict_tier_tab <- table(localData$tier, localData$conflict)
conflict_tier_n <- table(localData$tier)
conflict_local_hits <- table(localData$conflict)[2]
conflict_nat_hits <- table(nationalData$conflict)[2]
conflict_local_n <- length(localData$conflict)
conflict_nat_n <- length(nationalData$conflict)
conflict_paper_tab <- table(nationalData$paper, nationalData$conflict)
conflict_paper_n <- table(nationalData$paper)

##Equality of proportions between tiers of local newspapers
#1 vs 2
conflict_1v2 <- prop.test(
  x = c(conflict_tier_tab[1, 2], conflict_tier_tab[2, 2]),
  n = c(conflict_tier_n[1], conflict_tier_n[2]))
#1 vs 3
conflict_1v3 <- prop.test(
  x = c(conflict_tier_tab[1, 2], conflict_tier_tab[3, 2]),
  n = c(conflict_tier_n[1], conflict_tier_n[3]))
#2 vs 3
conflict_2v3 <- prop.test(
  x = c(conflict_tier_tab[2, 2], conflict_tier_tab[3, 2]),
  n = c(conflict_tier_n[2], conflict_tier_n[3]))

##Equality of proportions between national newspapers and local newspapers
#Local vs. National
conflict_LvN <- prop.test(
  x = c(conflict_local_hits, conflict_nat_hits),
  n = c(conflict_local_n, conflict_nat_n))

#1 vs nat
conflict_1vN <- prop.test(
  x = c(conflict_tier_tab[1, 2], conflict_nat_hits),
  n = c(conflict_tier_n[1], conflict_nat_n))

#2 vs nat
conflict_2vN <- prop.test(
  x = c(conflict_tier_tab[2, 2], conflict_nat_hits),
  n = c(conflict_tier_n[2], conflict_nat_n))

#3 vs nat
conflict_3vN <- prop.test(
  x = c(conflict_tier_tab[3, 2], conflict_nat_hits),
  n = c(conflict_tier_n[3], conflict_nat_n))

#NYTimes vs. WaPo (first vs. second row of the paper cross-tab)
conflict_NYTvWaPo <- prop.test(
  x = c(conflict_paper_tab[1, 2], conflict_paper_tab[2, 2]),
  n = c(conflict_paper_n[1], conflict_paper_n[2]))

##Creating the Conflict Frame panel of Table 3

#The tests in the row order of the panel. Every column below is extracted
#from this one list, so a row can never mix values from two different tests
#(the 2v3 difference used to subtract conflict_1v3's second estimate).
conflict_tests <- list(
  conflict_LvN,
  conflict_1vN, conflict_2vN, conflict_3vN,
  conflict_1v2, conflict_1v3, conflict_2v3,
  conflict_NYTvWaPo)

#Row labels, written as "group 1 vs. group 2" in the same order as the
#Proportion 1 / Proportion 2 columns. The last test is run as NYTimes (first
#group) vs. WaPo (second group), so it is labelled in that order.
tests_conflict <- c("Local vs. National",
                    "Tier 1 vs. National", "Tier 2 vs. National", "Tier 3 vs. National",
                    "Tier 1 vs. Tier 2", "Tier 1 vs. Tier 3", "Tier 2 vs. Tier 3",
                    "NYTimes vs. WaPo")

#Estimated proportion in the first and second group of each test
prop1_conflict <- round(
  vapply(conflict_tests, function(test) test$estimate[[1]], numeric(1)), 3)

prop2_conflict <- round(
  vapply(conflict_tests, function(test) test$estimate[[2]], numeric(1)), 3)

#Difference (group 1 minus group 2), computed from the unrounded estimates
diff_conflict <- round(
  vapply(conflict_tests,
         function(test) test$estimate[[1]] - test$estimate[[2]],
         numeric(1)), 3)

p_conflict <- round(
  vapply(conflict_tests, function(test) test$p.value, numeric(1)), 3)

#Lower and upper bounds of prop.test's confidence interval for the difference
confInt1_conflict <- round(
  vapply(conflict_tests, function(test) test$conf.int[1], numeric(1)), 3)

confInt2_conflict <- round(
  vapply(conflict_tests, function(test) test$conf.int[2], numeric(1)), 3)

#Build the panel with its final column names directly, rather than binding
#unnamed vectors and renaming the auto-repaired columns afterwards.
table_3Conflict <- tibble(
  `Comparison` = tests_conflict,
  `Proportion 1` = prop1_conflict,
  `Proportion 2` = prop2_conflict,
  `Difference` = diff_conflict,
  `p Value` = p_conflict,
  `Conf Int Low` = confInt1_conflict,
  `Conf Int High` = confInt2_conflict)


###Game Frame Analysis

##Tests of equal proportions of game frames reported in Table 3

#Counts reused by every test below. Indexing is positional: column 2 of each
#cross-tab (and element 2 of each one-way table) is used as the number of
#articles coded as containing a game frame.
game_tier_tab <- table(localData$tier, localData$game)
game_tier_n <- table(localData$tier)
game_local_hits <- table(localData$game)[2]
game_nat_hits <- table(nationalData$game)[2]
game_local_n <- length(localData$game)
game_nat_n <- length(nationalData$game)
game_paper_tab <- table(nationalData$paper, nationalData$game)
game_paper_n <- table(nationalData$paper)

##Equality of proportions between tiers of local newspapers
#1 vs 2
game_1v2 <- prop.test(
  x = c(game_tier_tab[1, 2], game_tier_tab[2, 2]),
  n = c(game_tier_n[1], game_tier_n[2]))
#1 vs 3
game_1v3 <- prop.test(
  x = c(game_tier_tab[1, 2], game_tier_tab[3, 2]),
  n = c(game_tier_n[1], game_tier_n[3]))
#2 vs 3
game_2v3 <- prop.test(
  x = c(game_tier_tab[2, 2], game_tier_tab[3, 2]),
  n = c(game_tier_n[2], game_tier_n[3]))

##Equality of proportions between national newspapers and local newspapers
#Local vs. National
game_LvN <- prop.test(
  x = c(game_local_hits, game_nat_hits),
  n = c(game_local_n, game_nat_n))

#1 vs nat
game_1vN <- prop.test(
  x = c(game_tier_tab[1, 2], game_nat_hits),
  n = c(game_tier_n[1], game_nat_n))

#2 vs nat
game_2vN <- prop.test(
  x = c(game_tier_tab[2, 2], game_nat_hits),
  n = c(game_tier_n[2], game_nat_n))

#3 vs nat
game_3vN <- prop.test(
  x = c(game_tier_tab[3, 2], game_nat_hits),
  n = c(game_tier_n[3], game_nat_n))

#NYTimes vs. WaPo (first vs. second row of the paper cross-tab)
game_NYTvWaPo <- prop.test(
  x = c(game_paper_tab[1, 2], game_paper_tab[2, 2]),
  n = c(game_paper_n[1], game_paper_n[2]))

##Creating the Game Frame panel of Table 3

#The tests in the row order of the panel. Every column below is extracted
#from this one list, so a row can never mix values from two different tests
#(the 2v3 difference used to subtract game_1v3's second estimate).
game_tests <- list(
  game_LvN,
  game_1vN, game_2vN, game_3vN,
  game_1v2, game_1v3, game_2v3,
  game_NYTvWaPo)

#Row labels, written as "group 1 vs. group 2" in the same order as the
#Proportion 1 / Proportion 2 columns. The last test is run as NYTimes (first
#group) vs. WaPo (second group), so it is labelled in that order.
tests_game <- c("Local vs. National",
                "Tier 1 vs. National", "Tier 2 vs. National", "Tier 3 vs. National",
                "Tier 1 vs. Tier 2", "Tier 1 vs. Tier 3", "Tier 2 vs. Tier 3",
                "NYTimes vs. WaPo")

#Estimated proportion in the first and second group of each test
prop1_game <- round(
  vapply(game_tests, function(test) test$estimate[[1]], numeric(1)), 3)

prop2_game <- round(
  vapply(game_tests, function(test) test$estimate[[2]], numeric(1)), 3)

#Difference (group 1 minus group 2), computed from the unrounded estimates
diff_game <- round(
  vapply(game_tests,
         function(test) test$estimate[[1]] - test$estimate[[2]],
         numeric(1)), 3)

p_game <- round(
  vapply(game_tests, function(test) test$p.value, numeric(1)), 3)

#Lower and upper bounds of prop.test's confidence interval for the difference
confInt1_game <- round(
  vapply(game_tests, function(test) test$conf.int[1], numeric(1)), 3)

confInt2_game <- round(
  vapply(game_tests, function(test) test$conf.int[2], numeric(1)), 3)

#Build the panel with its final column names directly, rather than binding
#unnamed vectors and renaming the auto-repaired columns afterwards.
table_3game <- tibble(
  `Comparison` = tests_game,
  `Proportion 1` = prop1_game,
  `Proportion 2` = prop2_game,
  `Difference` = diff_game,
  `p Value` = p_game,
  `Conf Int Low` = confInt1_game,
  `Conf Int High` = confInt2_game)

##Cross-tabulating the conflict and game codings over all articles, used for
##the share of game-framed articles also coded as conflict and vice versa.
##Reported in footnote 8

#Stack the two coding columns, national rows first and local rows after,
#then tabulate conflict (rows) against game (columns).
pooledFrames <- rbind(nationalData[c("conflict", "game")],
                      localData[c("conflict", "game")])
table(pooledFrames$conflict, pooledFrames$game)

##First, a difference of proportion test over all local articles:
##the share of conflict-coded articles among rows with syndicated == 1
##versus rows with syndicated == 0.
##Successes are sum(conflict) in each group, so this assumes conflict is
##coded 0/1; trials are the number of rows in each group.
##The result is printed rather than stored.

prop.test(c(sum(localData$conflict[which(localData$syndicated == 1)]), sum(localData$conflict[which(localData$syndicated == 0)])),
          c(length(localData$conflict[which(localData$syndicated == 1)]), length(localData$conflict[which(localData$syndicated == 0)])))

##Now conducting the tests comparing syndicated articles to those written by
##the newspapers' own reporters within each tier, reported in Table 4.
##This is reported in the first paragraph of the "Where did the Partisan Frames Come From?" section.
##In each cross-tab the rows are the conflict coding and the columns the
##syndicated coding. Every test puts column 2 first and column 1 second:
##successes come from row 2, trials are the column totals.

#Tier 1
tier1_rows <- which(localData$tier == 1)
tier1table_syndConflict <- table(localData$conflict[tier1_rows],
                                 localData$syndicated[tier1_rows])
synd_tier1 <- prop.test(
  x = as.vector(tier1table_syndConflict[2, c(2, 1)]),
  n = as.vector(colSums(tier1table_syndConflict)[c(2, 1)]))

#Tier 2
tier2_rows <- which(localData$tier == 2)
tier2table_syndConflict <- table(localData$conflict[tier2_rows],
                                 localData$syndicated[tier2_rows])
synd_tier2 <- prop.test(
  x = as.vector(tier2table_syndConflict[2, c(2, 1)]),
  n = as.vector(colSums(tier2table_syndConflict)[c(2, 1)]))

#Tier 3
tier3_rows <- which(localData$tier == 3)
tier3table_syndConflict <- table(localData$conflict[tier3_rows],
                                 localData$syndicated[tier3_rows])
synd_tier3 <- prop.test(
  x = as.vector(tier3table_syndConflict[2, c(2, 1)]),
  n = as.vector(colSums(tier3table_syndConflict)[c(2, 1)]))

##Assembling Table 4: one row per tier (1, 2, 3 in that order).
##In the per-tier cross-tabs, column 1 feeds the "Own" columns and column 2
##the "Other" columns. In the per-tier tests the column-2 group was passed
##first, so estimate[1] is the "Other" proportion and estimate[2] the "Own".

#Number of articles in each group, per tier
synd_ownNs <- c(sum(tier1table_syndConflict[,1]), sum(tier2table_syndConflict[,1]), sum(tier3table_syndConflict[,1]))

synd_otherNs <- c(sum(tier1table_syndConflict[,2]), sum(tier2table_syndConflict[,2]), sum(tier3table_syndConflict[,2]))

#Proportion of conflict-coded articles in each group, per tier
synd_ownProp <- round(c(synd_tier1$estimate[2], synd_tier2$estimate[2], synd_tier3$estimate[2]), 3)

synd_otherProp <- round(c(synd_tier1$estimate[1], synd_tier2$estimate[1], synd_tier3$estimate[1]), 3)

#Other minus own, the same direction as the confidence interval below.
#NOTE(review): this subtracts the already-rounded proportions, so it always
#matches the two displayed columns but can differ in the third decimal from
#rounding the unrounded difference - confirm which is intended.
synd_diff <- round(synd_otherProp - synd_ownProp, 3)

synd_p <- round(c(synd_tier1$p.value, synd_tier2$p.value, synd_tier3$p.value), 3)

#Bounds of prop.test's confidence interval for the difference in proportions
synd_confIntLow <- round(c(synd_tier1$conf.int[1], synd_tier2$conf.int[1], synd_tier3$conf.int[1]), 3)

synd_confIntHigh <- round(c(synd_tier1$conf.int[2], synd_tier2$conf.int[2], synd_tier3$conf.int[2]), 3)

#Build the table with its final column names directly, rather than binding
#unnamed vectors and renaming the auto-repaired columns afterwards.
#unname() drops the "prop 1"/"prop 2" element names carried over from
#prop.test's estimate vector.
table_4 <- tibble(
  `Own N` = synd_ownNs,
  `Other N` = synd_otherNs,
  `Proportion Own` = unname(synd_ownProp),
  `Proportion Other` = unname(synd_otherProp),
  `Difference` = unname(synd_diff),
  `p Value` = synd_p,
  `Conf Int Low` = synd_confIntLow,
  `Conf Int High` = synd_confIntHigh)