#THIS FILE HOLDS CODE FOR CONSTRUCTING A COLLABORATION-LEVEL MODEL WHEREIN THE STATE ALLOCATES A PROPORTIONAL BUDGET TO ALL COUNTIES AND THEN COUNTIES ALLOCATE THEIR INSPECTION STATIONS INWARDLY (IGNORING WHEN THEY SEND BOATS OUT TO OTHER COUNTIES) AND DEFENSIVELY (KNOWING AND CARING ABOUT BOATS COMING IN FROM OTHER COUNTIES). HOWEVER, COUNTIES ARE UNAWARE OF CHOICES MADE BY OTHER COUNTIES, WHICH MAY RESULT IN REDUNDANT PLACEMENT OF INSPECTORS. ALL THIS IS THE SAME AS IN THE COUNTY-LEVEL MODEL EXCEPT THAT, HERE, WE NOW HAVE "COMBINED" SOME COUNTIES INTO COLLABORATIVE UNITS THAT WILL OPERATE AS THOUGH THEY ARE ONE "JOINT COUNTY."

#STEP 0: LOAD PACKAGES. NOTE--NOT ALL OF THESE MAY STILL BE NEEDED, BUT THIS HASN'T BEEN VERIFIED. 
list.of.packages = c("pacman", "dplyr", "ROI", "ompr", "ompr.roi", "Rsymphony",
                     "ROI.plugin.symphony", "tidyr", "Matrix", "igraph")
new.packages = list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if (length(new.packages) > 0) {
  r=getOption("repos")
  r["CRAN"] = "http://cran.rstudio.com/"
  options(repos=r)
  install.packages(new.packages)
}
pacman::p_load(dplyr, ROI, ompr, ompr.roi, Rsymphony, ROI.plugin.symphony, tidyr, Matrix, igraph)

#STEP 1: LOAD IN OUR NETWORK EDGE LIST, OUR FIRST KEY INPUT OBJECT. THIS INCLUDES DOW NUMBERS FOR THE "FROM" AND "TO" LAKES IN EACH EDGE, THE NUMBER OF BOATS MOVING FROM-->TO YEARLY (WEIGHT COLUMN), AND A BIJ VALUE, WHICH IS A COUNT OF THE NUMBER OF NEW INVASIVES THAT COULD VIABLY SPREAD FROM THE "FROM" LAKE TO THE "TO" LAKE OUT OF THE FOUR WE'RE MODELING. BY THIS POINT, EDGES THAT CANNOT RESULT IN ANY NEW INVASIVES (BIJ == 0) HAVE ALREADY BEEN REMOVED, SO THIS COLUMN IS NO LONGER NEEDED.
boats.reduce = readRDS(file = "boats_reduce")
NIJ2 = boats.reduce %>% dplyr::select(-bij)

NIJ2$from = as.character(NIJ2$from) #TRANSITION TO CHARACTERS FOR THESE COLUMNS.
NIJ2$to = as.character(NIJ2$to)

#STEP 2: LOAD IN A DOC THAT HOLDS INFO ABOUT WHICH LAKES ARE IN WHICH COUNTIES SO WE CAN KNOW WHICH LAKES "BELONG" TO WHICH COUNTIES FROM A JURIDISTICTIONAL STANDPOINT.
Counties = read.csv("Lakes_total_collabs.csv") #THIS OBJECT INCLUDES A collab_name COLUMN THAT INDICATES WHETHER A COUNTY IS IN A PROSPECTIVE COLLABORATION AND, IF SO, WHICH ONE. OTHERWISE, IT IS A REPEAT OF THE county_name COLUMN. IT ALSO INCLUDES EXTRANEOUS DATA, SUCH AS LAKE ACREAGE, UTM LOCATION DATA, AND WHETHER OR NOT THE LAKE WAS CONSIDERED INFESTED FOR THE FOUR SPECIES OF INTEREST BY 2019. 

Counties = dplyr::select(Counties, DOW, county_name = collab_name) #PRUNE DOWN TO JUST THE RELEVANT VARIABLES AT THIS POINT. ***Note that, in this file, we reference the collab_name column and use it from here on out as though it held the names of counties. This differentiates this file from that of the countylevel_model.R file.
Counties$DOW = as.character(Counties$DOW) #MOVE TO CHARACTERS FOR PROPER MATCHING.

#STEP 3: ADD COUNTY INCLUSION INFO TO EVERY EDGE VIA JOINING. THRUOUT: FROM = SENDING LAKE, TO = RECEIVING LAKE. SAME GOES FOR COUNTIES. FOR EACH EDGE, WE'LL KNOW WHICH COUNTIES EACH LAKE BELONGS TO.
NIJ3 = NIJ2 %>% left_join(Counties, by = c("from" = "DOW"))
NIJ4 = NIJ3 %>% left_join(Counties, by = c("to" = "DOW"), suffix = c(".from", ".to")) 

#STEP 4: GIVE SETS OF UNIQUE NUMBERS TO LAKES SO THESE CAN BE BETTER TRACKED AND INDEXED.
NIJ4$lake.D = as.numeric(factor(NIJ4$from, levels=unique(c(NIJ4$from, NIJ4$to))))
NIJ4$lake.R = as.numeric(factor(NIJ4$to, levels=unique(c(NIJ4$from, NIJ4$to))))

#The earlier joining step introduces duplicates in terms of edges, which is not atypical for join operations unfortunately but which is also undesirable because it would allow the same boats to be potentially counted multiple times. So, we need to eliminate those now, at least in so far as the receiving county is concerned. That is, we only want to keep "duplicate" edges if and only if a lake is technically in the jurisdiction of multiple counties and thus multiple counties could all inspect it, which we would then treat as being redundant/inefficient after the fact, which matches with our assumptions and expectations. 
NIJ4 = NIJ4 %>% distinct(from, to, weight,county_name.to, .keep_all = TRUE)

#STEP 5: SET GLOBAL AND LOCAL VARIABLES FOR THE MODEL.
counties.list = unique(c(NIJ4$county_name.from, NIJ4$county_name.to)) #List of all counties with any relevant edges to  consider--some counties could be receiving or sending no risky boats and we can ignore these to varying degrees. 

budgets = read.csv("collabmod_budgets_collabsandcounties.csv") #Look-up table with the assigned budgets for every county that we've generated elsewhere and now just need to bring in here. ***For this file, we're bringing in a document that has budgets allocated to the two collaborations we're considering, having taken those budgets from the counties in each collaboration. This differentiates this file from the countylevel_model.R file. 
budgets = budgets[budgets$County %in% unique(counties.list),] #Some counties included in the lookup table could have no relevant edges, as we may have discovered above, and we might as well not even consider those by eliminating them from the budget object we're going to loop through as well.

#There are also some counties that never get allotted a budget in any statewide budget amount, and there is really no reason to consider those counties at all. These are easy to spot--their budget value in the last column would be 0.
budgets = budgets[c(which(budgets$X700!=0)),]

#Next, aggregate the different budgets we will need to run for each distinct county--we may need to know a county's behavior for a multitude of different budgets and this line determines what those different budgets will be. As we step thru counties in the budget object, we will also, within each county, step thru its various budget levels, skipping any duplicates thanks to this step. 
budgets.list = apply(budgets[,2:ncol(budgets)], 1, function(x) { unique(c(x)) } )

#What is easy to observe is that many counties get allotted a budget of 0 in at least one statewide budget--there is really no need to consider those 0 budgets and they just waste our time, so we need to eliminate them but in a predictable way. Because these are stored in a list, we will need to step through the list and replace 0s with NULLs.
for(x in 1:length(budgets.list)) { 
  if(budgets.list[[x]][1] == 0) { #If the first value in a county's budget list is 0...
    budgets.list[[x]] = budgets.list[[x]][-1] #Replace that county's budget list with a replica missing the first entry to eliminate that 0.
  }
}

boats = NIJ4$weight #Shortcut to the number of boats moving along each edge "globally."
lakes.R = NIJ4$lake.R #Shortcut to a vector of all the receiving lakes (as lake ID numbers) for all edges globally.
lakes.D = NIJ4$lake.D #Shortcut to a vector of all the donating lakes (as lake ID numbers) for all edges globally.
Nlakes = max(c(lakes.R, lakes.D)) #Shortcut to the max number of lakes being modeled globally. 

#Here, we define a global variable, Vijj'. Vijj' indicates whether edge i involves lakes j (as donor) or j' (as receiver), where j != j'. If an edge does involve a lake, its corresponding cell is marked as 1 and is 0 otherwise. This becomes a very large object, so it takes some time to make it. First, we set up the empty, sparse Matrix, then fill it only where we need to do so. 
Vij = Matrix(0, 
             nrow=nrow(NIJ4), 
             ncol=Nlakes, 
             sparse=T)

for(co in 1:max(c(lakes.R, lakes.D))) {
  Vij[NIJ4$lake.R == co | NIJ4$lake.D == co,co] = 1 #Each lake is a unique column, so we go column by column and mark off all lakes that are "involved" in edges in that county.
}

#Create a storage object for the lakes selected for each county for each budget, B, being considered.
lakes.selected.B = vector("list", length(budgets$County))

#STEP 7: BEGIN TO STEP THRU EACH COUNTY WITH AN INSPECTION BUDGET AND DETERMINE WHAT THAT COUNTY WOULD DO IF GIVEN EACH OF ITS ASSIGNED BUDGETS.
for (county in 1:nrow(budgets)) { #FOR EACH COUNTY...
current.county = budgets$County[county] #SELECT THE CURRENT COUNTY
print(paste0("We're beginning ", current.county, " county")) #Progress reporting.

#STEP 8: DEFINE SHORTCUT VARIABLE Fj. It is the IDs of all lakes that are within the jurisdiction of current county k. These are the only lakes we could place inspection stations at when considering county k, since we know counties will not inspect lakes that are not within their jurisdiction. So, they are the only ones we will need to consider when searching for the "optimal" solution for a given county. By shrinking the object in this way, we significantly cut down on the number of possibilities the model needs to consider.
lake.IDs.Fj = sort(unique(c(NIJ4$lake.R[NIJ4$county_name.to == current.county])))

#STEP 9: DEFINE SHORTCUT VARIABLE Ri. It is the IDs of all the edges that are of interest to county k when it is considering where best to defensively and inwardly place inspection stations. In practice, these are just those whose receiving lake is within county k. If county k is sending a lot of boats out to other counties, it doesn't care about that (though we will!), so it can't factor into the county's decision (but WILL factor into our final total of risky boats intercepted). Meanwhile, lakes that are receiving risky boats, whether from out of county or even from within the county, will count either way, so a county will consider placing inspectors at lakes sending a lot of risky boats out, even if they aren't receiving many risky boats in, if those outgoing risky boats are moving to other lakes in the same county k. Again, filtering like this cuts computational think time.
edge.IDs.Ri = sort(which(NIJ4$county_name.to == current.county))

#STEP 10: RE-DEFINE THE MODEL SO AS TO RELATE TO ONLY EDGES OF INTEREST TO THE CURRENT COUNTY. Here, we shrink the problem to just those rows/columns in Vijj' that are of relevance to the current county.

Vij.mini = Vij[edge.IDs.Ri, lake.IDs.Fj]

#It could be the case that we have a single lake and/or with a single edge (I'm looking at you Norman county!), in which case we have to side-step around the entirety of the remaining code inside the loop because, hey, we're probably inspecting that single lake if it's our only option! We also need to guard against the possibility of a county having no important lakes to inspect, in which case we can just store a blank for that county.

if(!is.null(dim(Vij.mini)) & 
   length(lake.IDs.Fj) > 0) { #Assuming Vij.mini is still a matrix (there are multiple rows and multiple columns corresponding to multiple relevant edges AND multiple lakes (and especially more than 0 lakes))....
  
  lengthi = dim(Vij.mini)[1] #The length of subscript i will reflect the number of edges of relevance to the current county.
  lengthj = dim(Vij.mini)[2] #The length of subscript j will reflect the number of lakes of relevance to the current county.
} else { 
  #Otherwise, grab the 0 or 1 lakes there are in the current county and just "inspect" those and skip to the next county--there's really nothing more to consider. 
  lakes.selected.B[[county]] = data.frame(B = budgets.list[[county]], global.lakes = lake.IDs.Fj); next 
  } 

boats.mini = boats[edge.IDs.Ri] #Reduce those total numbers of boats moving along edges yearly to only those the current county will actually consider when making decisions. 

#STEP 11: BUILD THE MODEL IN THE CONTEXT OF THE CURRENT COUNTY. 
#We first define our two decision variables, Xi and Yj. These correspond to the edges i and the lakes j being chosen for inspection. 
#We then place one constraint on these for now: The sum of Yjj' x Vijj'k must be greater than or equal to Xi--you cannot inspect an edge unless that edge is of interest to county k (it is 1 in Vijj'k for either lake j or lake j' and within county k's jurisdiction). That is, the edge involves a lake available for inspection (Vijj' is 1 for either j or j' or both), and one of those lakes is actually being inspected (Yjj' is 1 for either j or j' or both). If any one of those conditions fail, there would be a 0 that would make the product 0 and that Xi value thus couldn't be 1, meaning that edge is not receiving an inspection.
#We then define our objective: The sum of Xi (the edges being inspected) times the boats moving along those edges should be maximized. 
#NOTE: This step definitely take a while!
mod1 = MIPModel() %>% 
  add_variable(X[i], type = "binary", i = 1:lengthi) %>% 
  add_variable(Y[j], type="binary", j = 1:lengthj) %>% 
  add_constraint(sum_over(Vij.mini[i,j] * Y[j], j = 1:lengthj) >= X[i], i=1:lengthi) %>% 
  set_objective(sum_over(X[i] * boats.mini[i], i = 1:lengthi), "max") 

print("We've made this county's base model object!") #Progress reporting.

#STEP 12: STEPPING THRU THE VARIOUS BUDGET LEVELS.
#Now that we have defined the problem as narrowly as possible for the given county, we now need to understand what the county will do with each of the budgets we will consider allotting to it. 
bs.todo = budgets.list[[county]] #This extracts out the list of budgets to try.

lakes.selected.k = data.frame(B = vector(), global.lakes = vector()) #Create an empty storage object for this county's behaviors.

for(q in 1:length(bs.todo)) { #Start stepping over budgets.
B = bs.todo[q] #Set the budget for this step in the loop. 

#Before we figure out what the current county will do with its current budget, we should ask--is it being allotted more inspection stations than it can use? By how much? Do we even need to use optimization to figure out what it would do? Depending on the answers to these questions, we may be able to "short-circuit" this process, which would save us considerable time!
if(lengthj <= B) { 
  #If the current budget is equal to or greater than the number of relevant lakes to even inspect, just inspect all available lakes and move on.  
  lakes.selected.k = rbind(lakes.selected.k, data.frame(B = B, global.lakes = lake.IDs.Fj))
  
    print(paste0("For ", current.county, " county and budget level ", B, 
             ", we only needed a budget of ", lengthj, " to inspect all relevant lakes"))
    next } 

#STEP 13: ADD THE BUDGET AS A CONSTRAINT TO OUR MODEL--DO NOT ALLOW MORE LAKES TO BE INSPECTED THAN THERE ARE INSPECTION STATIONS AVAILABLE TO PLACE. 
mod2 = mod1 %>% 
  add_constraint(sum_over(Y[j], j = 1:lengthj) <= B)

print(paste0("Now solving budget level ", B)) #Progress reporting.

#STEP 14: SOLVE THE AVAILABLE MODEL BASED ON THE CURRENT BUDGET. Use the Symphony solver, setting a max time limit of 1 hour and a gap limit of 1.5%, whichever is hit first. 
res1 = solve_model(mod2, with_ROI(solver = "symphony", time_limit=600, gap_limit=1.5))

res2 = get_solution(res1, Y[j]) %>% 
  filter(value == 1) #Extract just the raw, within-model index numbers of the lakes picked--that's all we really need. 

global.lakes = lake.IDs.Fj[res2$j] #This back-converts the lakes picked to their global index numbers we set earlier. 

print(paste0("Solutions for budget level ", B, " are lakes ")); print(global.lakes) #Progress reporting

lakes.selected.k = rbind(lakes.selected.k, data.frame(B, global.lakes))
mod2 = NULL #This step wipes out the old mod2 object so a brand new one can take it's place--otherwise, it doesn't seem to overwrite properly between model loops...

} #/budget levels

lakes.selected.B[[county]] = lakes.selected.k #Add our smaller solutions from the current county to our larger solutions storage object. 

mod1 = NULL #Wipe out the old base model object once we change to a new county.

} #/counties

#Save the output of the model to file for later processing. 
saveRDS(lakes.selected.B, "lakes_selected_collabs")