From 7a8b1aa68b1abd0bb0dbe7a6c19f5d2fcc66f9c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Roland=20Holl=C3=B3s?= <hollorol@gmail.com>
Date: Wed, 27 May 2020 23:55:51 +0200
Subject: [PATCH] multiobjective calibration

---
 RBBGCMuso/R/calibMuso.R   |   2 +-
 RBBGCMuso/R/calibration.R | 263 +++++++++++++++++++++-----------------
 2 files changed, 149 insertions(+), 116 deletions(-)

diff --git a/RBBGCMuso/R/calibMuso.R b/RBBGCMuso/R/calibMuso.R
index b42089d..73264c2 100644
--- a/RBBGCMuso/R/calibMuso.R
+++ b/RBBGCMuso/R/calibMuso.R
@@ -165,7 +165,7 @@ calibMuso <- function(settings=setupMuso(), calibrationPar=NULL,
         }
     }
 
-        if(modifyOriginal){
+        if(modifyOriginal && (!is.null(parameters) || !is.null(outVars))){
             file.copy(toModif[fileToChange], origsourceFiles[fileToChange], overwrite = TRUE)
         }
     
diff --git a/RBBGCMuso/R/calibration.R b/RBBGCMuso/R/calibration.R
index 62829af..70338b2 100644
--- a/RBBGCMuso/R/calibration.R
+++ b/RBBGCMuso/R/calibration.R
@@ -28,27 +28,28 @@
 #' @export
 optiMuso <- function(measuredData, parameters = NULL, startDate = NULL,
                      endDate = NULL, formatString = "%Y-%m-%d",
-                     leapYearHandling = TRUE,
                      dataVar, outLoc = "./calib",
-                     preTag = "cal-",
-                     settings =  NULL,
-                     outVars = NULL,
-                     iterations = 30,
-                     skipSpinup = TRUE,
-                     constrains = NULL,
-                     plotName = "calib.jpg",
-                     modifyOriginal=TRUE,
-		     likelihood = function(x, y){
-			 exp(-sqrt(mean((x-y)^2)))
-		     },
-                     continious,
-		     modelVar = 3009,
-             naVal = NULL,
-                     postProcString = NULL)
-{
-    mdata <- measuredData
-    dataCol <- grep(dataVar, colnames(measuredData))
+                     preTag = "cal-", settings =  setupMuso(),
+                     outVars = NULL, iterations = 30,
+                     skipSpinup = TRUE, plotName = "calib.jpg",
+                     modifyOriginal=TRUE, likelihood,
+                     naVal = NULL, postProcString = NULL, w=NULL) {
+    # Exanding likelihood
+    likelihoodFull <- as.list(rep(NA,length(dataVar)))
+    names(likelihoodFull) <- names(dataVar)
+    if(!missing(likelihood)) {
+        lapply(names(likelihood),function(x){
+                   likelihoodFull[[x]] <<- likelihood[[x]]
+                     })
+    }
+   defaultLikelihood <- which(is.na(likelihood))
+   if(length(defaultLikelihood)>0){
+        likelihoodFull[[defaultLikelihood]] <- (function(x, y){
+                                                       exp(-sqrt(mean((x-y)^2)))
+                                                })
+   }
 
+    mdata <- measuredData
     if(is.null(parameters)){
         parameters <- tryCatch(read.csv("parameters.csv", stringsAsFactor=FALSE), error = function (e) {
             stop("You need to specify a path for the parameters.csv, or a matrix.")
@@ -70,149 +71,181 @@ optiMuso <- function(measuredData, parameters = NULL, startDate = NULL,
     }
 
     outLoc <- normalizePath(outLoc)
-
-    if(is.null(settings)){
-        settings <- setupMuso()
-    }
-
     parameterNames <- parameters[,1]
     pretag <- file.path(outLoc,preTag)
-    npar <- length(settings$calibrationPar)
-
     ##reading the original epc file at the specified
     ## row numbers
     print("optiMuso is randomizing the epc parameters now...",quote = FALSE)
     if(iterations < 3000){
-        randVals <- musoRand(parameters = parameters,constrains = constrains, iterations = 3000)
+        randVals <- musoRand(parameters = parameters,constrains = NULL, iterations = 3000)
         randVals[[2]]<- randVals[[2]][sample(1:3000,iterations),]
     } else {
-        randVals <- musoRand(parameters = parameters,constrains = constrains, iterations = iterations)
+        randVals <- musoRand(parameters = parameters,constrains = NULL, iterations = iterations)
     }
 
-    origEpc <- readValuesFromFile(settings$epc[2],parameters[,2])
-
+    origEpc <- readValuesFromFile(settings$epc[2],randVals[[1]])
+    partialResult <- matrix(ncol=length(randVals[[1]])+2*length(dataVar))
+    colN <- randVals[[1]]
+    colN[match(parameters[,2],randVals[[1]])] <- parameters[,1]
+    colnames(partialResult) <- c(colN,sprintf("%s_likelihood",names(dataVar)),
+                                      sprintf("%s_rmse",names(dataVar)))
+    numParameters <- length(colN)
+    partialResult[1:numParameters] <- origEpc
     ## Prepare the preservedCalib matrix for the faster
     ##  run.
 
     pretag <- file.path(outLoc,preTag)
 
+     musoCodeToIndex  <- sapply(dataVar,function(musoCode){
+           settings$dailyOutputTable[settings$dailyOutputTable$code == musoCode,"index"]
+        })
+    resultRange <- (numParameters + 1):(ncol(partialResult))
     ## Creating function for generating separate
     ## csv files for each run
 
     progBar <- txtProgressBar(1,iterations,style=3)
-    colNumb <- which(settings$dailyVarCodes == modelVar)
     settings$iniInput[2] %>%
         (function(x) paste0(dirname(x),"/",tools::file_path_sans_ext(basename(x)),"-tmp.",tools::file_ext(x))) %>%
         unlink
     randValues <- randVals[[2]]
+
     settings$calibrationPar <- randVals[[1]]
+
     if(!is.null(naVal)){
         measuredData <- as.data.frame(measuredData)
         measuredData[measuredData == naVal] <- NA
     }
-    list2env(alignData(measuredData,dataCol = dataCol,modellSettings = settings,startDate = startDate,endDate = endDate,leapYear = leapYearHandling, continious = continious),envir=environment())
-    ## modIndex and measuredData are created.  
 
+    alignIndexes <- alignMuso(settings,measuredData)
 
-    modellOut <- numeric(iterations + 1) # single variable solution
-    rmse <- numeric(iterations + 1)
-    origModellOut <- calibMuso(settings=settings,silent=TRUE, skipSpinup = skipSpinup,postProcString=postProcString, modifyOriginal=modifyOriginal)
-
-
-    write.csv(x=origModellOut, file=paste0(pretag,1,".csv"))
-    modellOut[1] <- likelihood(measuredData,origModellOut[modIndex,colNumb])
-    rmse[1] <- sqrt(mean((measuredData-origModellOut[modIndex,colNumb])^2))
+    origModellOut <- calibMuso(settings=settings, silent=TRUE, skipSpinup = skipSpinup, postProcString=postProcString, modifyOriginal=modifyOriginal)
+    partialResult[,resultRange] <- calcLikelihoodsAndRMSE(dataVar=dataVar, 
+                                                          mod=origModellOut,
+                                                          mes=measuredData,
+                                                          likelihoods=likelihood,
+                                                           alignIndexes=alignIndexes,
+                                                           musoCodeToIndex = musoCodeToIndex)
+    write.csv(x=origModellOut, file=paste0(pretag, 1, ".csv"))
     print("Running the model with the random epc values...", quote = FALSE)
 
-    if(!is.null(postProcString)){
-        colNumb <- length(settings$dailyVarCodes) + 1
-    }
+    # if(!is.null(postProcString)){
+    #     colNumb <- length(settings$dailyVarCodes) + 1
+    # }
     
-    partialTemplate <- matrix(ncol=length(randVals[[1]])+2)
-    colN <- randVals[[1]]
-    colN[match(parameters[,2],randVals[[1]])] <- parameters[,1]
-
-    colnames(partialTemplate) <- c(colN, "rmse","likelihood")
-    partialTemplate[1:((length(randVals[[1]]))+2)] <- c(readValuesFromFile(settings$epc[2],randVals[[1]]),modellOut[1], rmse[1])
-    write.csv(x=partialTemplate, file="preservedCalib.csv",row.names=FALSE)
+    write.csv(x=partialResult, file="preservedCalib.csv",row.names=FALSE)
     for(i in 2:(iterations+1)){
         tmp <- tryCatch(calibMuso(settings = settings,
                                   parameters = randValues[(i-1),],
                                   silent= TRUE,
-                                  skipSpinup = skipSpinup, modifyOriginal=modifyOriginal, postProcString = postProcString)[modIndex,colNumb], error = function (e) NULL        )
+                                  skipSpinup = skipSpinup, modifyOriginal=modifyOriginal, postProcString = postProcString), error = function (e) NULL)
         if(is.null(tmp)){
-           tmp <- rmse[i] <- modellOut[i] <- NA
+           partialResult[,resultRange] <- NA
         } else {
-            modellOut[i]<- likelihood(measuredData,tmp)
-            rmse[i] <- sqrt(mean((measuredData-tmp)^2))
+            partialResult[,resultRange] <- calcLikelihoodsAndRMSE(dataVar=dataVar, 
+                                                          mod=tmp,
+                                                          mes=measuredData,
+                                                          likelihoods=likelihood,
+                                                           alignIndexes=alignIndexes,
+                                                           musoCodeToIndex = musoCodeToIndex)
         }
 
-        partialTemplate[1:(length(randVals[[1]])+2)] <- c(randValues[(i-1),], rmse[i], modellOut[i])
-        write.table(x=partialTemplate, file="preservedCalib.csv",append=TRUE,row.names=FALSE,sep=",",col.names=FALSE)
+
+        partialResult[1:numParameters] <- randValues[(i-1),]
+        write.table(x=partialResult, file="preservedCalib.csv", append=TRUE, row.names=FALSE,
+                    sep=",", col.names=FALSE)
         write.csv(x=tmp, file=paste0(pretag, (i+1),".csv"))
         setTxtProgressBar(progBar,i)
     }
+    
+    musoGlue("preservedCalib.csv",w=w)
 
-    # paramLines <- parameters[,2]
-    # paramLines <- order(paramLines)
-    # randInd <- randVals[[1]][(randVals[[1]] %in% parameters[,2])]
-    # randInd <- order(randInd)
-    #
-    #
-    #
-    # epcStrip <- rbind(origEpc[order(parameters[,2])],
-    #                   randValues[,randVals[[1]] %in% parameters[,2]][,randInd])
-    #
-    #
-    # preservedCalib <- cbind(epcStrip,rmse,
-    #                         modellOut)
-    # columNames <-  c(parameterNames[paramLines],"rmse", "likelihood")
-    # colnames(preservedCalib) <- columNames
-    # write.csv(preservedCalib,"preservedCalib.csv")
-
-    preservedCalib<- read.csv("preservedCalib.csv")
-    p<-list()
-    preservedCalib <- preservedCalib[-1,]
-    preservedCalib <- preservedCalib[!is.na(preservedCalib$likelihood),]
-    dontInclude <-c((ncol(preservedCalib)-1),ncol(preservedCalib))
-
-    # for(i in seq_along(colnames(preservedCalib)[-dontInclude])){
-    #     p[[i]] <- ggplot(as.data.frame(preservedCalib),aes_string(colnames(preservedCalib)[i],"likelihood")) +
-    #         geom_point(shape='.',size=1,alpha=0.8)
-    # }
-    unfilteredLikelihood <-  preservedCalib$likelihood
-    preservedCalib <- preservedCalib[preservedCalib$likelihood>quantile(preservedCalib$likelihood,0.95),]
-    optRanges <-t(apply(preservedCalib,2,function(x) quantile(x,c(0.05,0.5,0.95))))  
-
-pdf("dotplot.pdf")
-    plot(Reduce(min, -log(unfilteredLikelihood), accumulate=TRUE),type="l", ylab="-log(likelihood)",xlab="iterations")
-    for(i in seq_along(colnames(preservedCalib)[-dontInclude])){
-        plot(preservedCalib[,i],preservedCalib[,"likelihood"],pch=19,cex=.1, ylab="likelihood",
-             main = colnames(preservedCalib)[i], xlab="")
-        abline(v=optRanges[i,1],col="blue")
-        abline(v=optRanges[i,2],col="green")
-        abline(v=optRanges[i,3],col="red")
-        
-    }
-dev.off()
-
-    # ggsave(plotName,grid.arrange(grobs = p, ncol = floor(sqrt(ncol(preservedCalib)-1))), dpi = 1000)
-    # maxLikelihoodPlace  <- which(preservedCalib[,"likelihood"]==max(preservedCalib[,"likelihood"],na.rm = TRUE))
-    # resPlot <- plotMusoWithData(mdata = measuredData, startDate = startDate, endDate = endDate,
-    #                             dataVar = dataVar, modelVar = modelVar, settings = settings, continious = continious) +
-    #     plotMuso(settings = settings, parameters = randValues[maxLikelihoodPlace,],
-    #              postProcString = postProcString, skipSpinup = FALSE, variable = colNumb, layerPlot = TRUE, colour = "green")
-    #
-    # print(resPlot)
-    # tempEpc <- paste0(tools::file_path_sans_ext(basename(settings$epcInput[2])),"-tmp.",tools::file_ext(settings$epcInput[2]))
-    # file.rename(tempEpc, "optimizedEpc.epc")
-    # return(preservedCalib[maxLikelihoodPlace,])
-    write.csv(optRanges,"optRanges.csv")
-    # is.num <- sapply(head(optRanges,-2), is.numeric)
-    # optRanges[is.num] <- lapply(optRanges[is.num], round, 4)
-    return(head(optRanges,n=-2))
 }
 
 
 
 
+alignMuso <- function (settings,measuredData) {
+        # Have to fix for other starting points also
+         modelDates <- seq(from= as.Date(sprintf("%s-01-01",settings$startYear)), 
+             by="days",
+             to=as.Date(sprintf("%s-12-31",settings$startYear+settings$numYears-1)))
+         modelDates <- grep("-02-29",modelDates,invert=TRUE, value=TRUE)
+
+        measuredDates <- apply(measuredData,1,function(xrow){
+                                   sprintf("%s-%s-%s",xrow[1],xrow[2],xrow[3])
+        })
+
+        modIndex <- match(as.Date(measuredDates), as.Date(modelDates))
+        measIndex <- which(!is.na(modIndex))
+        modIndex <- modIndex[!is.na(modIndex)]
+        cbind.data.frame(model=modIndex,meas=measIndex)
+} 
+
+calcLikelihoodsAndRMSE <- function(dataVar, mod, mes, likelihoods, alignIndexes, musoCodeToIndex){
+
+    likelihoodRMSE <- sapply(names(dataVar),function(key){
+               modelled <- mod[alignIndexes$mod,musoCodeToIndex[key]]
+               measured <- mes[alignIndexes$meas,key]
+               modelled <- modelled[!is.na(measured)] 
+               measured <- measured[!is.na(measured)] 
+               res <- c(likelihoods[[key]](modelled,measured),
+                        sqrt(mean((modelled-measured)^2))
+               )
+               res
+        })
+    names(likelihoodRMSE) <- c(sprintf("%s_likelihood",dataVar), sprintf("%s_rmse",dataVar))
+
+    return(c(likelihoodRMSE[1,],likelihoodRMSE[2,]))
+}
+
+musoGlue <- function(preservedCalib, w){
+    preservedCalib<- read.csv(preservedCalib)
+    preservedCalib <- preservedCalib[-1,] #original
+
+    likeIndexes <- grep("likelihood",colnames(preservedCalib))
+    if(!is.null(w)){
+        forCombine<- sapply(names(w),function(n){
+                                grep(sprintf("%s_likelihood",n),colnames(preservedCalib))
+        })
+        preservedCalib[["combined"]] <- apply(as.data.frame(Map(function(x,y){
+                                                           toNormalize  <- preservedCalib[,y]
+                                                           toNormalize <- toNormalize / sqrt(sum(x^2))
+                                                           toNormalize * x
+
+                                            },w,forCombine)), 1, sum)
+    } else {
+        preservedCalib[["combined"]] <- grep("likelihood",colnames(preservedCalib),value=TRUE)
+    }
+
+    parameterIndexes <- 1:(min(likeIndexes))
+    preservedCalib <- preservedCalib[!is.na(preservedCalib$combined),]
+    unfilteredLikelihood <-  preservedCalib$combined
+    preservedCalibtop5 <- preservedCalib[preservedCalib$combined>quantile(preservedCalib$combined,0.95),]
+    optRanges <-t(apply(preservedCalibtop5,2,function(x) quantile(x,c(0.05,0.5,0.95))))  
+    pdf("dotplot.pdf")
+    plot(Reduce(min, -log(unfilteredLikelihood), accumulate=TRUE),type="l", ylab="-log(likelihood)",xlab="iterations")
+    pari <- par(mfrow=c(1,2)) 
+    for(i in seq_along(colnames(preservedCalib)[parameterIndexes])){
+        plot(preservedCalib[,i],preservedCalib[,"combined"],pch=19,cex=.1, ylab="likelihood",
+             main = colnames(preservedCalib)[i], xlab="")
+        plot(preservedCalibtop5[,i],preservedCalibtop5[,"combined"],pch=19,cex=.1, ylab="likelihood",
+             main = paste0(colnames(preservedCalibtop5)[i]," (behav.)"), xlab="")
+        abline(v=optRanges[i,1],col="blue")
+        abline(v=optRanges[i,2],col="green")
+        abline(v=optRanges[i,3],col="red")
+        
+    }
+    par(pari)
+    dev.off()
+    write.csv(optRanges,"optRanges.csv")
+    print(head(optRanges))
+    return(head(optRanges,n=-2))
+}
+
+generateOptEpc <- function(optRanges,delta, maxLikelihood=FALSE){
+    if(missing(delta)){
+
+    }
+    
+}
+