RBBGCMuso/RBBGCMuso/R/calibration.R
2020-09-23 08:13:12 +02:00

312 lines
13 KiB
R

#' optiMuso
#'
#' This function calculates the -users specified- likelihood for random model input.
#'
#' @author Roland HOLLOS
#' @param measuredDataFile a
#' @param parameters b
#' @param sep c
#' @param startDate d
#' @param endDate e
#' @param formatString a
#' @param filterCol a
#' @param filterVal b
#' @param selVar c
#' @param outLoc c
#' @param pretag a
#' @param calPar a
#' @param skipSpinup a
#' @param iterations c
#' @param constrains d
#' @param likelihood d
#' @param settings e
#' @param leapYear b
#' @param plotName u
#' @importFrom ggplot2 ggplot aes_string geom_point ggsave
#' @importFrom magrittr '%>%'
#' @importFrom gridExtra grid.arrange
#' @export
optiMuso <- function(measuredData, parameters = NULL, startDate = NULL,
endDate = NULL, formatString = "%Y-%m-%d",
dataVar, outLoc = "./calib",
preTag = "cal-", settings = setupMuso(),
outVars = NULL, iterations = 30,
skipSpinup = TRUE, plotName = "calib.jpg",
modifyOriginal=TRUE, likelihood, uncertainity = NULL,
naVal = NULL, postProcString = NULL, w=NULL, lg=FALSE, parallel = TRUE) {
# Exanding likelihood
likelihoodFull <- as.list(rep(NA,length(dataVar)))
names(likelihoodFull) <- names(dataVar)
if(!missing(likelihood)) {
lapply(names(likelihood),function(x){
likelihoodFull[[x]] <<- likelihood[[x]]
})
}
defaultLikelihood <- which(is.na(likelihood))
if(length(defaultLikelihood)>0){
likelihoodFull[[defaultLikelihood]] <- (function(x, y){
exp(-sqrt(mean((x-y)^2)))
})
}
mdata <- measuredData
if(is.null(parameters)){
parameters <- tryCatch(read.csv("parameters.csv", stringsAsFactor=FALSE), error = function (e) {
stop("You need to specify a path for the parameters.csv, or a matrix.")
})
} else {
if((!is.list(parameters)) & (!is.matrix(parameters))){
parameters <- tryCatch(read.csv(parameters, stringsAsFactor=FALSE), error = function (e){
stop("Cannot find neither parameters file neither the parameters matrix")
})
}}
outLoc <- normalizePath(outLoc)
outLocPlain <- basename(outLoc)
currDir <- getwd()
if(!dir.exists(outLoc)){
dir.create(outLoc)
warning(paste(outLoc," is not exists, so it was created"))
}
outLoc <- normalizePath(outLoc)
parameterNames <- parameters[,1]
pretag <- file.path(outLoc,preTag)
##reading the original epc file at the specified
## row numbers
print("optiMuso is randomizing the epc parameters now...",quote = FALSE)
if(iterations < 3000){
randVals <- musoRand(parameters = parameters,constrains = NULL, iterations = 3000)
randVals[[2]]<- randVals[[2]][sample(1:3000,iterations),]
} else {
randVals <- musoRand(parameters = parameters,constrains = NULL, iterations = iterations)
}
origEpc <- readValuesFromFile(settings$epc[2],randVals[[1]])
partialResult <- matrix(ncol=length(randVals[[1]])+2*length(dataVar))
colN <- randVals[[1]]
colN[match(parameters[,2],randVals[[1]])] <- parameters[,1]
colnames(partialResult) <- c(colN,sprintf("%s_likelihood",names(dataVar)),
sprintf("%s_rmse",names(dataVar)))
numParameters <- length(colN)
partialResult[1:numParameters] <- origEpc
## Prepare the preservedCalib matrix for the faster
## run.
pretag <- file.path(outLoc,preTag)
musoCodeToIndex <- sapply(dataVar,function(musoCode){
settings$dailyOutputTable[settings$dailyOutputTable$code == musoCode,"index"]
})
resultRange <- (numParameters + 1):(ncol(partialResult))
## Creating function for generating separate
## csv files for each run
progBar <- txtProgressBar(1,iterations,style=3)
settings$iniInput[2] %>%
(function(x) paste0(dirname(x),"/",tools::file_path_sans_ext(basename(x)),"-tmp.",tools::file_ext(x))) %>%
unlink
randValues <- randVals[[2]]
settings$calibrationPar <- randVals[[1]]
if(!is.null(naVal)){
measuredData <- as.data.frame(measuredData)
measuredData[measuredData == naVal] <- NA
}
alignIndexes <- alignMuso(settings,measuredData)
if(!is.null(uncertainity)){
uncert <- measuredData[alignIndexes$meas,uncertainity]
} else {
uncert <- NULL
}
# browser()
origModellOut <- calibMuso(settings=settings, silent=TRUE, skipSpinup = skipSpinup, postProcString=postProcString, modifyOriginal=modifyOriginal)
partialResult[,resultRange] <- calcLikelihoodsAndRMSE(dataVar=dataVar,
mod=origModellOut,
mes=measuredData,
likelihoods=likelihood,
alignIndexes=alignIndexes,
musoCodeToIndex = musoCodeToIndex,uncert=uncert)
write.csv(x=origModellOut, file=paste0(pretag, 1, ".csv"))
print("Running the model with the random epc values...", quote = FALSE)
# if(!is.null(postProcString)){
# colNumb <- length(settings$dailyVarCodes) + 1
# }
write.csv(x=partialResult, file="preservedCalib.csv",row.names=FALSE)
for(i in 2:(iterations+1)){
# browser()
tmp <- tryCatch(calibMuso(settings = settings,
parameters = randValues[(i-1),],
silent= TRUE,
skipSpinup = skipSpinup, modifyOriginal=modifyOriginal, postProcString = postProcString), error = function (e) NULL)
if(is.null(tmp)){
partialResult[,resultRange] <- NA
} else {
partialResult[,resultRange] <- calcLikelihoodsAndRMSE(dataVar=dataVar,
mod=tmp,
mes=measuredData,
likelihoods=likelihood,
alignIndexes=alignIndexes,
musoCodeToIndex = musoCodeToIndex, uncert = uncert)
}
partialResult[1:numParameters] <- randValues[(i-1),]
write.table(x=partialResult, file="preservedCalib.csv", append=TRUE, row.names=FALSE,
sep=",", col.names=FALSE)
write.csv(x=tmp, file=paste0(pretag, (i+1),".csv"))
setTxtProgressBar(progBar,i)
}
musoGlue("preservedCalib.csv",w=w, lg = lg)
}
alignMuso <- function (settings,measuredData) {
# Have to fix for other starting points also
modelDates <- seq(from= as.Date(sprintf("%s-01-01",settings$startYear)),
by="days",
to=as.Date(sprintf("%s-12-31",settings$startYear+settings$numYears-1)))
modelDates <- grep("-02-29",modelDates,invert=TRUE, value=TRUE)
measuredDates <- apply(measuredData,1,function(xrow){
sprintf("%s-%s-%s",xrow[1],xrow[2],xrow[3])
})
modIndex <- match(as.Date(measuredDates), as.Date(modelDates))
measIndex <- which(!is.na(modIndex))
modIndex <- modIndex[!is.na(modIndex)]
cbind.data.frame(model=modIndex,meas=measIndex)
}
# calcLikelihoodsAndRMSE <- function(dataVar, mod, mes, likelihoods, alignIndexes, musoCodeToIndex, uncert){
#
# likelihoodRMSE <- sapply(names(dataVar),function(key){
# # browser()
# modelled <- mod[alignIndexes$mod,musoCodeToIndex[key]]
# measured <- mes[alignIndexes$meas,key]
# modelled <- modelled[!is.na(measured)]
# # uncert <- uncert[!is.na(measured)]
# measured <- measured[!is.na(measured)]
# res <- c(likelihoods[[key]](modelled, measured, uncert),
# sqrt(mean((modelled-measured)^2))
# )
# res
# })
# names(likelihoodRMSE) <- c(sprintf("%s_likelihood",dataVar), sprintf("%s_rmse",dataVar))
#
# return(c(likelihoodRMSE[1,],likelihoodRMSE[2,]))
# }
#' musoGlue
#'
#' This function calculates the -users specified- likelihood for random model input.
#'
#' @author Roland HOLLOS
#' @param plotName u
#' @export
musoGlue <- function(presCalFile, w, delta = 0.17, settings=setupMuso(), parameters=read.csv("parameters.csv",
stringsAsFactors=FALSE), lg=FALSE){
if(is.data.frame(presCalFile)){
preservedCalib <- presCalFile
} else {
preservedCalib <- read.csv(presCalFile)
}
paramIndex <- parameters[(match(colnames(preservedCalib),parameters[,1])),2]
paramIndex <- paramIndex[!is.na(paramIndex)]
paramIndex <- c(paramIndex,
as.numeric(gsub("X","",
grep("X[0-9]{1,}",
colnames(preservedCalib),value=TRUE))))
preservedCalib <- preservedCalib[-1,] #original
likeIndexes <- grep("likelihood",colnames(preservedCalib))
if(!is.null(w)){
forCombine<- sapply(names(w),function(n){
grep(sprintf("%s_likelihood",n),colnames(preservedCalib))
})
preservedCalib[["combined"]] <- apply(as.data.frame(Map(function(x,y){
toNormalize <- preservedCalib[,y]
toNormalize <- toNormalize / sqrt(sum(x^2))
toNormalize * x
},w,forCombine)), 1, sum)
} else {
preservedCalib[["combined"]] <- preservedCalib[,grep("likelihood",colnames(preservedCalib),value=TRUE)]
}
parameterIndexes <- 1:(min(likeIndexes)-1)
preservedCalib <- preservedCalib[!is.na(preservedCalib$combined),]
unfilteredLikelihood <- preservedCalib$combined
top5points <- preservedCalib$combined>quantile(preservedCalib$combined,0.95)
preservedCalibtop5 <- preservedCalib[,]
optRanges <-t(apply(preservedCalibtop5,2,function(x) quantile(x,c(0.05,0.5,0.95))))
pdf("dotplot.pdf")
if(lg){
plot(Reduce(min, -(unfilteredLikelihood), accumulate=TRUE),type="l", ylab="-log(likelihood)",xlab="iterations")
} else {
plot(Reduce(min, -log(unfilteredLikelihood), accumulate=TRUE),type="l", ylab="-log(likelihood)",xlab="iterations")
}
pari <- par(mfrow=c(1,2))
for(i in seq_along(colnames(preservedCalib)[parameterIndexes])){
plot(preservedCalib[,i],preservedCalib[,"combined"],pch=19,cex=.1, ylab="likelihood",
main = colnames(preservedCalib)[i], xlab="")
plot(preservedCalibtop5[,i],preservedCalibtop5[,"combined"],pch=19,cex=.1, ylab="likelihood",
main = paste0(colnames(preservedCalibtop5)[i]," (behav.)"), xlab="")
abline(v=optRanges[i,1],col="blue")
abline(v=optRanges[i,2],col="green")
abline(v=optRanges[i,3],col="red")
}
par(pari)
dev.off()
maxParValues <- preservedCalibtop5[which.max(preservedCalibtop5$combined),]
maxParIndexes <- paramIndex
write.csv(cbind.data.frame(calibrationPar=maxParValues,parameters=maxParIndexes),"maxLikelihood.csv")
write.csv(optRanges,"optRanges.csv")
# browser()
# There are some serious problems with this implementation. The uncertainity bouns are not for the parameters, but for the output values. The median is pointwise median for all simulation.
# And the 95 and 5 percentile also.
# dataVec <- preservedCalibtop5$combined
# closestToMedian <- function (dataVec) {
# match(sort(dataVec)[min(which(sort(dataVec)>=median(dataVec)))], dataVec)
# }
#
# while(is.null(optimalEpc)){
# match(quantile(preservedCalibtop5$combined,0.5), preservedCalibtop5$combined)
# optInterval <-t(apply(preservedCalibtop5,2,function(x) quantile(x,c(0.5-delta,0.5+delta))))
# optParamRange <- cbind.data.frame(rownames(optInterval)[parameterIndexes],as.numeric(paramIndex),optInterval[parameterIndexes,])
# optimalEpc <- tryCatch(musoRand(optParamRange,iterations = 2), error=function(e){NULL})
# delta <- delta*1.05
# if(delta > 0.5){
# delta <- 0.5
# }
# if((delta == 0.5) && is.null(optimalEpc)){
# stop("cannot find optimal value in the given range")
# }
# }
# print("getOptim")
# optimalEpc[[2]] <- optimalEpc[[2]][1,]
# write.csv(as.data.frame(optimalEpc),"epcOptim.csv")
# print(head(optRanges,n=-2))
# calibMuso(calibrationPar=optimalEpc[[1]],parameters=optimalEpc[[2]])
# file.copy(settings$epcInput[2],"epcOptim.epc")
}
generateOptEpc <- function(optRanges,delta, maxLikelihood=FALSE){
if(missing(delta)){
}
}