312 lines
13 KiB
R
312 lines
13 KiB
R
#' optiMuso
|
|
#'
|
|
#' This function calculates the -users specified- likelihood for random model input.
|
|
#'
|
|
#' @author Roland HOLLOS
|
|
#' @param measuredDataFile a
|
|
#' @param parameters b
|
|
#' @param sep c
|
|
#' @param startDate d
|
|
#' @param endDate e
|
|
#' @param formatString a
|
|
#' @param filterCol a
|
|
#' @param filterVal b
|
|
#' @param selVar c
|
|
#' @param outLoc c
|
|
#' @param pretag a
|
|
#' @param calPar a
|
|
#' @param skipSpinup a
|
|
#' @param iterations c
|
|
#' @param constrains d
|
|
#' @param likelihood d
|
|
#' @param settings e
|
|
#' @param leapYear b
|
|
#' @param plotName u
|
|
#' @importFrom ggplot2 ggplot aes_string geom_point ggsave
|
|
#' @importFrom magrittr '%>%'
|
|
#' @importFrom gridExtra grid.arrange
|
|
#' @export
|
|
optiMuso <- function(measuredData, parameters = NULL, startDate = NULL,
|
|
endDate = NULL, formatString = "%Y-%m-%d",
|
|
dataVar, outLoc = "./calib",
|
|
preTag = "cal-", settings = setupMuso(),
|
|
outVars = NULL, iterations = 30,
|
|
skipSpinup = TRUE, plotName = "calib.jpg",
|
|
modifyOriginal=TRUE, likelihood, uncertainity = NULL,
|
|
naVal = NULL, postProcString = NULL, w=NULL, lg=FALSE, parallel = TRUE) {
|
|
# Exanding likelihood
|
|
likelihoodFull <- as.list(rep(NA,length(dataVar)))
|
|
names(likelihoodFull) <- names(dataVar)
|
|
if(!missing(likelihood)) {
|
|
lapply(names(likelihood),function(x){
|
|
likelihoodFull[[x]] <<- likelihood[[x]]
|
|
})
|
|
}
|
|
defaultLikelihood <- which(is.na(likelihood))
|
|
if(length(defaultLikelihood)>0){
|
|
likelihoodFull[[defaultLikelihood]] <- (function(x, y){
|
|
exp(-sqrt(mean((x-y)^2)))
|
|
})
|
|
}
|
|
|
|
mdata <- measuredData
|
|
if(is.null(parameters)){
|
|
parameters <- tryCatch(read.csv("parameters.csv", stringsAsFactor=FALSE), error = function (e) {
|
|
stop("You need to specify a path for the parameters.csv, or a matrix.")
|
|
})
|
|
} else {
|
|
if((!is.list(parameters)) & (!is.matrix(parameters))){
|
|
parameters <- tryCatch(read.csv(parameters, stringsAsFactor=FALSE), error = function (e){
|
|
stop("Cannot find neither parameters file neither the parameters matrix")
|
|
})
|
|
}}
|
|
|
|
outLoc <- normalizePath(outLoc)
|
|
outLocPlain <- basename(outLoc)
|
|
currDir <- getwd()
|
|
|
|
if(!dir.exists(outLoc)){
|
|
dir.create(outLoc)
|
|
warning(paste(outLoc," is not exists, so it was created"))
|
|
}
|
|
|
|
outLoc <- normalizePath(outLoc)
|
|
parameterNames <- parameters[,1]
|
|
pretag <- file.path(outLoc,preTag)
|
|
##reading the original epc file at the specified
|
|
## row numbers
|
|
print("optiMuso is randomizing the epc parameters now...",quote = FALSE)
|
|
if(iterations < 3000){
|
|
randVals <- musoRand(parameters = parameters,constrains = NULL, iterations = 3000)
|
|
randVals[[2]]<- randVals[[2]][sample(1:3000,iterations),]
|
|
} else {
|
|
randVals <- musoRand(parameters = parameters,constrains = NULL, iterations = iterations)
|
|
}
|
|
|
|
origEpc <- readValuesFromFile(settings$epc[2],randVals[[1]])
|
|
partialResult <- matrix(ncol=length(randVals[[1]])+2*length(dataVar))
|
|
colN <- randVals[[1]]
|
|
colN[match(parameters[,2],randVals[[1]])] <- parameters[,1]
|
|
colnames(partialResult) <- c(colN,sprintf("%s_likelihood",names(dataVar)),
|
|
sprintf("%s_rmse",names(dataVar)))
|
|
numParameters <- length(colN)
|
|
partialResult[1:numParameters] <- origEpc
|
|
## Prepare the preservedCalib matrix for the faster
|
|
## run.
|
|
|
|
pretag <- file.path(outLoc,preTag)
|
|
|
|
musoCodeToIndex <- sapply(dataVar,function(musoCode){
|
|
settings$dailyOutputTable[settings$dailyOutputTable$code == musoCode,"index"]
|
|
})
|
|
resultRange <- (numParameters + 1):(ncol(partialResult))
|
|
## Creating function for generating separate
|
|
## csv files for each run
|
|
|
|
progBar <- txtProgressBar(1,iterations,style=3)
|
|
settings$iniInput[2] %>%
|
|
(function(x) paste0(dirname(x),"/",tools::file_path_sans_ext(basename(x)),"-tmp.",tools::file_ext(x))) %>%
|
|
unlink
|
|
randValues <- randVals[[2]]
|
|
|
|
settings$calibrationPar <- randVals[[1]]
|
|
|
|
if(!is.null(naVal)){
|
|
measuredData <- as.data.frame(measuredData)
|
|
measuredData[measuredData == naVal] <- NA
|
|
}
|
|
|
|
alignIndexes <- alignMuso(settings,measuredData)
|
|
if(!is.null(uncertainity)){
|
|
uncert <- measuredData[alignIndexes$meas,uncertainity]
|
|
} else {
|
|
uncert <- NULL
|
|
}
|
|
# browser()
|
|
origModellOut <- calibMuso(settings=settings, silent=TRUE, skipSpinup = skipSpinup, postProcString=postProcString, modifyOriginal=modifyOriginal)
|
|
partialResult[,resultRange] <- calcLikelihoodsAndRMSE(dataVar=dataVar,
|
|
mod=origModellOut,
|
|
mes=measuredData,
|
|
likelihoods=likelihood,
|
|
alignIndexes=alignIndexes,
|
|
musoCodeToIndex = musoCodeToIndex,uncert=uncert)
|
|
write.csv(x=origModellOut, file=paste0(pretag, 1, ".csv"))
|
|
print("Running the model with the random epc values...", quote = FALSE)
|
|
|
|
# if(!is.null(postProcString)){
|
|
# colNumb <- length(settings$dailyVarCodes) + 1
|
|
# }
|
|
|
|
write.csv(x=partialResult, file="preservedCalib.csv",row.names=FALSE)
|
|
for(i in 2:(iterations+1)){
|
|
# browser()
|
|
tmp <- tryCatch(calibMuso(settings = settings,
|
|
parameters = randValues[(i-1),],
|
|
silent= TRUE,
|
|
skipSpinup = skipSpinup, modifyOriginal=modifyOriginal, postProcString = postProcString), error = function (e) NULL)
|
|
if(is.null(tmp)){
|
|
partialResult[,resultRange] <- NA
|
|
} else {
|
|
partialResult[,resultRange] <- calcLikelihoodsAndRMSE(dataVar=dataVar,
|
|
mod=tmp,
|
|
mes=measuredData,
|
|
likelihoods=likelihood,
|
|
alignIndexes=alignIndexes,
|
|
musoCodeToIndex = musoCodeToIndex, uncert = uncert)
|
|
}
|
|
|
|
|
|
partialResult[1:numParameters] <- randValues[(i-1),]
|
|
write.table(x=partialResult, file="preservedCalib.csv", append=TRUE, row.names=FALSE,
|
|
sep=",", col.names=FALSE)
|
|
write.csv(x=tmp, file=paste0(pretag, (i+1),".csv"))
|
|
setTxtProgressBar(progBar,i)
|
|
}
|
|
|
|
musoGlue("preservedCalib.csv",w=w, lg = lg)
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
alignMuso <- function (settings,measuredData) {
|
|
# Have to fix for other starting points also
|
|
modelDates <- seq(from= as.Date(sprintf("%s-01-01",settings$startYear)),
|
|
by="days",
|
|
to=as.Date(sprintf("%s-12-31",settings$startYear+settings$numYears-1)))
|
|
modelDates <- grep("-02-29",modelDates,invert=TRUE, value=TRUE)
|
|
|
|
measuredDates <- apply(measuredData,1,function(xrow){
|
|
sprintf("%s-%s-%s",xrow[1],xrow[2],xrow[3])
|
|
})
|
|
|
|
modIndex <- match(as.Date(measuredDates), as.Date(modelDates))
|
|
measIndex <- which(!is.na(modIndex))
|
|
modIndex <- modIndex[!is.na(modIndex)]
|
|
cbind.data.frame(model=modIndex,meas=measIndex)
|
|
}
|
|
|
|
# calcLikelihoodsAndRMSE <- function(dataVar, mod, mes, likelihoods, alignIndexes, musoCodeToIndex, uncert){
|
|
#
|
|
# likelihoodRMSE <- sapply(names(dataVar),function(key){
|
|
# # browser()
|
|
# modelled <- mod[alignIndexes$mod,musoCodeToIndex[key]]
|
|
# measured <- mes[alignIndexes$meas,key]
|
|
# modelled <- modelled[!is.na(measured)]
|
|
# # uncert <- uncert[!is.na(measured)]
|
|
# measured <- measured[!is.na(measured)]
|
|
# res <- c(likelihoods[[key]](modelled, measured, uncert),
|
|
# sqrt(mean((modelled-measured)^2))
|
|
# )
|
|
# res
|
|
# })
|
|
# names(likelihoodRMSE) <- c(sprintf("%s_likelihood",dataVar), sprintf("%s_rmse",dataVar))
|
|
#
|
|
# return(c(likelihoodRMSE[1,],likelihoodRMSE[2,]))
|
|
# }
|
|
|
|
#' musoGlue
|
|
#'
|
|
#' This function calculates the -users specified- likelihood for random model input.
|
|
#'
|
|
#' @author Roland HOLLOS
|
|
#' @param plotName u
|
|
#' @export
|
|
musoGlue <- function(presCalFile, w, delta = 0.17, settings=setupMuso(), parameters=read.csv("parameters.csv",
|
|
stringsAsFactors=FALSE), lg=FALSE){
|
|
if(is.data.frame(presCalFile)){
|
|
preservedCalib <- presCalFile
|
|
} else {
|
|
preservedCalib <- read.csv(presCalFile)
|
|
}
|
|
|
|
paramIndex <- parameters[(match(colnames(preservedCalib),parameters[,1])),2]
|
|
paramIndex <- paramIndex[!is.na(paramIndex)]
|
|
paramIndex <- c(paramIndex,
|
|
as.numeric(gsub("X","",
|
|
grep("X[0-9]{1,}",
|
|
colnames(preservedCalib),value=TRUE))))
|
|
preservedCalib <- preservedCalib[-1,] #original
|
|
|
|
likeIndexes <- grep("likelihood",colnames(preservedCalib))
|
|
if(!is.null(w)){
|
|
forCombine<- sapply(names(w),function(n){
|
|
grep(sprintf("%s_likelihood",n),colnames(preservedCalib))
|
|
})
|
|
preservedCalib[["combined"]] <- apply(as.data.frame(Map(function(x,y){
|
|
toNormalize <- preservedCalib[,y]
|
|
toNormalize <- toNormalize / sqrt(sum(x^2))
|
|
toNormalize * x
|
|
|
|
},w,forCombine)), 1, sum)
|
|
} else {
|
|
preservedCalib[["combined"]] <- preservedCalib[,grep("likelihood",colnames(preservedCalib),value=TRUE)]
|
|
}
|
|
|
|
parameterIndexes <- 1:(min(likeIndexes)-1)
|
|
preservedCalib <- preservedCalib[!is.na(preservedCalib$combined),]
|
|
unfilteredLikelihood <- preservedCalib$combined
|
|
top5points <- preservedCalib$combined>quantile(preservedCalib$combined,0.95)
|
|
preservedCalibtop5 <- preservedCalib[,]
|
|
optRanges <-t(apply(preservedCalibtop5,2,function(x) quantile(x,c(0.05,0.5,0.95))))
|
|
pdf("dotplot.pdf")
|
|
if(lg){
|
|
plot(Reduce(min, -(unfilteredLikelihood), accumulate=TRUE),type="l", ylab="-log(likelihood)",xlab="iterations")
|
|
} else {
|
|
plot(Reduce(min, -log(unfilteredLikelihood), accumulate=TRUE),type="l", ylab="-log(likelihood)",xlab="iterations")
|
|
}
|
|
pari <- par(mfrow=c(1,2))
|
|
for(i in seq_along(colnames(preservedCalib)[parameterIndexes])){
|
|
plot(preservedCalib[,i],preservedCalib[,"combined"],pch=19,cex=.1, ylab="likelihood",
|
|
main = colnames(preservedCalib)[i], xlab="")
|
|
plot(preservedCalibtop5[,i],preservedCalibtop5[,"combined"],pch=19,cex=.1, ylab="likelihood",
|
|
main = paste0(colnames(preservedCalibtop5)[i]," (behav.)"), xlab="")
|
|
abline(v=optRanges[i,1],col="blue")
|
|
abline(v=optRanges[i,2],col="green")
|
|
abline(v=optRanges[i,3],col="red")
|
|
|
|
}
|
|
|
|
par(pari)
|
|
dev.off()
|
|
maxParValues <- preservedCalibtop5[which.max(preservedCalibtop5$combined),]
|
|
maxParIndexes <- paramIndex
|
|
write.csv(cbind.data.frame(calibrationPar=maxParValues,parameters=maxParIndexes),"maxLikelihood.csv")
|
|
write.csv(optRanges,"optRanges.csv")
|
|
# browser()
|
|
# There are some serious problems with this implementation. The uncertainity bouns are not for the parameters, but for the output values. The median is pointwise median for all simulation.
|
|
# And the 95 and 5 percentile also.
|
|
# dataVec <- preservedCalibtop5$combined
|
|
# closestToMedian <- function (dataVec) {
|
|
# match(sort(dataVec)[min(which(sort(dataVec)>=median(dataVec)))], dataVec)
|
|
# }
|
|
#
|
|
# while(is.null(optimalEpc)){
|
|
# match(quantile(preservedCalibtop5$combined,0.5), preservedCalibtop5$combined)
|
|
# optInterval <-t(apply(preservedCalibtop5,2,function(x) quantile(x,c(0.5-delta,0.5+delta))))
|
|
# optParamRange <- cbind.data.frame(rownames(optInterval)[parameterIndexes],as.numeric(paramIndex),optInterval[parameterIndexes,])
|
|
# optimalEpc <- tryCatch(musoRand(optParamRange,iterations = 2), error=function(e){NULL})
|
|
# delta <- delta*1.05
|
|
# if(delta > 0.5){
|
|
# delta <- 0.5
|
|
# }
|
|
# if((delta == 0.5) && is.null(optimalEpc)){
|
|
# stop("cannot find optimal value in the given range")
|
|
# }
|
|
# }
|
|
# print("getOptim")
|
|
# optimalEpc[[2]] <- optimalEpc[[2]][1,]
|
|
# write.csv(as.data.frame(optimalEpc),"epcOptim.csv")
|
|
# print(head(optRanges,n=-2))
|
|
# calibMuso(calibrationPar=optimalEpc[[1]],parameters=optimalEpc[[2]])
|
|
# file.copy(settings$epcInput[2],"epcOptim.epc")
|
|
}
|
|
|
|
generateOptEpc <- function(optRanges,delta, maxLikelihood=FALSE){
|
|
if(missing(delta)){
|
|
|
|
}
|
|
|
|
}
|