426 lines
20 KiB
R
426 lines
20 KiB
R
#' calibrateMuso
|
|
#'
|
|
#' This funtion uses the Monte Carlo technique to uniformly sample the parameter space from user defined parameters of the Biome-BGCMuSo model. The sampling algorithm ensures that the parameters are constrained by the model logic which means that parameter dependencies are fully taken into account (parameter dependency means that e.g leaf C:N ratio must be smaller than C:N ratio of litter; more complicated rules apply to the allocation parameters where the allocation fractions to different plant compartments must sum up 1). This function implements a mathematically correct solution to provide uniform distriution for all selected parameters.
|
|
#' @author Roland HOLLOS
|
|
#' @importFrom future future
|
|
#' @export
|
|
calibrateMuso <- function(measuredData, parameters =read.csv("parameters.csv", stringsAsFactor=FALSE), startDate = NULL,
|
|
endDate = NULL, formatString = "%Y-%m-%d",
|
|
dataVar, outLoc = "./calib",
|
|
preTag = "cal-", settings = setupMuso(),
|
|
outVars = NULL, iterations = 100,
|
|
skipSpinup = TRUE, plotName = "calib.jpg",
|
|
modifyOriginal=TRUE, likelihood, uncertainity = NULL,
|
|
naVal = NULL, postProcString = NULL,
|
|
sourceFile=NULL, # bases for musoRand if dependecy group is not fully defined by parameters.csv
|
|
thread_prefix="thread", numCores = max(c(parallel::detectCores()-1,1)), pb = txtProgressBar(min=0, max=iterations, style=3),
|
|
maxLikelihoodEpc=TRUE,
|
|
pbUpdate = setTxtProgressBar, outputLoc="./", method="GLUE",lg = FALSE, w=NULL, ...){
|
|
|
|
future::plan(future::multisession(workers = numCores), numCores = numCores)
|
|
file.remove(list.files(path = settings$inputLoc, pattern="progress.txt", recursive = TRUE, full.names=TRUE))
|
|
file.remove(list.files(path = settings$inputLoc, pattern="preservedCalib.csv", recursive = TRUE, full.names=TRUE))
|
|
unlink(file.path(settings$inputLoc,"thread"),recursive=TRUE)
|
|
|
|
# ____ _ _ _ _
|
|
# / ___|_ __ ___ __ _| |_ ___ | |_| |__ _ __ ___ __ _ __| |___
|
|
# | | | '__/ _ \/ _` | __/ _ \ | __| '_ \| '__/ _ \/ _` |/ _` / __|
|
|
# | |___| | | __/ (_| | || __/ | |_| | | | | | __/ (_| | (_| \__ \
|
|
# \____|_| \___|\__,_|\__\___| \__|_| |_|_| \___|\__,_|\__,_|___/
|
|
|
|
|
|
|
|
copyToThreadDirs(thread_prefix, numcores = numCores, runDir = settings$inputLoc)
|
|
|
|
# ____ _ _ _
|
|
# | _ \ _ _ _ __ | |_| |__ _ __ ___ __ _ __| |___
|
|
# | |_) | | | | '_ \ | __| '_ \| '__/ _ \/ _` |/ _` / __|
|
|
# | _ <| |_| | | | | | |_| | | | | | __/ (_| | (_| \__ \
|
|
# |_| \_\\__,_|_| |_| \__|_| |_|_| \___|\__,_|\__,_|___/
|
|
|
|
threadCount <- distributeCores(iterations, numCores)
|
|
|
|
fut <- lapply(1:numCores, function(i) {
|
|
# browser()
|
|
future({
|
|
tryCatch(
|
|
musoSingleThread(measuredData, parameters, startDate,
|
|
sourceFile=settings$epc[2], # EPC SPECIFIC
|
|
endDate, formatString,
|
|
dataVar, outLoc,
|
|
preTag, settings,
|
|
outVars, iterations = threadCount[i],
|
|
skipSpinup, plotName,
|
|
modifyOriginal, likelihood, uncertainity,
|
|
naVal, postProcString, i)
|
|
, error = function(e){
|
|
# browser()
|
|
writeLines(as.character(e),"error.txt")
|
|
writeLines(as.character(iterations),"progress.txt")
|
|
})
|
|
|
|
# musoSingleThread(measuredData, parameters, startDate,
|
|
# endDate, formatString,
|
|
# dataVar, outLoc,
|
|
# preTag, settings,
|
|
# outVars, iterations = threadCount[i],
|
|
# skipSpinup, plotName,
|
|
# modifyOriginal, likelihood, uncertainity,
|
|
# naVal, postProcString, i)
|
|
})
|
|
})
|
|
|
|
# __ ___ _ _
|
|
# \ \ / / |__ __ _| |_ ___| |__ _ __ _ __ ___ ___ ___ ___ ___
|
|
# \ \ /\ / /| '_ \ / _` | __/ __| '_ \ | '_ \| '__/ _ \ / __/ _ \/ __/ __|
|
|
# \ V V / | | | | (_| | || (__| | | | | |_) | | | (_) | (_| __/\__ \__ \
|
|
# \_/\_/ |_| |_|\__,_|\__\___|_| |_| | .__/|_| \___/ \___\___||___/___/
|
|
# |_|
|
|
|
|
getProgress <- function(){
|
|
# threadfiles <- list.files(settings$inputLoc, pattern="progress.txt", recursive = TRUE)
|
|
threadfiles <- list.files(pattern="progress.txt", recursive = TRUE)
|
|
if(length(threadfiles)==0){
|
|
return(0)
|
|
} else {
|
|
sum(sapply(threadfiles, function(x){
|
|
partRes <- readLines(x)
|
|
if(length(partRes)==0){
|
|
return(0)
|
|
} else {
|
|
return(as.numeric(partRes))
|
|
}
|
|
|
|
}))
|
|
|
|
}
|
|
}
|
|
|
|
progress <- 0
|
|
while(progress < iterations){
|
|
Sys.sleep(1)
|
|
progress <- tryCatch(getProgress(), error=function(e){progress})
|
|
if(is.null(pb)){
|
|
pbUpdate(as.numeric(progress))
|
|
} else {
|
|
pbUpdate(pb,as.numeric(progress))
|
|
}
|
|
}
|
|
if(!is.null(pb)){
|
|
close(pb)
|
|
}
|
|
|
|
# ____ _ _
|
|
# / ___|___ _ __ ___ | |__ (_)_ __ ___
|
|
# | | / _ \| '_ ` _ \| '_ \| | '_ \ / _ \
|
|
# | |__| (_) | | | | | | |_) | | | | | __/
|
|
# \____\___/|_| |_| |_|_.__/|_|_| |_|\___|
|
|
resultFiles <- list.files(pattern="preservedCalib.*csv$",recursive=TRUE)
|
|
res0 <- read.csv(grep("thread_1/",resultFiles, value=TRUE),stringsAsFactors=FALSE)
|
|
if(numCores==1){
|
|
results <- res0
|
|
} else {
|
|
resultFilesSans0 <- grep("thread_1/", resultFiles, value=TRUE, invert=TRUE)
|
|
# results <- do.call(rbind,lapply(resultFilesSans0, function(f){read.csv(f, stringsAsFactors=FALSE)}))
|
|
resultsSans0 <- lapply(resultFilesSans0, function(f){read.csv(f, stringsAsFactors=FALSE, header=FALSE)})
|
|
resultsSans0 <- do.call(rbind,resultsSans0)
|
|
colnames(resultsSans0) <- colnames(res0)
|
|
results <- (rbind(res0,resultsSans0))
|
|
}
|
|
|
|
switch(method,
|
|
"GLUE"={
|
|
musoGlue(results, parameters=parameters,settings=settings, w=w, lg=lg)
|
|
liks <- results[,sprintf("%s_likelihood",names(likelihood))]
|
|
epcIndexes <- future::value(fut[[1]], stdout = FALSE, signal=FALSE)
|
|
if(is.null(dim(liks)) || dim(liks)[2] == 1 ){
|
|
ml_place <- which.max(liks)
|
|
} else {
|
|
ml_place <- which.max(as.matrix(liks) %*% as.matrix(w))
|
|
}
|
|
epcVals <- results[ml_place,1:length(epcIndexes)]
|
|
epcPlace <- file.path(dirname(settings$inputFiles),settings$epc)[2]
|
|
changemulline(filePaths= epcPlace, epcIndexes,
|
|
epcVals, src =epcPlace,# settings$epcInput[2],
|
|
outFiles = file.path(outputLoc, "maxLikelihood_epc.epc"))
|
|
names(epcVals) <- epcIndexes
|
|
},
|
|
"agromo"={
|
|
liks <- results[,sprintf("%s_likelihood",names(likelihood))]
|
|
epcIndexes <- future::value(fut[[1]], stdout = FALSE, signal=FALSE)
|
|
epcVals <- results[which.max(liks),1:length(epcIndexes)]
|
|
epcPlace <- file.path(dirname(settings$inputFiles),settings$epc)[2]
|
|
changemulline(filePaths= epcPlace, epcIndexes,
|
|
epcVals, src =epcPlace,# settings$epcInput[2],
|
|
outFiles = file.path(outputLoc, "maxLikelihood_epc.epc"))
|
|
names(epcVals) <- epcIndexes
|
|
xdate <- as.Date(measuredData$date)
|
|
meanM <- measuredData[,sprintf("mean.%s", names(likelihood))]
|
|
minsd <- meanM - measuredData[,sprintf("sd.%s", names(likelihood)[1])]
|
|
maxsd <- meanM + measuredData[,sprintf("sd.%s", names(likelihood)[1])]
|
|
minM <- measuredData[,sprintf("min.%s", names(likelihood)[1])]
|
|
maxM <- measuredData[,sprintf("max.%s", names(likelihood)[1])]
|
|
plot(xdate, minM, type="l", xlab=NA, ylim=c(min(minM)*0.8, max(maxM)*1.1), ylab = names(likelihood)[1])
|
|
lines(xdate, maxM)
|
|
polygon(c(xdate,rev(xdate)),c(minM,rev(maxM)), col="gray",border=NA)
|
|
lines(xdate, minsd)
|
|
lines(xdate, maxsd)
|
|
polygon(c(xdate,rev(xdate)),c(minsd,rev(maxsd)), col="gray30",border=NA)
|
|
points(xdate,meanM)
|
|
|
|
varIndex <- match(as.character(dataVar),settings$dailyVarCodes)
|
|
apriori <- calibMuso(settings)
|
|
modDates <- as.Date(row.names(apriori), format="%d.%m.%Y")
|
|
lines(modDates, apriori[,varIndex],col="brown")
|
|
calibrated <- calibMuso(settings, calibrationPar = as.numeric(names(epcVals)), parameters=epcVals)
|
|
lines(modDates, calibrated[,varIndex],col="blue")
|
|
|
|
},
|
|
stop(sprintf("method: %s not found, please choose from {GLUE, agromo}. See more about this in the documentation of the function!", method))
|
|
)
|
|
}
|
|
|
|
copyToThreadDirs <- function(prefix="thread", numcores=parallel::detectCores()-1, runDir="."){
|
|
dir.create(file.path(runDir,prefix), showWarnings=TRUE)
|
|
fileNames <- grep(".*thread$", list.files(runDir,full.names=TRUE), value=TRUE, invert=TRUE)
|
|
invisible(sapply(1:numcores,function(corenum){
|
|
threadDir <- file.path(runDir,prefix,paste0(prefix,"_",corenum),"")
|
|
dir.create(threadDir, showWarnings=FALSE)
|
|
file.copy(from=fileNames,to=threadDir, overwrite=FALSE, recursive=TRUE)
|
|
}))
|
|
}
|
|
|
|
musoSingleThread <- function(measuredData, parameters = NULL, startDate = NULL,
|
|
sourceFile=NULL,
|
|
endDate = NULL, formatString = "%Y-%m-%d",
|
|
dataVar, outLoc = "./calib",
|
|
preTag = "cal-", settings = setupMuso(),
|
|
outVars = NULL, iterations = 300,
|
|
skipSpinup = TRUE, plotName = "calib.jpg",
|
|
modifyOriginal=TRUE, likelihood, uncertainity = NULL,
|
|
naVal = NULL, postProcString = NULL, threadNumber) {
|
|
|
|
setwd(paste0(settings$inputLoc, "/thread/thread_", threadNumber))
|
|
|
|
iniFiles <- file.path(settings$iniInput)
|
|
# iniFiles <- list.files(pattern=".*ini")
|
|
# if(length(iniFiles)==1){
|
|
# iniFiles <- rep(iniFiles, 2)
|
|
# }
|
|
settings <- setupMuso(iniInput = iniFiles)
|
|
# Exanding likelihood
|
|
|
|
likelihoodFull <- as.list(rep(NA,length(dataVar)))
|
|
names(likelihoodFull) <- names(dataVar)
|
|
if(!missing(likelihood)) {
|
|
lapply(names(likelihood),function(x){
|
|
likelihoodFull[[x]] <<- likelihood[[x]]
|
|
})
|
|
}
|
|
defaultLikelihood <- which(is.na(likelihood))
|
|
if(length(defaultLikelihood)>0){
|
|
likelihoodFull[[defaultLikelihood]] <- (function(x, y){
|
|
exp(-sqrt(mean((x-y)^2)))
|
|
})
|
|
}
|
|
|
|
mdata <- measuredData
|
|
if(is.null(parameters)){
|
|
parameters <- tryCatch(read.csv("parameters.csv", stringsAsFactor=FALSE), error = function (e) {
|
|
stop("You need to specify a path for the parameters.csv, or a matrix.")
|
|
})
|
|
} else {
|
|
if((!is.list(parameters)) & (!is.matrix(parameters))){
|
|
parameters <- tryCatch(read.csv(parameters, stringsAsFactor=FALSE), error = function (e){
|
|
stop("Cannot find neither parameters file neither the parameters matrix")
|
|
})
|
|
}}
|
|
|
|
outLoc <- normalizePath(outLoc)
|
|
outLocPlain <- basename(outLoc)
|
|
currDir <- getwd()
|
|
|
|
if(!dir.exists(outLoc)){
|
|
dir.create(outLoc)
|
|
warning(paste(outLoc," is not exists, so it was created"))
|
|
}
|
|
|
|
outLoc <- normalizePath(outLoc)
|
|
parameterNames <- parameters[,1]
|
|
pretag <- file.path(outLoc,preTag)
|
|
##reading the original epc file at the specified
|
|
## row numbers
|
|
# browser()
|
|
print("optiMuso is randomizing the epc parameters now...",quote = FALSE)
|
|
if(iterations < 3000){
|
|
randVals <- musoRand(parameters = parameters,constrains = NULL, iterations = 3000,sourceFile=sourceFile)
|
|
randVals[[2]]<- randVals[[2]][sample(1:3000,iterations),] # TODO: last not random
|
|
} else {
|
|
randVals <- musoRand(parameters = parameters,constrains = NULL, iterations = iterations,sourceFile=sourceFile)
|
|
}
|
|
|
|
origEpc <- readValuesFromFile(settings$epc[2],randVals[[1]])
|
|
partialResult <- matrix(ncol=length(randVals[[1]])+2*length(dataVar))
|
|
colN <- randVals[[1]]
|
|
colN[match(parameters[,2],randVals[[1]])] <- parameters[,1]
|
|
colN[match(parameters[,2], randVals[[1]])[!is.na(match(parameters[,2],randVals[[1]]))]] <- parameters[,1]
|
|
colnames(partialResult) <- c(colN,sprintf("%s_likelihood",names(dataVar)),
|
|
sprintf("%s_rmse",names(dataVar)))
|
|
numParameters <- length(colN)
|
|
partialResult[1:numParameters] <- origEpc
|
|
## Prepare the preservedCalib matrix for the faster
|
|
## run.
|
|
|
|
pretag <- file.path(outLoc,preTag)
|
|
|
|
musoCodeToIndex <- sapply(dataVar,function(musoCode){
|
|
settings$dailyOutputTable[settings$dailyOutputTable$code == musoCode,"index"]
|
|
})
|
|
resultRange <- (numParameters + 1):(ncol(partialResult))
|
|
## Creating function for generating separate
|
|
## csv files for each run
|
|
|
|
settings$iniInput[2] %>%
|
|
(function(x) paste0(dirname(x),"/",tools::file_path_sans_ext(basename(x)),"-tmp.",tools::file_ext(x))) %>%
|
|
unlink
|
|
randValues <- randVals[[2]]
|
|
|
|
settings$calibrationPar <- randVals[[1]]
|
|
|
|
if(!is.null(naVal)){
|
|
measuredData <- as.data.frame(measuredData)
|
|
measuredData[measuredData == naVal] <- NA
|
|
}
|
|
|
|
alignIndexes <- alignMuso(settings,measuredData)
|
|
if(!is.null(uncertainity)){
|
|
uncert <- measuredData[alignIndexes$meas,uncertainity]
|
|
} else {
|
|
uncert <- NULL
|
|
}
|
|
# browser()
|
|
if(threadNumber == 1){
|
|
origModellOut <- calibMuso(settings=settings, silent=TRUE, skipSpinup = skipSpinup, postProcString=postProcString, modifyOriginal=modifyOriginal)
|
|
partialResult[,resultRange] <- calcLikelihoodsAndRMSE(dataVar=dataVar,
|
|
mod=origModellOut,
|
|
mes=measuredData,
|
|
likelihoods=likelihood,
|
|
alignIndexes=alignIndexes,
|
|
musoCodeToIndex = musoCodeToIndex,uncert=uncert)
|
|
write.csv(x=origModellOut, file=paste0(pretag, 1, ".csv"))
|
|
write.csv(x=partialResult, file="preservedCalib.csv",row.names=FALSE)
|
|
}
|
|
print("Running the model with the random epc values...", quote = FALSE)
|
|
|
|
# if(!is.null(postProcString)){
|
|
# colNumb <- length(settings$dailyVarCodes) + 1
|
|
# }
|
|
|
|
singleVarCalib <- FALSE
|
|
if(is.null(dim(randValues))){
|
|
singleVarCalib <- TRUE
|
|
}
|
|
|
|
for(i in 2:(iterations+1)){
|
|
|
|
if(!singleVarCalib){
|
|
parameters <- randValues[(i-1),]
|
|
} else {
|
|
parameters <- randValues[i]
|
|
}
|
|
|
|
tmp <- tryCatch(calibMuso(settings = settings,
|
|
parameters = parameters,
|
|
silent= TRUE,
|
|
skipSpinup = skipSpinup, modifyOriginal=modifyOriginal, postProcString = postProcString), error = function (e) NULL)
|
|
if(is.null(tmp)){
|
|
partialResult[,resultRange] <- NA
|
|
} else {
|
|
partialResult[,resultRange] <- calcLikelihoodsAndRMSE(dataVar=dataVar,
|
|
mod=tmp,
|
|
mes=measuredData,
|
|
likelihoods=likelihood,
|
|
alignIndexes=alignIndexes,
|
|
musoCodeToIndex = musoCodeToIndex, uncert = uncert)
|
|
}
|
|
|
|
partialResult[1:numParameters] <- parameters
|
|
write.table(x=partialResult, file="preservedCalib.csv", append=TRUE, row.names=FALSE,
|
|
sep=",", col.names=FALSE)
|
|
write.csv(x=tmp, file=paste0(pretag, (i+1),".csv"))
|
|
writeLines(as.character(i-1),"progress.txt")
|
|
}
|
|
|
|
if(threadNumber == 1){
|
|
return(randVals[[1]])
|
|
}
|
|
|
|
}
|
|
|
|
distributeCores <- function(iterations, numCores){
|
|
perProcess<- iterations %/% numCores
|
|
numSimu <- rep(perProcess,numCores)
|
|
gainers <- sample(1:numCores, iterations %% numCores)
|
|
numSimu[gainers] <- numSimu[gainers] + 1
|
|
numSimu
|
|
}
|
|
|
|
prepareFromAgroMo <- function(fName){
|
|
obs <- read.table(fName, stringsAsFactors=FALSE, sep = ";", header=T)
|
|
obs <- reshape(obs, timevar="var_id", idvar = "date", direction = "wide")
|
|
dateCols <- apply(do.call(rbind,(strsplit(obs$date, split = "-"))),2,as.numeric)
|
|
colnames(dateCols) <- c("year", "month", "day")
|
|
cbind.data.frame(dateCols, obs)
|
|
}
|
|
|
|
|
|
calcLikelihoodsAndRMSE <- function(dataVar, mod, mes, likelihoods, alignIndexes, musoCodeToIndex, uncert){
|
|
|
|
mes <- as.data.frame(mes)
|
|
# NOT COMPATIBLE WITH OLD MEASUREMENT DATA, mes have to be a matrix
|
|
likelihoodRMSE <- sapply(names(dataVar),function(key){
|
|
modelled <- mod[alignIndexes$mod,musoCodeToIndex[key]]
|
|
selected <- grep(sprintf("%s$", key), colnames(mes))
|
|
# browser()
|
|
|
|
measured <- mes[alignIndexes$meas,selected]
|
|
|
|
if(is.null(dim(measured))){
|
|
notNA <- !is.na(measured)
|
|
m <- measured <- measured[notNA]
|
|
|
|
} else {
|
|
notNA <- sapply(1:nrow(measured), function(x){!any(is.na(measured[x,]))})
|
|
measured <- measured[notNA,]
|
|
m <- measured[,grep("^mean", colnames(measured))]
|
|
}
|
|
modelled <- modelled[notNA]
|
|
|
|
# uncert <- uncert[!is.na(measured)]
|
|
|
|
# measured <- measured[!is.na(measured)]
|
|
res <- c(likelihoods[[key]](modelled, measured),
|
|
sqrt(mean((modelled-m)^2))
|
|
)
|
|
# browser()
|
|
res
|
|
})
|
|
names(likelihoodRMSE) <- c(sprintf("%s_likelihood",dataVar), sprintf("%s_rmse",dataVar))
|
|
return(c(likelihoodRMSE[1,],likelihoodRMSE[2,]))
|
|
}
|
|
|
|
agroLikelihood <- function(modVector,measured){
|
|
mu <- measured[,grep("mean", colnames(measured))]
|
|
stdev <- measured[,grep("^sd", colnames(measured))]
|
|
ndata <- nrow(measured)
|
|
sum(sapply(1:ndata, function(x){
|
|
dnorm(modVector, mu[x], stdev[x], log = TRUE)
|
|
}), na.rm=TRUE)
|
|
}
|
|
|
|
|
|
|
|
maxLikelihoodAgromo <- function (results, imgPath, varName, ...) {
|
|
|
|
}
|