diff --git a/RBBGCMuso/NAMESPACE b/RBBGCMuso/NAMESPACE index 4eb00d3..76126a4 100644 --- a/RBBGCMuso/NAMESPACE +++ b/RBBGCMuso/NAMESPACE @@ -1,7 +1,9 @@ # Generated by roxygen2: do not edit by hand export(calibMuso) +export(calibrateMuso) export(changeMusoC) +export(changemulline) export(checkMeteoBGC) export(cleanupMuso) export(compareMuso) @@ -54,6 +56,7 @@ importFrom(dplyr,summarize) importFrom(dplyr,tbl_df) importFrom(ecmwfr,wf_request) importFrom(ecmwfr,wf_set_key) +importFrom(future,future) importFrom(ggplot2,aes) importFrom(ggplot2,aes_string) importFrom(ggplot2,element_blank) diff --git a/RBBGCMuso/R/calibrateMuso.R b/RBBGCMuso/R/calibrateMuso.R new file mode 100644 index 0000000..0deefa5 --- /dev/null +++ b/RBBGCMuso/R/calibrateMuso.R @@ -0,0 +1,390 @@ +#' calibrateMuso +#' +#' This funtion uses the Monte Carlo technique to uniformly sample the parameter space from user defined parameters of the Biome-BGCMuSo model. The sampling algorithm ensures that the parameters are constrained by the model logic which means that parameter dependencies are fully taken into account (parameter dependency means that e.g leaf C:N ratio must be smaller than C:N ratio of litter; more complicated rules apply to the allocation parameters where the allocation fractions to different plant compartments must sum up 1). This function implements a mathematically correct solution to provide uniform distriution for all selected parameters. +#' @author Roland HOLLOS +#' @importFrom future future +#' @export +calibrateMuso <- function(measuredData, parameters = NULL, startDate = NULL, + endDate = NULL, formatString = "%Y-%m-%d", + dataVar, outLoc = "./calib", + preTag = "cal-", settings = setupMuso(), + outVars = NULL, iterations = 100, + skipSpinup = TRUE, plotName = "calib.jpg", + modifyOriginal=TRUE, likelihood, uncertainity = NULL, + naVal = NULL, postProcString = NULL, + thread_prefix="thread", numCores = (parallel::detectCores()-1), pb = txtProgressBar(min=0, max=iterations, style=3), + maxLikelihoodEpc=TRUE, + pbUpdate = setTxtProgressBar, method="GLUE",lg = FALSE, w=NULL, ...){ + + + file.remove(list.files(path = settings$inputLoc, pattern="progress.txt", recursive = TRUE)) + file.remove(list.files(path = settings$inputLoc, pattern="preservedCalib.csv", recursive = TRUE)) + unlink("thread",recursive=TRUE) + + # ____ _ _ _ _ + # / ___|_ __ ___ __ _| |_ ___ | |_| |__ _ __ ___ __ _ __| |___ + # | | | '__/ _ \/ _` | __/ _ \ | __| '_ \| '__/ _ \/ _` |/ _` / __| + # | |___| | | __/ (_| | || __/ | |_| | | | | | __/ (_| | (_| \__ \ + # \____|_| \___|\__,_|\__\___| \__|_| |_|_| \___|\__,_|\__,_|___/ + + + + copyToThreadDirs(thread_prefix, numcores = numCores, runDir = settings$inputLoc) + + # ____ _ _ _ + # | _ \ _ _ _ __ | |_| |__ _ __ ___ __ _ __| |___ + # | |_) | | | | '_ \ | __| '_ \| '__/ _ \/ _` |/ _` / __| + # | _ <| |_| | | | | | |_| | | | | | __/ (_| | (_| \__ \ + # |_| \_\\__,_|_| |_| \__|_| |_|_| \___|\__,_|\__,_|___/ + + threadCount <- distributeCores(iterations, numCores) + + fut <- lapply(1:numCores, function(i) { + # browser() + future({ + tryCatch(musoSingleThread(measuredData, parameters, startDate, + endDate, formatString, + dataVar, outLoc, + preTag, settings, + outVars, iterations = threadCount[i], + skipSpinup, plotName, + modifyOriginal, likelihood, uncertainity, + naVal, postProcString, i), error = function(e){ + writeLines(as.character(iterations),"progress.txt") + }) + + # musoSingleThread(measuredData, parameters, startDate, + # endDate, formatString, + # dataVar, outLoc, + # preTag, settings, + # outVars, iterations = threadCount[i], + # skipSpinup, plotName, + # modifyOriginal, likelihood, uncertainity, + # naVal, postProcString, i) + }) + }) + + # __ ___ _ _ + # \ \ / / |__ __ _| |_ ___| |__ _ __ _ __ ___ ___ ___ ___ ___ + # \ \ /\ / /| '_ \ / _` | __/ __| '_ \ | '_ \| '__/ _ \ / __/ _ \/ __/ __| + # \ V V / | | | | (_| | || (__| | | | | |_) | | | (_) | (_| __/\__ \__ \ + # \_/\_/ |_| |_|\__,_|\__\___|_| |_| | .__/|_| \___/ \___\___||___/___/ + # |_| + + getProgress <- function(){ + # threadfiles <- list.files(settings$inputLoc, pattern="progress.txt", recursive = TRUE) + threadfiles <- list.files(pattern="progress.txt", recursive = TRUE) + if(length(threadfiles)==0){ + return(0) + } else { + sum(sapply(threadfiles, function(x){ + partRes <- readLines(x) + if(length(partRes)==0){ + return(0) + } else { + return(as.numeric(partRes)) + } + + })) + + } + } + + progress <- 0 + while(progress < iterations){ + Sys.sleep(1) + progress <- tryCatch(getProgress(), error=function(e){progress}) + pbUpdate(pb,as.numeric(progress)) + } + close(pb) + + # ____ _ _ + # / ___|___ _ __ ___ | |__ (_)_ __ ___ + # | | / _ \| '_ ` _ \| '_ \| | '_ \ / _ \ + # | |__| (_) | | | | | | |_) | | | | | __/ + # \____\___/|_| |_| |_|_.__/|_|_| |_|\___| + + resultFiles <- list.files(pattern="preservedCalib.*csv$",recursive=TRUE) + res0 <- read.csv(grep("thread_1/",resultFiles, value=TRUE),stringsAsFactors=FALSE) + resultFilesSans0 <- grep("thread_1/", resultFiles, value=TRUE, invert=TRUE) + # results <- do.call(rbind,lapply(resultFilesSans0, function(f){read.csv(f, stringsAsFactors=FALSE)})) + resultsSans0 <- lapply(resultFilesSans0, function(f){read.csv(f, stringsAsFactors=FALSE, header=FALSE)}) + resultsSans0 <- do.call(rbind,resultsSans0) + colnames(resultsSans0) <- colnames(res0) + results <- (rbind(res0,resultsSans0)) + + switch(method, + "GLUE"={ + musoGlue(results, parameters=parameters,settings=settings, w=w, lg=lg) + }, + "agromo"={ + liks <- results[,sprintf("%s_likelihood",names(likelihood))] + epcIndexes <- value(fut[[1]], stdout = FALSE, signal=FALSE) + epcVals <- results[which.max(liks),1:length(epcIndexes)] + changemulline(filePaths= settings$epcInput[2], epcIndexes, epcVals, src = settings$epcInput[2], outFiles = "maxLikelihood_epc.epc") + names(epcVals) <- epcIndexes + xdate <- as.Date(measuredData$date) + meanM <- measuredData[,sprintf("mean.%s", names(likelihood))] + minsd <- meanM - measuredData[,sprintf("sd.%s", names(likelihood)[1])] + maxsd <- meanM + measuredData[,sprintf("sd.%s", names(likelihood)[1])] + minM <- measuredData[,sprintf("min.%s", names(likelihood)[1])] + maxM <- measuredData[,sprintf("max.%s", names(likelihood)[1])] + plot(xdate, minM, type="l", xlab=NA, ylim=c(min(minM)*0.8, max(maxM)*1.1), ylab = names(likelihood)[1]) + lines(xdate, maxM) + polygon(c(xdate,rev(xdate)),c(minM,rev(maxM)), col="gray",border=NA) + lines(xdate, minsd) + lines(xdate, maxsd) + polygon(c(xdate,rev(xdate)),c(minsd,rev(maxsd)), col="gray30",border=NA) + points(xdate,meanM) + + varIndex <- match(as.character(dataVar),settings$dailyVarCodes) + apriori <- calibMuso(settings) + modDates <- as.Date(row.names(apriori), format="%d.%m.%Y") + lines(modDates, apriori[,varIndex],col="brown") + calibrated <- calibMuso(settings, calibrationPar = as.numeric(names(epcVals)), parameters=epcVals) + lines(modDates, calibrated[,varIndex],col="blue") + + }, + stop(sprintf("method: %s not found, please choose from {GLUE, agromo}. See more about this in the documentation of the function!", method)) + ) + # Here starts maxLikelihoodAgroMo: parameters + + + # Here ends maxLikelihoodAgromo + + # return(epcVals) + # ____ _ _ _ _____ + # / ___| | | | | | ____| + # | | _| | | | | | _| + # | |_| | |__| |_| | |___ + # \____|_____\___/|_____| + + # musoGlue("preservedCalib.csv",w=w, lg = lg) +} + +copyToThreadDirs <- function(prefix="thread", numcores=parallel::detectCores()-1, runDir="."){ + dir.create(file.path(runDir,prefix), showWarnings=TRUE) + fileNames <- grep("^thread.*", list.files(runDir), value=TRUE, invert=TRUE) + invisible(sapply(1:numcores,function(corenum){ + threadDir <- file.path(runDir,prefix,paste0(prefix,"_",corenum)) + dir.create(threadDir, showWarnings=FALSE) + file.copy(from=fileNames,to=threadDir, overwrite=FALSE) + })) +} + +musoSingleThread <- function(measuredData, parameters = NULL, startDate = NULL, + endDate = NULL, formatString = "%Y-%m-%d", + dataVar, outLoc = "./calib", + preTag = "cal-", settings = setupMuso(), + outVars = NULL, iterations = 300, + skipSpinup = TRUE, plotName = "calib.jpg", + modifyOriginal=TRUE, likelihood, uncertainity = NULL, + naVal = NULL, postProcString = NULL, threadNumber) { + + setwd(paste0(settings$inputLoc, "/thread/thread_", threadNumber)) + iniFiles <- list.files(pattern=".*ini") + if(length(iniFiles)==1){ + iniFiles <- rep(iniFiles, 2) + } + settings <- setupMuso(iniInput = iniFiles) + # Exanding likelihood + likelihoodFull <- as.list(rep(NA,length(dataVar))) + names(likelihoodFull) <- names(dataVar) + if(!missing(likelihood)) { + lapply(names(likelihood),function(x){ + likelihoodFull[[x]] <<- likelihood[[x]] + }) + } + defaultLikelihood <- which(is.na(likelihood)) + if(length(defaultLikelihood)>0){ + likelihoodFull[[defaultLikelihood]] <- (function(x, y){ + exp(-sqrt(mean((x-y)^2))) + }) + } + + mdata <- measuredData + if(is.null(parameters)){ + parameters <- tryCatch(read.csv("parameters.csv", stringsAsFactor=FALSE), error = function (e) { + stop("You need to specify a path for the parameters.csv, or a matrix.") + }) + } else { + if((!is.list(parameters)) & (!is.matrix(parameters))){ + parameters <- tryCatch(read.csv(parameters, stringsAsFactor=FALSE), error = function (e){ + stop("Cannot find neither parameters file neither the parameters matrix") + }) + }} + + outLoc <- normalizePath(outLoc) + outLocPlain <- basename(outLoc) + currDir <- getwd() + + if(!dir.exists(outLoc)){ + dir.create(outLoc) + warning(paste(outLoc," is not exists, so it was created")) + } + + outLoc <- normalizePath(outLoc) + parameterNames <- parameters[,1] + pretag <- file.path(outLoc,preTag) + ##reading the original epc file at the specified + ## row numbers + print("optiMuso is randomizing the epc parameters now...",quote = FALSE) + if(iterations < 3000){ + randVals <- musoRand(parameters = parameters,constrains = NULL, iterations = 3000) + randVals[[2]]<- randVals[[2]][sample(1:3000,iterations),] + } else { + randVals <- musoRand(parameters = parameters,constrains = NULL, iterations = iterations) + } + + origEpc <- readValuesFromFile(settings$epc[2],randVals[[1]]) + partialResult <- matrix(ncol=length(randVals[[1]])+2*length(dataVar)) + colN <- randVals[[1]] + colN[match(parameters[,2],randVals[[1]])] <- parameters[,1] + colN[match(parameters[,2], randVals[[1]])[!is.na(match(parameters[,2],randVals[[1]]))]] <- parameters[,1] + colnames(partialResult) <- c(colN,sprintf("%s_likelihood",names(dataVar)), + sprintf("%s_rmse",names(dataVar))) + numParameters <- length(colN) + partialResult[1:numParameters] <- origEpc + ## Prepare the preservedCalib matrix for the faster + ## run. + + pretag <- file.path(outLoc,preTag) + + musoCodeToIndex <- sapply(dataVar,function(musoCode){ + settings$dailyOutputTable[settings$dailyOutputTable$code == musoCode,"index"] + }) + resultRange <- (numParameters + 1):(ncol(partialResult)) + ## Creating function for generating separate + ## csv files for each run + + settings$iniInput[2] %>% + (function(x) paste0(dirname(x),"/",tools::file_path_sans_ext(basename(x)),"-tmp.",tools::file_ext(x))) %>% + unlink + randValues <- randVals[[2]] + + settings$calibrationPar <- randVals[[1]] + + if(!is.null(naVal)){ + measuredData <- as.data.frame(measuredData) + measuredData[measuredData == naVal] <- NA + } + + alignIndexes <- alignMuso(settings,measuredData) + if(!is.null(uncertainity)){ + uncert <- measuredData[alignIndexes$meas,uncertainity] + } else { + uncert <- NULL + } + # browser() + if(threadNumber == 1){ + origModellOut <- calibMuso(settings=settings, silent=TRUE, skipSpinup = skipSpinup, postProcString=postProcString, modifyOriginal=modifyOriginal) + partialResult[,resultRange] <- calcLikelihoodsAndRMSE(dataVar=dataVar, + mod=origModellOut, + mes=measuredData, + likelihoods=likelihood, + alignIndexes=alignIndexes, + musoCodeToIndex = musoCodeToIndex,uncert=uncert) + write.csv(x=origModellOut, file=paste0(pretag, 1, ".csv")) + write.csv(x=partialResult, file="preservedCalib.csv",row.names=FALSE) + } + print("Running the model with the random epc values...", quote = FALSE) + + # if(!is.null(postProcString)){ + # colNumb <- length(settings$dailyVarCodes) + 1 + # } + + + for(i in 2:(iterations+1)){ + + + tmp <- tryCatch(calibMuso(settings = settings, + parameters = randValues[(i-1),], + silent= TRUE, + skipSpinup = skipSpinup, modifyOriginal=modifyOriginal, postProcString = postProcString), error = function (e) NULL) + if(is.null(tmp)){ + partialResult[,resultRange] <- NA + } else { + partialResult[,resultRange] <- calcLikelihoodsAndRMSE(dataVar=dataVar, + mod=tmp, + mes=measuredData, + likelihoods=likelihood, + alignIndexes=alignIndexes, + musoCodeToIndex = musoCodeToIndex, uncert = uncert) + } + + + partialResult[1:numParameters] <- randValues[(i-1),] + write.table(x=partialResult, file="preservedCalib.csv", append=TRUE, row.names=FALSE, + sep=",", col.names=FALSE) + write.csv(x=tmp, file=paste0(pretag, (i+1),".csv")) + writeLines(as.character(i-1),"progress.txt") + } + + if(threadNumber == 1){ + return(randVals[[1]]) + } + +} + +distributeCores <- function(iterations, numCores){ + perProcess<- iterations %/% numCores + numSimu <- rep(perProcess,numCores) + gainers <- sample(1:numCores, iterations %% numCores) + numSimu[gainers] <- numSimu[gainers] + 1 + numSimu +} + +prepareFromAgroMo <- function(fName){ + obs <- read.table(fName, stringsAsFactors=FALSE, sep = ";", header=T) + obs <- reshape(obs, timevar="var_id", idvar = "date", direction = "wide") + dateCols <- apply(do.call(rbind,(strsplit(obs$date, split = "-"))),2,as.numeric) + colnames(dateCols) <- c("year", "month", "day") + cbind.data.frame(dateCols, obs) +} + + +calcLikelihoodsAndRMSE <- function(dataVar, mod, mes, likelihoods, alignIndexes, musoCodeToIndex, uncert){ + + # NOT COMPATIBLE WITH OLD MEASUREMENT DATA, mes have to be a matrix + likelihoodRMSE <- sapply(names(dataVar),function(key){ + # browser() + modelled <- mod[alignIndexes$mod,musoCodeToIndex[key]] + selected <- grep(sprintf("%s$", key), colnames(mes)) + # browser() + measured <- mes[alignIndexes$meas,selected] + notNA <- sapply(1:nrow(measured), function(x){!any(is.na(measured[x,]))}) + modelled <- modelled[notNA] + measured <- measured[notNA,] + # uncert <- uncert[!is.na(measured)] + + # measured <- measured[!is.na(measured)] + apply(measured, 1, function(x){!any(is.na(x))}) + measured <- t(apply(measured, 1, function(x){if(!any(is.na(x))){x}} )) + if(ncol(measured)!=1){ + m <- measured[,grep("^mean", colnames(measured))] + } + res <- c(likelihoods[[key]](modelled, measured), + sqrt(mean((modelled-m)^2)) + ) + # browser() + res + }) + names(likelihoodRMSE) <- c(sprintf("%s_likelihood",dataVar), sprintf("%s_rmse",dataVar)) + + return(c(likelihoodRMSE[1,],likelihoodRMSE[2,])) +} + +agroLikelihood <- function(modVector,measured){ + mu <- measured[,grep("mean", colnames(measured))] + stdev <- measured[,grep("^sd", colnames(measured))] + ndata <- nrow(measured) + sum(sapply(1:ndata, function(x){ + dnorm(modVector, mu[x], stdev[x], log = TRUE) + }), na.rm=TRUE) +} + + + +maxLikelihoodAgromo <- function (results, imgPath, varName, ...) { + +} diff --git a/RBBGCMuso/R/calibration.R b/RBBGCMuso/R/calibration.R index ce246ef..5c687c3 100644 --- a/RBBGCMuso/R/calibration.R +++ b/RBBGCMuso/R/calibration.R @@ -33,7 +33,7 @@ optiMuso <- function(measuredData, parameters = NULL, startDate = NULL, outVars = NULL, iterations = 30, skipSpinup = TRUE, plotName = "calib.jpg", modifyOriginal=TRUE, likelihood, uncertainity = NULL, - naVal = NULL, postProcString = NULL, w=NULL, lg=FALSE) { + naVal = NULL, postProcString = NULL, w=NULL, lg=FALSE, parallel = TRUE) { # Exanding likelihood likelihoodFull <- as.list(rep(NA,length(dataVar))) names(likelihoodFull) <- names(dataVar) @@ -187,24 +187,24 @@ alignMuso <- function (settings,measuredData) { cbind.data.frame(model=modIndex,meas=measIndex) } -calcLikelihoodsAndRMSE <- function(dataVar, mod, mes, likelihoods, alignIndexes, musoCodeToIndex, uncert){ - - likelihoodRMSE <- sapply(names(dataVar),function(key){ - # browser() - modelled <- mod[alignIndexes$mod,musoCodeToIndex[key]] - measured <- mes[alignIndexes$meas,key] - modelled <- modelled[!is.na(measured)] - # uncert <- uncert[!is.na(measured)] - measured <- measured[!is.na(measured)] - res <- c(likelihoods[[key]](modelled, measured, uncert), - sqrt(mean((modelled-measured)^2)) - ) - res - }) - names(likelihoodRMSE) <- c(sprintf("%s_likelihood",dataVar), sprintf("%s_rmse",dataVar)) - - return(c(likelihoodRMSE[1,],likelihoodRMSE[2,])) -} +# calcLikelihoodsAndRMSE <- function(dataVar, mod, mes, likelihoods, alignIndexes, musoCodeToIndex, uncert){ +# +# likelihoodRMSE <- sapply(names(dataVar),function(key){ +# # browser() +# modelled <- mod[alignIndexes$mod,musoCodeToIndex[key]] +# measured <- mes[alignIndexes$meas,key] +# modelled <- modelled[!is.na(measured)] +# # uncert <- uncert[!is.na(measured)] +# measured <- measured[!is.na(measured)] +# res <- c(likelihoods[[key]](modelled, measured, uncert), +# sqrt(mean((modelled-measured)^2)) +# ) +# res +# }) +# names(likelihoodRMSE) <- c(sprintf("%s_likelihood",dataVar), sprintf("%s_rmse",dataVar)) +# +# return(c(likelihoodRMSE[1,],likelihoodRMSE[2,])) +# } #' musoGlue #' @@ -215,7 +215,12 @@ calcLikelihoodsAndRMSE <- function(dataVar, mod, mes, likelihoods, alignIndexes, #' @export musoGlue <- function(presCalFile, w, delta = 0.17, settings=setupMuso(), parameters=read.csv("parameters.csv", stringsAsFactors=FALSE), lg=FALSE){ - preservedCalib<- read.csv(presCalFile) + if(is.data.frame(presCalFile)){ + preservedCalib <- presCalFile + } else { + preservedCalib <- read.csv(presCalFile) + } + paramIndex <- parameters[(match(colnames(preservedCalib),parameters[,1])),2] paramIndex <- paramIndex[!is.na(paramIndex)] paramIndex <- c(paramIndex, @@ -242,7 +247,8 @@ musoGlue <- function(presCalFile, w, delta = 0.17, settings=setupMuso(), paramet parameterIndexes <- 1:(min(likeIndexes)-1) preservedCalib <- preservedCalib[!is.na(preservedCalib$combined),] unfilteredLikelihood <- preservedCalib$combined - preservedCalibtop5 <- preservedCalib[preservedCalib$combined>quantile(preservedCalib$combined,0.95),] + top5points <- preservedCalib$combined>quantile(preservedCalib$combined,0.95) + preservedCalibtop5 <- preservedCalib[,] optRanges <-t(apply(preservedCalibtop5,2,function(x) quantile(x,c(0.05,0.5,0.95)))) pdf("dotplot.pdf") if(lg){ @@ -261,20 +267,40 @@ musoGlue <- function(presCalFile, w, delta = 0.17, settings=setupMuso(), paramet abline(v=optRanges[i,3],col="red") } + par(pari) dev.off() maxParValues <- preservedCalibtop5[which.max(preservedCalibtop5$combined),] maxParIndexes <- paramIndex write.csv(cbind.data.frame(calibrationPar=maxParValues,parameters=maxParIndexes),"maxLikelihood.csv") write.csv(optRanges,"optRanges.csv") - optInterval <-t(apply(preservedCalibtop5,2,function(x) quantile(x,c(0.5-delta,0.5+delta)))) - optParamRange <- cbind.data.frame(rownames(optInterval)[parameterIndexes],as.numeric(paramIndex),optInterval[parameterIndexes,]) - optimalEpc <- musoRand(optParamRange,iterations = 2) - optimalEpc[[2]] <- optimalEpc[[2]][1,] - write.csv(as.data.frame(optimalEpc),"epcOptim.csv") - print(head(optRanges,n=-2)) - calibMuso(calibrationPar=optimalEpc[[1]],parameters=optimalEpc[[2]]) - file.copy(settings$epcInput[2],"epcOptim.epc") + # browser() + # There are some serious problems with this implementation. The uncertainity bouns are not for the parameters, but for the output values. The median is pointwise median for all simulation. + # And the 95 and 5 percentile also. + # dataVec <- preservedCalibtop5$combined + # closestToMedian <- function (dataVec) { + # match(sort(dataVec)[min(which(sort(dataVec)>=median(dataVec)))], dataVec) + # } + # + # while(is.null(optimalEpc)){ + # match(quantile(preservedCalibtop5$combined,0.5), preservedCalibtop5$combined) + # optInterval <-t(apply(preservedCalibtop5,2,function(x) quantile(x,c(0.5-delta,0.5+delta)))) + # optParamRange <- cbind.data.frame(rownames(optInterval)[parameterIndexes],as.numeric(paramIndex),optInterval[parameterIndexes,]) + # optimalEpc <- tryCatch(musoRand(optParamRange,iterations = 2), error=function(e){NULL}) + # delta <- delta*1.05 + # if(delta > 0.5){ + # delta <- 0.5 + # } + # if((delta == 0.5) && is.null(optimalEpc)){ + # stop("cannot find optimal value in the given range") + # } + # } + # print("getOptim") + # optimalEpc[[2]] <- optimalEpc[[2]][1,] + # write.csv(as.data.frame(optimalEpc),"epcOptim.csv") + # print(head(optRanges,n=-2)) + # calibMuso(calibrationPar=optimalEpc[[1]],parameters=optimalEpc[[2]]) + # file.copy(settings$epcInput[2],"epcOptim.epc") } generateOptEpc <- function(optRanges,delta, maxLikelihood=FALSE){ diff --git a/RBBGCMuso/R/changeMuso.R b/RBBGCMuso/R/changeMuso.R index 97b70e7..9a34a0c 100644 --- a/RBBGCMuso/R/changeMuso.R +++ b/RBBGCMuso/R/changeMuso.R @@ -1,14 +1,11 @@ -#' This is the function which is capable to change multiple specific lines to others using their row numbers. +#' changemulline #' #' The function uses the previous changspecline function to operate. - ##From now changespecline is in the forarcheologist file, because itis no longer needed #' #' @author Roland Hollos -#' @keywords internal -#' +#' @export - -changemulline <- function(filePaths, calibrationPar, contents, src){ +changemulline <- function(filePaths, calibrationPar, contents, src, outFiles=filePaths){ # browser() if(is.null(src)){ src <- filePaths @@ -19,7 +16,7 @@ changemulline <- function(filePaths, calibrationPar, contents, src){ fileStringVector <<- changeByIndex(index, content, fileStringVector) }, calibrationPar, contents) - writeLines(fileStringVector, filePaths) + writeLines(fileStringVector, outFiles) } diff --git a/RBBGCMuso/R/setupMuso.R b/RBBGCMuso/R/setupMuso.R index 8873478..ad7d2f5 100644 --- a/RBBGCMuso/R/setupMuso.R +++ b/RBBGCMuso/R/setupMuso.R @@ -68,7 +68,7 @@ setupMuso <- function(executable=NULL, # inputParser <- function(string,fileName,counter,value=TRUE){ - unlist(strsplit(grep(string,fileName,value=TRUE),"[\ \t]"))[counter] + unlist(strsplit(grep(string,fileName,value=TRUE, perl = TRUE),"[\ \t]", useBytes = TRUE))[counter] } # outMaker <- function(inputVar,grepString,filep){ @@ -104,8 +104,8 @@ setupMuso <- function(executable=NULL, #iniChangedp <- FALSE if(is.null(iniInput)){ - spinups<-grep("s.ini$",list.files(inputLoc),value=TRUE) - normals<-grep("n.ini$",list.files(inputLoc),value=TRUE) + spinups<-grep("s.ini$",list.files(inputLoc),value=TRUE, perl = TRUE) + normals<-grep("n.ini$",list.files(inputLoc),value=TRUE, perl = TRUE) if(length(spinups)==1){ iniInput[1] <- file.path(inputLoc,spinups) @@ -124,8 +124,8 @@ setupMuso <- function(executable=NULL, ##read the ini files for the further changes iniFiles<-lapply(iniInput, function (x) readLines(x,-1)) - iniFiles[[1]] <- gsub("\\","/", iniFiles[[1]],fixed=TRUE) #replacing \ to / - iniFiles[[2]] <- gsub("\\","/", iniFiles[[2]],fixed=TRUE) #replacing \ to / + iniFiles[[1]] <- gsub("\\\\","/", iniFiles[[1]], perl = TRUE) #replacing \ to / + iniFiles[[2]] <- gsub("\\\\","/", iniFiles[[2]], perl = TRUE) #replacing \ to / names(iniFiles) <- c("spinup","normal") @@ -156,21 +156,21 @@ setupMuso <- function(executable=NULL, # if(is.null(mapData)){ # - outIndex<-grep("DAILY_OUTPUT",iniFiles[[2]])+1 - numVar<-as.numeric(unlist(strsplit(iniFiles[[2]][outIndex],"[\ \t]"))[1]) + outIndex<-grep("DAILY_OUTPUT",iniFiles[[2]], perl = TRUE)+1 + numVar<-as.numeric(unlist(strsplit(iniFiles[[2]][outIndex],"[\ \t]", useBytes = TRUE))[1]) dailyVarCodes<-tryCatch(iniFiles[[2]][(outIndex+1):(outIndex+numVar)], error = function(e){ stop("Cannot read indexes of output variables from the normal ini file, please make sure you have not skiped a line after the flag: \"DAILY_OUTPUT\"") }) - dailyVarCodes<-unlist(lapply(dailyVarCodes, function(x) unlist(strsplit(x,"[\ \t]"))[1])) - dailyVarnames<-unlist(lapply(dailyVarCodes, function(x) musoMapping(unlist(strsplit(x,"[\ \t]"))[1]))) + dailyVarCodes<-unlist(lapply(dailyVarCodes, function(x) unlist(strsplit(x,"[\ \t]", useBytes = TRUE))[1])) + dailyVarnames<-unlist(lapply(dailyVarCodes, function(x) musoMapping(unlist(strsplit(x,"[\ \t]", useBytes = TRUE))[1]))) - outIndex<-grep("ANNUAL_OUTPUT",iniFiles[[2]])+1 - numVar<-as.numeric(unlist(strsplit(iniFiles[[2]][outIndex],"[\ \t]"))[1]) + outIndex<-grep("ANNUAL_OUTPUT",iniFiles[[2]], perl = TRUE)+1 + numVar<-as.numeric(unlist(strsplit(iniFiles[[2]][outIndex],"[\ \t]", useBytes = TRUE))[1]) annualVarCodes<-iniFiles[[2]][(outIndex+1):(outIndex+numVar)] - annualVarCodes<-unlist(lapply(annualVarCodes, function(x) unlist(strsplit(x,"[\ \t]"))[1])) - annualVarnames<-unlist(lapply(annualVarCodes, function(x) musoMapping(unlist(strsplit(x,"[\ \t]"))[1]))) + annualVarCodes<-unlist(lapply(annualVarCodes, function(x) unlist(strsplit(x,"[\ \t]", useBytes = TRUE))[1])) + annualVarnames<-unlist(lapply(annualVarCodes, function(x) musoMapping(unlist(strsplit(x,"[\ \t]", useBytes = TRUE))[1]))) outputVars<-list(dailyVarnames,annualVarnames) # browser() # } else { @@ -206,8 +206,8 @@ setupMuso <- function(executable=NULL, } outputName <- character(2) - outputName[1] <- basename(unlist(strsplit(iniFiles[[1]][grep("OUTPUT_CONTROL",iniFiles[[1]])+1],"[\ \t]"))[1]) - outputName[2] <- basename(unlist(strsplit(iniFiles[[2]][grep("OUTPUT_CONTROL",iniFiles[[2]])+1],"[\ \t]"))[1]) + outputName[1] <- basename(unlist(strsplit(iniFiles[[1]][grep("OUTPUT_CONTROL",iniFiles[[1]], perl = TRUE)+1],"[\ \t]", useBytes = TRUE))[1]) + outputName[2] <- basename(unlist(strsplit(iniFiles[[2]][grep("OUTPUT_CONTROL",iniFiles[[2]], perl = TRUE)+1],"[\ \t]", useBytes = TRUE))[1]) ## outputName <- unlist(strsplit(grep("output",grep("prefix",iniFiles[[2]],value=TRUE),value=TRUE),"[\ \t]"))[1] ##THIS IS AN UGLY SOLUTION, WHICH NEEDS AN UPGRADE!!! FiXED (2017.09.11) ## outputName <- unlist(strsplit(grep("prefix for output files",iniFiles[[2]],value=TRUE),"[\ \t]"))[1] @@ -220,7 +220,7 @@ setupMuso <- function(executable=NULL, if(is.null(outputLoc)){ ## outputLoc<-paste((rev(rev(unlist(strsplit(outputName,"/")))[-1])),collapse="/") - outputLoc <- dirname(unlist(strsplit(iniFiles[[2]][grep("OUTPUT_CONTROL",iniFiles[[2]])+1],"[\ \t]"))[1]) + outputLoc <- dirname(unlist(strsplit(iniFiles[[2]][grep("OUTPUT_CONTROL",iniFiles[[2]], perl = TRUE)+1],"[\ \t]", useBytes = TRUE))[1]) if(substr(outputLoc,start = 1,stop = 1)!="/"){ ##if the outputName is not absolute path make it absolute outputLoc <- file.path(inputLoc,outputLoc) @@ -233,12 +233,12 @@ setupMuso <- function(executable=NULL, inputFiles<-c(iniInput,epcInput,metInput) numData<-rep(NA,3) - numYears <- as.numeric(unlist(strsplit(iniFiles[[2]][grep("TIME_DEFINE",iniFiles[[2]])+1],"[\ \t]"))[1]) + numYears <- as.numeric(unlist(strsplit(iniFiles[[2]][grep("TIME_DEFINE",iniFiles[[2]], perl = TRUE)+1],"[\ \t]", useBytes = TRUE))[1]) ## numYears<-unlist(read.table(iniInput[2],skip = 14,nrows = 1)[1]) - numValues <- as.numeric(unlist(strsplit(iniFiles[[2]][grep("DAILY_OUTPUT",iniFiles[[2]])+1],"[\ \t]"))[1]) + numValues <- as.numeric(unlist(strsplit(iniFiles[[2]][grep("DAILY_OUTPUT",iniFiles[[2]], perl = TRUE)+1],"[\ \t]", useBytes = TRUE))[1]) ## numValues will be replaced to numVar ## numValues<-unlist(read.table(iniInput[2],skip=102,nrows = 1)[1]) - startYear <- as.numeric(unlist(strsplit(iniFiles[[2]][grep("TIME_DEFINE",iniFiles[[2]])+2],"[\ \t]"))[1]) + startYear <- as.numeric(unlist(strsplit(iniFiles[[2]][grep("TIME_DEFINE",iniFiles[[2]], perl = TRUE)+2],"[\ \t]", useBytes = TRUE))[1]) numData[1] <- numValues * numYears * 365 # Have to corrigate leapyears numData[2] <- numYears * numValues*12 @@ -263,9 +263,9 @@ setupMuso <- function(executable=NULL, searchBellow <- function(inFile, key, stringP = TRUE, n=1, management = FALSE){ if(stringP){ - unlist(strsplit(inFile[grep(key,inFile,perl=TRUE)+n],split = "\\s+"))[1] + unlist(strsplit(inFile[grep(key,inFile, perl=TRUE)+n],split = "\\s+", useBytes = TRUE))[1] } else { - as.numeric(unlist(strsplit(inFile[grep(key,inFile,perl=TRUE)+n],split = "\\s+"))[1]) + as.numeric(unlist(strsplit(inFile[grep(key,inFile,perl=TRUE)+n],split = "\\s+", useBytes = TRUE))[1]) } } soilFile <- NULL diff --git a/RBBGCMuso/R/thread-1 b/RBBGCMuso/R/thread-1 new file mode 100644 index 0000000..29d6383 --- /dev/null +++ b/RBBGCMuso/R/thread-1 @@ -0,0 +1 @@ +100 diff --git a/RBBGCMuso/R/thread-2 b/RBBGCMuso/R/thread-2 new file mode 100644 index 0000000..29d6383 --- /dev/null +++ b/RBBGCMuso/R/thread-2 @@ -0,0 +1 @@ +100 diff --git a/RBBGCMuso/R/thread-3 b/RBBGCMuso/R/thread-3 new file mode 100644 index 0000000..29d6383 --- /dev/null +++ b/RBBGCMuso/R/thread-3 @@ -0,0 +1 @@ +100 diff --git a/RBBGCMuso/R/thread-4 b/RBBGCMuso/R/thread-4 new file mode 100644 index 0000000..29d6383 --- /dev/null +++ b/RBBGCMuso/R/thread-4 @@ -0,0 +1 @@ +100 diff --git a/RBBGCMuso/man/calibrateMuso.Rd b/RBBGCMuso/man/calibrateMuso.Rd new file mode 100644 index 0000000..5710638 --- /dev/null +++ b/RBBGCMuso/man/calibrateMuso.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/calibrateMuso.R +\name{calibrateMuso} +\alias{calibrateMuso} +\title{calibrateMuso} +\usage{ +calibrateMuso( + measuredData, + parameters = NULL, + startDate = NULL, + endDate = NULL, + formatString = "\%Y-\%m-\%d", + dataVar, + outLoc = "./calib", + preTag = "cal-", + settings = setupMuso(), + outVars = NULL, + iterations = 100, + skipSpinup = TRUE, + plotName = "calib.jpg", + modifyOriginal = TRUE, + likelihood, + uncertainity = NULL, + naVal = NULL, + postProcString = NULL, + thread_prefix = "thread", + numCores = (parallel::detectCores() - 1), + pb = txtProgressBar(min = 0, max = iterations, style = 3), + maxLikelihoodEpc = TRUE, + pbUpdate = setTxtProgressBar +) +} +\description{ +This funtion uses the Monte Carlo technique to uniformly sample the parameter space from user defined parameters of the Biome-BGCMuSo model. The sampling algorithm ensures that the parameters are constrained by the model logic which means that parameter dependencies are fully taken into account (parameter dependency means that e.g leaf C:N ratio must be smaller than C:N ratio of litter; more complicated rules apply to the allocation parameters where the allocation fractions to different plant compartments must sum up 1). This function implements a mathematically correct solution to provide uniform distriution for all selected parameters. +} +\author{ +Roland HOLLOS +} diff --git a/RBBGCMuso/man/changemulline.Rd b/RBBGCMuso/man/changemulline.Rd index e7ec966..848e443 100644 --- a/RBBGCMuso/man/changemulline.Rd +++ b/RBBGCMuso/man/changemulline.Rd @@ -2,16 +2,9 @@ % Please edit documentation in R/changeMuso.R \name{changemulline} \alias{changemulline} -\title{This is the function which is capable to change multiple specific lines to others using their row numbers.} +\title{changemulline} \usage{ -changemulline( - filePaths, - calibrationPar, - contents, - fileOut, - fileToChange, - modifyOriginal = FALSE -) +changemulline(filePaths, calibrationPar, contents, src) } \description{ The function uses the previous changspecline function to operate. @@ -19,4 +12,3 @@ The function uses the previous changspecline function to operate. \author{ Roland Hollos } -\keyword{internal} diff --git a/RBBGCMuso/man/musoGlue.Rd b/RBBGCMuso/man/musoGlue.Rd index b4960e7..c3b878c 100644 --- a/RBBGCMuso/man/musoGlue.Rd +++ b/RBBGCMuso/man/musoGlue.Rd @@ -4,7 +4,14 @@ \alias{musoGlue} \title{musoGlue} \usage{ -musoGlue(preservedCalib, w) +musoGlue( + presCalFile, + w, + delta = 0.17, + settings = setupMuso(), + parameters = read.csv("parameters.csv", stringsAsFactors = FALSE), + lg = FALSE +) } \arguments{ \item{plotName}{u} diff --git a/RBBGCMuso/man/optiMuso.Rd b/RBBGCMuso/man/optiMuso.Rd index d338cbc..6543dc9 100644 --- a/RBBGCMuso/man/optiMuso.Rd +++ b/RBBGCMuso/man/optiMuso.Rd @@ -20,9 +20,12 @@ optiMuso( plotName = "calib.jpg", modifyOriginal = TRUE, likelihood, + uncertainity = NULL, naVal = NULL, postProcString = NULL, - w = NULL + w = NULL, + lg = FALSE, + parallel = TRUE ) } \arguments{ diff --git a/calibrateMuso.R b/calibrateMuso.R new file mode 100644 index 0000000..46c46e8 --- /dev/null +++ b/calibrateMuso.R @@ -0,0 +1,295 @@ +#' calibrateMuso +#' +#' This funtion uses the Monte Carlo technique to uniformly sample the parameter space from user defined parameters of the Biome-BGCMuSo model. The sampling algorithm ensures that the parameters are constrained by the model logic which means that parameter dependencies are fully taken into account (parameter dependency means that e.g leaf C:N ratio must be smaller than C:N ratio of litter; more complicated rules apply to the allocation parameters where the allocation fractions to different plant compartments must sum up 1). This function implements a mathematically correct solution to provide uniform distriution for all selected parameters. +#' @author Roland HOLLOS +#' @export +calibrateMuso <- function(measuredData, parameters = NULL, startDate = NULL, + endDate = NULL, formatString = "%Y-%m-%d", + dataVar, outLoc = "./calib", + preTag = "cal-", settings = setupMuso(), + outVars = NULL, iterations = 30, + skipSpinup = TRUE, plotName = "calib.jpg", + modifyOriginal=TRUE, likelihood, uncertainity = NULL, + naVal = NULL, postProcString = NULL, + tread_prefix="thread", numcores = (parallel::detectCores()-1), pb = txtProgressBar(min=0, max=iterations, style=3), + pbUpdate = setTxtProgressBar){ + + # ____ _ _ _ _ + # / ___|_ __ ___ __ _| |_ ___ | |_| |__ _ __ ___ __ _ __| |___ + # | | | '__/ _ \/ _` | __/ _ \ | __| '_ \| '__/ _ \/ _` |/ _` / __| + # | |___| | | __/ (_| | || __/ | |_| | | | | | __/ (_| | (_| \__ \ + # \____|_| \___|\__,_|\__\___| \__|_| |_|_| \___|\__,_|\__,_|___/ + + + + copyToThreadDirs(thread_prefix, numcores = numcores, runDir = settings$inputLoc) + + # ____ _ _ _ + # | _ \ _ _ _ __ | |_| |__ _ __ ___ __ _ __| |___ + # | |_) | | | | '_ \ | __| '_ \| '__/ _ \/ _` |/ _` / __| + # | _ <| |_| | | | | | |_| | | | | | __/ (_| | (_| \__ \ + # |_| \_\\__,_|_| |_| \__|_| |_|_| \___|\__,_|\__,_|___/ + + threadCount <- distributeCores(iterations, numCores) + + fut <- lapply(1:numCores, function(i) { + # future({ + musoSingleThread(measuredData, parameters, startDate, + endDate, formatString, + dataVar, outLoc, + preTag, settings, + outVars, iterations = threadCount[i], + skipSpinup, plotName, + modifyOriginal, likelihood, uncertainity, + naVal, postProcString, i) + # }) + }) + + # __ ___ _ _ + # \ \ / / |__ __ _| |_ ___| |__ _ __ _ __ ___ ___ ___ ___ ___ + # \ \ /\ / /| '_ \ / _` | __/ __| '_ \ | '_ \| '__/ _ \ / __/ _ \/ __/ __| + # \ V V / | | | | (_| | || (__| | | | | |_) | | | (_) | (_| __/\__ \__ \ + # \_/\_/ |_| |_|\__,_|\__\___|_| |_| | .__/|_| \___/ \___\___||___/___/ + # |_| + + getProgress <- function(){ + threadfiles <- list.files(settings$inputLoc, pattern="progress.txt", recursive = TRUE) + if(length(threadfiles)==0){ + return(0) + } else { + sum(sapply(threadfiles, function(x){ + partRes <- readLines(x) + if(length(partRes)==0){ + return(0) + } else { + return(as.numeric(partRes)) + } + + })) + + } + } + + progress <- 0 + while(progress < 400){ + Sys.sleep(1) + progress <- getProgress() + pbUpdate(pb,as.numeric(progress)) + } + close(pb) + + # ____ _ _ + # / ___|___ _ __ ___ | |__ (_)_ __ ___ + # | | / _ \| '_ ` _ \| '_ \| | '_ \ / _ \ + # | |__| (_) | | | | | | |_) | | | | | __/ + # \____\___/|_| |_| |_|_.__/|_|_| |_|\___| + + + # ____ _ _ _ _____ + # / ___| | | | | | ____| + # | | _| | | | | | _| + # | |_| | |__| |_| | |___ + # \____|_____\___/|_____| + + + # musoGlue("preservedCalib.csv",w=w, lg = lg) +} + +copyToThreadDirs <- function(prefix="thread", numcores=parallel::detectCores()-1, runDir="."){ + dir.create(file.path(runDir,prefix), showWarnings=TRUE) + fileNames <- grep("^thread.*", list.files(runDir), value=TRUE, invert=TRUE) + invisible(sapply(1:numcores,function(corenum){ + threadDir <- file.path(runDir,prefix,paste0(prefix,"_",corenum)) + dir.create(threadDir, showWarnings=FALSE) + file.copy(from=fileNames,to=threadDir, overwrite=FALSE) + })) +} + +musoSingleThread <- function(measuredData, parameters = NULL, startDate = NULL, + endDate = NULL, formatString = "%Y-%m-%d", + dataVar, outLoc = "./calib", + preTag = "cal-", settings = setupMuso(), + outVars = NULL, iterations = 30, + skipSpinup = TRUE, plotName = "calib.jpg", + modifyOriginal=TRUE, likelihood, uncertainity = NULL, + naVal = NULL, postProcString = NULL, threadNumber) { + + setwd(paste0("thread/thread-",threadNumber)) + # Exanding likelihood + likelihoodFull <- as.list(rep(NA,length(dataVar))) + names(likelihoodFull) <- names(dataVar) + if(!missing(likelihood)) { + lapply(names(likelihood),function(x){ + likelihoodFull[[x]] <<- likelihood[[x]] + }) + } + defaultLikelihood <- which(is.na(likelihood)) + if(length(defaultLikelihood)>0){ + likelihoodFull[[defaultLikelihood]] <- (function(x, y){ + exp(-sqrt(mean((x-y)^2))) + }) + } + + mdata <- measuredData + if(is.null(parameters)){ + parameters <- tryCatch(read.csv("parameters.csv", stringsAsFactor=FALSE), error = function (e) { + stop("You need to specify a path for the parameters.csv, or a matrix.") + }) + } else { + if((!is.list(parameters)) & (!is.matrix(parameters))){ + parameters <- tryCatch(read.csv(parameters, stringsAsFactor=FALSE), error = function (e){ + stop("Cannot find neither parameters file neither the parameters matrix") + }) + }} + + outLoc <- normalizePath(outLoc) + outLocPlain <- basename(outLoc) + currDir <- getwd() + + if(!dir.exists(outLoc)){ + dir.create(outLoc) + warning(paste(outLoc," is not exists, so it was created")) + } + + outLoc <- normalizePath(outLoc) + parameterNames <- parameters[,1] + pretag <- file.path(outLoc,preTag) + ##reading the original epc file at the specified + ## row numbers + print("optiMuso is randomizing the epc parameters now...",quote = FALSE) + if(iterations < 3000){ + randVals <- musoRand(parameters = parameters,constrains = NULL, iterations = 3000) + randVals[[2]]<- randVals[[2]][sample(1:3000,iterations),] + } else { + randVals <- musoRand(parameters = parameters,constrains = NULL, iterations = iterations) + } + + origEpc <- readValuesFromFile(settings$epc[2],randVals[[1]]) + partialResult <- matrix(ncol=length(randVals[[1]])+2*length(dataVar)) + colN <- randVals[[1]] + colN[match(parameters[,2],randVals[[1]])] <- parameters[,1] + colnames(partialResult) <- c(colN,sprintf("%s_likelihood",names(dataVar)), + sprintf("%s_rmse",names(dataVar))) + numParameters <- length(colN) + partialResult[1:numParameters] <- origEpc + ## Prepare the preservedCalib matrix for the faster + ## run. + + pretag <- file.path(outLoc,preTag) + + musoCodeToIndex <- sapply(dataVar,function(musoCode){ + settings$dailyOutputTable[settings$dailyOutputTable$code == musoCode,"index"] + }) + resultRange <- (numParameters + 1):(ncol(partialResult)) + ## Creating function for generating separate + ## csv files for each run + + progBar <- txtProgressBar(1,iterations,style=3) + settings$iniInput[2] %>% + (function(x) paste0(dirname(x),"/",tools::file_path_sans_ext(basename(x)),"-tmp.",tools::file_ext(x))) %>% + unlink + randValues <- randVals[[2]] + + settings$calibrationPar <- randVals[[1]] + + if(!is.null(naVal)){ + measuredData <- as.data.frame(measuredData) + measuredData[measuredData == naVal] <- NA + } + + alignIndexes <- alignMuso(settings,measuredData) + if(!is.null(uncertainity)){ + uncert <- measuredData[alignIndexes$meas,uncertainity] + } else { + uncert <- NULL + } + # browser() + origModellOut <- calibMuso(settings=settings, silent=TRUE, skipSpinup = skipSpinup, postProcString=postProcString, modifyOriginal=modifyOriginal) + partialResult[,resultRange] <- calcLikelihoodsAndRMSE(dataVar=dataVar, + mod=origModellOut, + mes=measuredData, + likelihoods=likelihood, + alignIndexes=alignIndexes, + musoCodeToIndex = musoCodeToIndex,uncert=uncert) + write.csv(x=origModellOut, file=paste0(pretag, 1, ".csv")) + print("Running the model with the random epc values...", quote = FALSE) + + # if(!is.null(postProcString)){ + # colNumb <- length(settings$dailyVarCodes) + 1 + # } + + write.csv(x=partialResult, file="preservedCalib.csv",row.names=FALSE) + for(i in 2:(iterations+1)){ + # browser() + tmp <- tryCatch(calibMuso(settings = settings, + parameters = randValues[(i-1),], + silent= TRUE, + skipSpinup = skipSpinup, modifyOriginal=modifyOriginal, postProcString = postProcString), error = function (e) NULL) + if(is.null(tmp)){ + partialResult[,resultRange] <- NA + } else { + partialResult[,resultRange] <- calcLikelihoodsAndRMSE(dataVar=dataVar, + mod=tmp, + mes=measuredData, + likelihoods=likelihood, + alignIndexes=alignIndexes, + musoCodeToIndex = musoCodeToIndex, uncert = uncert) + } + + + partialResult[1:numParameters] <- randValues[(i-1),] + write.table(x=partialResult, file="preservedCalib.csv", append=TRUE, row.names=FALSE, + sep=",", col.names=FALSE) + write.csv(x=tmp, file=paste0(pretag, (i+1),".csv")) + writeLines(as.character(i),"progress.txt") + } + +} + +distributeCores <- function(iterations, numCores){ + perProcess<- iterations %/% numCores + numSimu <- rep(perProcess,numCores) + gainers <- sample(1:numCores, iterations %% numCores) + numSimu[gainers] <- numSimu[gainers] + 1 + numSimu +} + +prepareFromAgroMo <- function(fName){ + obs <- read.table(fName, stringsAsFactors=FALSE, sep = ";", header=T) + obs <- reshape(obs, timevar="var_id", idvar = "date", direction = "wide") + dateCols <- apply(do.call(rbind,(strsplit(obs$date, split = "-"))),2,as.numeric) + colnames(dateCols) <- c("year", "month", "day") + cbind.data.frame(dateCols, obs) +} + + +calcLikelihoodsAndRMSE <- function(dataVar, mod, mes, likelihoods, alignIndexes, musoCodeToIndex, uncert){ + + likelihoodRMSE <- sapply(names(dataVar),function(key){ + # browser() + modelled <- mod[alignIndexes$mod,musoCodeToIndex[key]] + selected <- grep(sprintf("%s$", key), colnames(mes)) + measured <- mes[alignIndexes$meas,selected] + modelled <- modelled[!is.na(measured)] + # uncert <- uncert[!is.na(measured)] + measured <- measured[!is.na(measured)] + res <- c(likelihoods[[key]](modelled, measured, uncert), + sqrt(mean((modelled-measured)^2)) + ) + res + }) + names(likelihoodRMSE) <- c(sprintf("%s_likelihood",dataVar), sprintf("%s_rmse",dataVar)) + + return(c(likelihoodRMSE[1,],likelihoodRMSE[2,])) +} + +agroLikelihood <- function(modVector,measured){ + mu <- measured[,grep("^mean", colnames(measured))] + stdev <- measured[,grep("^sd", colnames(measured))] + ndata <- nrow(measured) + sum(sapply(1:ndata, function(x){ + dnorm(modVector, mu[x], stdev[x], log = TRUE) + })) +} + +# prepareFromAgroMo("/home/hollorol/agromo/calibration/martonvasar/MV_highN.obs") diff --git a/parallelRun.R b/parallelRun.R new file mode 100644 index 0000000..05da565 --- /dev/null +++ b/parallelRun.R @@ -0,0 +1,90 @@ +library(parallel) +library('future') +plan(multiprocess) +library('RBBGCMuso') + +a <- tempdir() +setwd(a) +file.copy(from="~/R/x86_64-pc-linux-gnu-library/3.5/RBBGCMuso/examples/hhs",to="./", recursive = TRUE) +setwd("hhs") +list.files() + +settings <- setupMuso() +setupMuso() +settings$outputLoc + + +copyToThreadDirs <- function(prefix="thread", numcores=parallel::detectCores()-1, runDir="."){ + dir.create(file.path(runDir,prefix), showWarnings=TRUE) + fileNames <- grep("^thread.*", list.files(runDir), value=TRUE, invert=TRUE) + invisible(sapply(1:numcores,function(corenum){ + threadDir <- file.path(runDir,prefix,paste0(prefix,"_",corenum)) + dir.create(threadDir, showWarnings=FALSE) + file.copy(from=fileNames,to=threadDir, overwrite=FALSE) + })) +} + +copyToThreadDirs() +unlink("thread", recursive=TRUE) + +procFun <- function(index){ + progressState <- tempfile(pattern=paste("thread",index,sep="-", tmpdir="./")) + for(i in 1:100){ + Sys.sleep(1) + writeLines(as.character(i),paste("thread",index,sep="-")) + } +} + +futu <- vector(mode="list", length=4) +names(futu) <- 1:4 +futu + + +getProgress <- function(){ + threadfiles <- list.files(pattern="thread*") + if(length(threadfiles)==0){ + return(0) + } else { + sum(sapply(threadfiles, function(x){ + partRes <- readLines(x) + if(length(partRes)==0){ + return(0) + } else { + return(as.numeric(partRes)) + } + + })) + + } +} + +getProgress() +futu +list.files() +readLines("threadi-1") +procFun(8) +file.remove(pattern="thread*") +file.remove((list.files(pattern="thread*"))) + + +wachProgress <- function(){ + progress <- 0 + while(progress < 400){ + Sys.sleep(1) + progress <- getProgress() + print(paste(as.numeric(progress)/400*100,"%")) + } +} + +for(i in 1:4){ + futu[[as.character(i)]] <- future({procFun(i)}) +} +lapply(1:4,function(i) future({procFun(i)})) +pb <- txtProgressBar(min=0,max=400,style=3) +progress <- 0 +while(progress < 400){ + Sys.sleep(1) + progress <- getProgress() + setTxtProgressBar(pb,as.numeric(progress)) +} +close(pb)