RBBGCMuso/RBBGCMuso/R/musoSensi.R

#' musoSensi
#'
#' This function performs multiple linear regression based global sensitivity analysis based on the output of musoMonte. The algorithm implements the method proposed by Verbeeck et al. 2006 (Tree Physiology 26, 807–817). First the user has to select the parameters of interest with possible minimum and maximum values. After execution musoSensi will then assign weights to the predefined parameters which means that the importance of the parameters will be ranked. The sensitivity analysis calculates the effect of input variability on the output variability in Monte Carlo framework. The result will largely depend on the selected output variable (GPP, evapotranspiration, LAI, soil water content), and on the parameter ranges. Other factors like climate, management and site specific conditions might affect the results.
#' @author Roland HOLLOS
#' @param monteCarloFile If you ran the musoMonte function previously, you do not have to re-run the Monte Carlo experiment, simply provide the preservedEpc.csv file to musoSensi with its path. If you do not set this parameter, musoSensi will run the musoMonte function to get all necessary information.
#' @param outputFile The filename in which the output of the musoSensi function will be saved. By default it is "sensitivity.csv"
#' @param plotName The name of the output barplot. Its default value is "sensitivity.png"
#' @param plotTitle The title drawn above the barplot. Its default value is "Sensitivity"
#' @param dpi The resolution of the saved barplot; it is passed to ggsave. Its default value is 300.
#' @param settings  A list of environmental variables for the Monte Carlo experiment. These settings are generated by the setupMuso function. By default the settings parameter is generated automatically.
#' @param parameters This is a dataframe (heterogeneous data-matrix), where the first column is the name of the parameter, the second is a numeric vector of the rownumbers of the given variable in the input EPC file, and the last two columns describe the minimum and the maximum of the parameter (i.e. the parameter ranges), defining the interval for the randomization. A path to a csv file with the same layout is also accepted; if parameters is NULL, "parameters.csv" is read from the working directory.
#' @param calibrationPar You might want to change some parameters in your EPC file before you run the model. You have to select the appropriate model parameters here. You can refer to the parameters by the number of the line in the EPC file where the variables are defined. The indexing of the lines starts at 1, and each line matters (like in any simple text file). You should use a vector for this selection like c(1,5,8)
#' @param inputDir The location of the input directory for the Biome-BGCMuSo model. This directory must contain a viable pack of all input files and the model executable file.
#' @param outLoc Passed unchanged to musoMonte when the Monte Carlo experiment is run by musoSensi (presumably the location of the Monte Carlo output files; see musoMonte).
#' @param outVars Passed unchanged to musoMonte when the Monte Carlo experiment is run by musoSensi (see musoMonte).
#' @param iterations Number of the Monte Carlo simulations.
#' @param preTag This defines the name of the output files. This tag will be re-used so that the results will be like preTag-1.csv, preTag-2.csv...
#' @param outputType This parameter can be "oneCsv", "moreCsv", and "netCDF". If "oneCsv" is chosen the function creates one large csv file for all of the runs. If "moreCsv" is chosen, every model output goes to separate files. If netCDF is selected the output will be stored in a netCDF file. The default value of outputType is "moreCsv". Note that netCDF is not implemented yet.
#' @param fun If you select a variable from the possible outputs (by using the varIndex parameter), you have to provide a function which maps to a subset of real numbers. The most frequent possibilities are: mean, min, max, var, but you can define any function for your needs.
#' @param varIndex This parameter specifies which parameter will be used for the Monte Carlo experiment from the output list of Biome-BGCMuSo (defined by the INI file). You can extract this information from the INI files. At the output parameter specifications, the parameter order will determine this number. For example, if you have set these output parameters: 412, 874, 926, 888, and you want to use 926 for the experiment, you should specify varIndex as 3.
#' @param skipSpinup With this parameter you can turn off the spinup phase after the first spinup was successfully executed (endpoint file is available). This option can dramatically decrease the time needed for the sensitivity analysis. Note that in case of natural vegetation this option might not be feasible. For croplands this is more feasible.
#' @return A named numeric vector holding the sensitivity of every parameter in percent (rounded to two decimals). As side effects the vector is written to outputFile and the barplot is printed and saved to plotName.
#' @importFrom ggplot2 geom_bar ggplot aes theme element_text xlab ylab ggtitle ggsave scale_y_continuous
#' @export

musoSensi <- function(monteCarloFile = NULL,
                     parameters = NULL,
                     settings = NULL,
                     inputDir = "./",
                     outLoc = "./calib",
                     outVars = NULL,
                     iterations = 30,
                     preTag = "mont-",
                     outputType = "moreCsv",
                     fun = mean,
                     varIndex = 1,
                     outputFile = "sensitivity.csv",
                     plotName = "sensitivity.png",
                     plotTitle = "Sensitivity",
                     skipSpinup = TRUE,
                     dpi=300){

    # Resolve `parameters`: NULL -> default csv in the working directory,
    # anything that is neither a list/data.frame nor a matrix -> treated as a csv path.
    if(is.null(parameters)){
        parameters <- tryCatch(read.csv("parameters.csv", stringsAsFactors = FALSE), error = function (e) {
            stop("You need to specify a path for the parameters.csv, or a matrix.")
        })
    } else if(!is.list(parameters) && !is.matrix(parameters)){
        parameters <- tryCatch(read.csv(parameters, stringsAsFactors = FALSE), error = function (e){
            stop("Cannot find neither parameters file neither the parameters matrix")
        })
    }

    # Regression based sensitivity on a table whose LAST column is the model
    # output and whose other columns contain (among others) the sampled parameters.
    doSensi <- function(M){
        # Drop the runs where the model output is missing.
        M <- M[!is.na(M[, ncol(M)]), , drop = FALSE]
        y <- M[, ncol(M)]

        # Keep only the columns named in the first column of `parameters`;
        # drop = FALSE keeps the two-dimensional shape even for a single parameter.
        X <- as.matrix(M[, colnames(M) %in% parameters[, 1], drop = FALSE])
        if(ncol(X) == 0){
            stop("None of the Monte Carlo columns match the parameter names in the first column of 'parameters'.")
        }

        # Centre every parameter column around its mean.
        X <- sweep(X, 2L, colMeans(X))
        varNames <- colnames(X)

        # Regression weights (intercept removed) and the variance of each parameter.
        w <- lm(y ~ X)$coefficients[-1]
        Sv <- apply(X, 2L, var)
        overalVar <- sum(Sv * w^2, na.rm = TRUE)

        # Share of every parameter in the explained output variance, in percent.
        S <- round((w^2 * Sv) / overalVar * 100, digits = 2)
        names(S) <- varNames
        write.csv(file = outputFile, x = S)

        sensiPlot <- ggplot(data.frame(name=varNames,y=S/100),aes(x=name,y=y))+
            geom_bar(stat = 'identity')+
            theme(axis.text.x = element_text(angle = 45, hjust = 1))+
            xlab(NULL)+
            ylab(NULL)+
            ggtitle(plotTitle)+
            scale_y_continuous(labels = scales::percent,limits=c(0,1))
        print(sensiPlot)
        # Save exactly the plot built above instead of relying on the "last plot".
        ggsave(plotName, plot = sensiPlot, dpi = dpi)
        S
    }

    if(is.null(monteCarloFile)){
        M <- musoMonte(parameters = parameters,
                      settings = settings,
                      inputDir = inputDir,
                      outLoc = outLoc,
                      iterations = iterations,
                      preTag = preTag,
                      outputType = outputType,
                      outVars = outVars,
                      fun = fun,
                      varIndex = varIndex,
                      skipSpinup = skipSpinup
                      )
        # Prepend a running index column; it is not a parameter name, so doSensi ignores it.
        M <- cbind(seq_along(M[,1]),M)
    } else {
        M <- read.csv(monteCarloFile)
    }

    # Columns matching "mod." hold model outputs, every other column is kept as input.
    # NOTE(review): "mod." is a regular expression, so "." matches any character
    # (a parameter called e.g. "model_x" would count as output) - confirm the
    # column naming of musoMonte before making the match literal.
    isOutput <- grepl("mod.", colnames(M))
    yInd <- which(isOutput)[varIndex]
    if(length(yInd) != 1 || is.na(yInd)){
        stop("varIndex does not select an existing model output column of the Monte Carlo results.")
    }
    M <- M[, c(which(!isOutput), yInd), drop = FALSE]

    doSensi(M)
}