RBBGCMuso/docs/CIRM/make_individual_trees.R
2022-04-04 16:08:49 +02:00

132 lines
3.9 KiB
R

library(rpart)
library(rpart.plot)
zero_var <- function(m){
apply(m,2, function(v){
var(v) != 0
})
}
decbin <- function(decnum){
if(decnum < 2){
return(decnum)
}
c(decbin((decnum %/% 2)),decnum %% 2)
}
decpad <- function(decnum,len){
binrep <- decbin(decnum)
c(rep(0,len-length(binrep)),binrep)
}
tree_per_const <- function(results="result.csv",output ="tree_per_const.pdf",
parameters_file="Martonvasar_maize.set"){
varname <-readLines(parameters_file)[1]
parameters <- read.csv(parameters_file,skip=1,stringsAsFactors=FALSE)
results <- read.csv(results, stringsAsFactors=FALSE)
# likelihoods <- results[,ncol(results)-3]
# results <- results[likelihoods>=quantile(likelihoods,0.95),]
len <- round(log(max(results$failType),2))
failTypes <- do.call(rbind,lapply(results$failType,function(x){decpad(x,len)}))
pdf(output)
sapply(1:len, function(const){
nonzero <- results[,1:(ncol(results)-4)]
nonzero <- nonzero[,-1]
nonzero <- nonzero[,zero_var(nonzero)]
colnames(nonzero) <- gsub("__.*","",colnames(nonzero))
constraint <- failTypes[,const]
baseTable <- cbind.data.frame(nonzero,constraint = as.factor(constraint))
try({
rp = rpart(constraint ~ .,data = baseTable)
})
try({
parameters <<- update_parameters_based_on_tree(rp, parameters)
})
try({
rpart.plot(rp)
})
})
dev.off()
outname <- paste0(tools::file_path_sans_ext(parameters_file),"_after_tree.",
tools::file_ext(parameters_file))
writeLines(varname,outname)
write.table(parameters,outname,row.names=FALSE,append=TRUE,sep=",",quote=FALSE)
}
update_parameters_based_on_tree <- function(rp, parameters){
frm <- rp$frame
nodes <- labels(rp)
names(nodes) <- row.names(frm)
node <- get_start_node(frm)
parameters <- change_parameters_on_node(nodes,node,parameters)
while(node !=1){
node <- get_parent_node(node)
if(node == 1){
break()
}
parameters <- change_parameters_on_node(nodes,node,parameters)
}
parameters
}
parse_rule_row <- function(string){
rule_row <- regmatches(string,regexec("([a-zA-Z_0-9]+)([>=< ]+)(.*)",string,perl=TRUE))[[1]][-1]
if(rule_row[2] == ">="){
rule_num <- 1
} else {
rule_num <- 2
}
rule <- list(c(rule_num,as.numeric(rule_row[3])))
names(rule) <- rule_row[1]
return(rule)
}
get_start_node <- function(frm){
nfrm <- frm[frm$yval == 2,]
nfrm <- nfrm[nfrm[,"var"] == "<leaf>",]
pot_start <- as.numeric(row.names(nfrm))[which.max(nfrm$n)]
pot_start
}
get_parent_node <- function(node_id){
as.integer(node_id/2)
}
change_parameters_on_node <- function(nodes,node,parameters2){
crule <- parse_rule_row(nodes[as.character(node)])
minmax <- unlist(parameters2[parameters2[,1] == names(crule),c(3,4)])
if(crule[[1]][1] == 1){
if(minmax[1]<=crule[[1]][2]){
minmax[1] <- crule[[1]][2]
if(minmax[1] <= minmax[2]){
parameters2[parameters2[,1] == names(crule),c(3,4)] <- minmax
} else {
write(sprintf("WARNING: %s's minimum(%s) > maximum(%s)", parameters2[,1],
minmax[1], minmax[2]), "errorlog.txt", append=TRUE)
}
}
} else {
if(minmax[2]>=crule[[1]][2]){
minmax[2] <- crule[[1]][2]
if(minmax[1] <= minmax[2]){
parameters2[parameters2[,1] == names(crule),c(3,4)] <- minmax
} else {
write(sprintf("WARNING: %s's minimum(%s) > maximum(%s)", parameters2[,1],
minmax[1], minmax[2]), "errorlog.txt", append=TRUE)
}
}
}
parameters2
}