##### simulation algorithms ------------ ### turning bands ----------- ## move from geostats.R here ### LU Decomposition ----------- ## TODO ### direct sampling ------ gsi.DS4CoDa <- function(n, f, t, n_realiz, nx_TI, ny_TI, nx_SimGrid, ny_SimGrid, TI_input, SimGrid_input, V = "ilr", ivars_TI = 3:ncol(TI_input), W = t(gsi.produceV(V=V, D=length(ivars_TI), giveInv = TRUE)@W), SimGrid_mask = ncol(SimGrid_input), invertMask = TRUE ){ .Deprecated(new = "gsi.DS", msg="gsi.DS4CoDa is deprecated; use gsi.DS or go via make.gm*-functions followed by DSpars and predict for gmSpatialModel") ### extract elements # mask if(length(SimGrid_mask)==1){ mask = as.logical(SimGrid_input[, SimGrid_mask]) if(is.numeric(SimGrid_mask)) SimGrid_input = SimGrid_input[,-SimGrid_mask] if(is.character(SimGrid_mask)) SimGrid_input = SimGrid_input[,setdiff(SimGrid_mask,colnames(SimGrid_input))] }else if(length(SimGrid_mask)==nrow(SimGrid_input)){ mask = as.logical(SimGrid_mask) }else stop("gsi.DS4CoDa: SimGrid_mask not interpretable") if(invertMask) mask = !mask ### Prepare data matrices and eventually, transform if(V=="I"){ ## case "no transformation" D=length(ivars_TI) # TI TI_ilr <- matrix(data = NA, nrow = nrow(TI_input), ncol = D) TI_ilr[which(complete.cases(TI_input)),] <- TI_input[which(complete.cases(TI_input)), ivars_TI] # conditioning data + simgrid SimGrid_ilr <- matrix(data = NA, nrow = nrow(SimGrid_input), ncol = D) if (length(which(complete.cases(SimGrid_input)))>=1){ SimGrid_ilr[which(complete.cases(SimGrid_input)),] <- SimGrid_input[which(complete.cases(SimGrid_input)), ivars_TI] } }else{ ## case "transform" # interpret V V = gsi.produceV(V=V, D=length(ivars_TI)) # make space TI_ilr <- matrix(data = NA,nrow = nrow(TI_input),ncol = ncol(V)) SimGrid_ilr <- matrix(data = NA, nrow = nrow(SimGrid_input), ncol = length(ivars_TI)-1) # transform TI TI_ilr[which(complete.cases(TI_input)),] <- ilr(TI_input[which(complete.cases(TI_input)),ivars_TI], V=V) # transform conditioning data if (length(which(complete.cases(SimGrid_input)))>=1){ SimGrid_ilr[which(complete.cases(SimGrid_input)),] <- ilr(SimGrid_input[which(complete.cases(SimGrid_input)), ivars_TI]) } } # Array to store realizations SimGrid_ilr <- replicate(n_realiz, SimGrid_ilr) # If conditioning data is not provided, proceed with nonconditional simulation tk0 = complete.cases(SimGrid_input) if (sum(tk0)<n){ for (i in 1:n_realiz){ nmissing = n-sum(tk0) stk = sample(x=which(mask &!tk0), size=nmissing) SimGrid_ilr[stk,,i] <- TI_ilr[sample(x=which(complete.cases(TI_ilr)),size = nmissing),] } } # Compositional range tkTI = complete.cases(TI_ilr) CRange <- max(dist(TI_ilr[tkTI,])) # Change TI to an array TI_ilr_array <- array(as.vector(TI_ilr),dim = c(nx_TI,ny_TI, ncol(TI_ilr))) # List to store realization SimGrid_ilr_list <- list() for (i in 1:n_realiz){ SimGrid_ilr_list[[i]] <- array(as.vector(SimGrid_ilr[,,i]), dim = c(nx_SimGrid,ny_SimGrid,ncol(SimGrid_ilr))) } # matrix of the informed nodes in the training image mInformedTI <- which(!is.na(TI_ilr_array[,,1]), arr.ind = TRUE) # array of nodes to be simulated maskArray <- array(mask, dim = c(nx_SimGrid,ny_SimGrid,1)) #pb = list() #myfun = function(ii){ for(ii in 1:n_realiz){ cat(paste("\n Realization number #",ii, "\n")) # Defining a fully random path for simulation list_sim <- which(maskArray[,,1] & is.na(SimGrid_ilr_list[[ii]][,,1]), arr.ind = TRUE) path_sim <- list_sim[sample(nrow(list_sim)),] # initialize progress bar pb <- utils::txtProgressBar(min = 0, max = nrow(path_sim)*f*nrow(mInformedTI), style = 3) status <- 0 # Looping simulation nodes for (simnod in 1:nrow(path_sim)){ path_this_sim = path_sim[simnod,] # Finding the n closest compositions (hard or simulated) to build the data event tki = !is.na(SimGrid_ilr_list[[ii]][,,1]) dataevesim_discode <- FNN::get.knnx( data=which(tki,arr.ind = TRUE), t(as.matrix(path_this_sim)), k=n, algorithm=c("kd_tree") ) dataevesim_loc <- which(tki, arr.ind = TRUE)[dataevesim_discode$nn.index,] dataevesim <- mapply(function(i, j) SimGrid_ilr_list[[ii]][i, j, 1:ncol(TI_ilr)], dataevesim_loc[,1], dataevesim_loc[,2]) dataevesim_vec <- dataevesim_loc - matrix(rep( t(as.matrix(path_this_sim)),each=n),nrow=n) # Scanning TI for a close pattern path_TI <- mInformedTI[sample(nrow(mInformedTI)),] # Initial best distance is set to inf. Update with every best distance encountered mindist <- Inf # Number of tries in the TI nb_of_tries <- ceiling(nrow(path_TI)*f) # Store best pattern encountered so far BestPoint <- matrix(data = NA,nrow = 1,ncol = 2) for (tinod in 1:nb_of_tries){ # update progress bar status = status + 1 utils::setTxtProgressBar(pb, status) # Building training pattern and measuring distance dataeveti_loc <- dataevesim_vec + matrix(rep( t(as.matrix(path_TI[tinod,])),each=n),nrow=n) outwin <- dataeveti_loc[,1] <= nx_TI & dataeveti_loc[,2] <= ny_TI & dataeveti_loc[,1] > 0 & dataeveti_loc[,2] > 0 if(sum(outwin)==0){next} dataeveti <- mapply(function(i, j) TI_ilr_array[i, j, 1:ncol(TI_ilr)], dataeveti_loc[outwin,1], dataeveti_loc[outwin,2]) if(sum(is.na(dataeveti[1,]))>=ncol(dataeveti)){next} mydist <- mean(sqrt(colSums((dataevesim[,outwin] - dataeveti)^2))/CRange,na.rm = TRUE) # Checking for the minimum distance found so far if (mydist < mindist){ mindist <- mydist BestPoint <- t(as.matrix(path_TI[tinod,])) } # break the loop if the distance is less than t if (mindist <= t){break} } # update status bar status = simnod* f*nrow(mInformedTI) utils::setTxtProgressBar(pb, status) # pasting the whole composition SimGrid_ilr_list[[ii]][path_this_sim[1],path_this_sim[2],] <- TI_ilr_array[BestPoint[,1],BestPoint[,2],] #return(SimGrid_ilr_list[[ii]]) } } #SimGrid_ilr_list = foreach(ii=1:n_realiz,.combine = list) %dopar% myfun(ii) # Empty array to store the backtransfomed realizations SimGrid <- array(data = NA, dim = c(nrow(SimGrid_ilr),ncol(SimGrid_ilr)+1, n_realiz)) # Backtransform to compositional space if(is.matrix(W)){ for (i in 1:n_realiz){ SimGrid_ilr[,,i] <- matrix(as.vector(SimGrid_ilr_list[[i]]),nrow = nrow(SimGrid_ilr),ncol = ncol(SimGrid_ilr)) SimGrid[mask,,i] <- ilrInv(SimGrid_ilr[mask,,i], V=W) varnames_out = rownames(W) } }else{ for (i in 1:n_realiz){ SimGrid[mask,,i] <- matrix(as.vector(SimGrid_ilr_list[[i]]),nrow = nrow(SimGrid_ilr),ncol = ncol(SimGrid_ilr)) varnames_out = colnames(TI_input)[ivars_TI] } } # addition by Raimon 20200402 ddd = dim(SimGrid)[2] if(length(varnames_out)!=ddd) varnames_out[is.null(varnames_out)] = paste("v", 1:ddd, sep="")[is.null(varnames_out)] dimnames(SimGrid) = list(loc=1:nrow(SimGrid_ilr), var=varnames_out, sim=paste("sim", 1:n_realiz, sep="") ) SimGrid=DataFrameStack(SimGrid, stackDim="sim") return(SimGrid) } #' Workhorse function for direct sampling #' #' This function implements in R the direct sampling algorithm #' #' @param n size of the conditioning data event (integer) #' @param f fraction of the training image to scan (numeric between 0 and 1) #' @param t maximal acceptable discrepance between conditioning data event and TI event (numeric between 0 and 1) #' @param n_realiz number of simulations desired #' @param dim_TI dimensions of the grid of the training image (ie. either \eqn{(n_x, n_y)} #' for dimension \eqn{k=2} or \eqn{(n_x, n_y, n_z)} for dimension \eqn{k=3}) #' @param dim_SimGrid dimensions of the simulation grid (ie. either \eqn{(m_x, m_y)} or #' \eqn{(m_x, m_y, m_z)}) #' @param TI_input training image, as a matrix of \eqn{(n_x\cdot n_y\cdot n_z, k+D)} #' elements; WITH NAMED COLUMNS and including spatial coordinates #' @param SimGrid_input simulation grid with conditioning data, as a matrix of #' \eqn{(m_x\cdot m_y\cdot m_z, k+D)} elements; with same columns as `TI_input` #' @param ivars_TI which colnames of `TI_input` and `SimGrid_input` identify variables to consider in the data event #' @param SimGrid_mask either a logical vector of length \eqn{m_x\cdot m_y\cdot m_z}, or else a column name of `SimGrid_input` #' giving a logical column #' @param invertMask logical, does `SimGrid_mask` identify with TRUE the data OUTSIDE the simulation area? #' #' @return A [sp::SpatialPixelsDataFrame()] or [sp::SpatialGridDataFrame()], depending on whether the whole #' grid is simulated. The '@data' slot of these objects contains a [DataFrameStack()] with the stacking dimension #' running through the realisations. It is safer to use this functionality through the interface #' [make.gmCompositionalMPSSpatialModel()], then request a direct simulation with [DSpars()] and #' finally run it with [predict_gmSpatialModel]. #' @export #' @importFrom utils txtProgressBar setTxtProgressBar #' @importFrom stats complete.cases lm #' @author Hassan Talebi (copyright holder), Raimon Tolosana-Delgado #' @examples #' ## training image: #' x = 1:10 #' y = 1:7 #' xy_TI = expand.grid(x=x, y=y) #' TI_input = cbind(xy_TI, t(apply(xy_TI, 1, function(x) c(sum(x), abs(x[2]-x[1]))+rnorm(2, sd=0.01)))) #' colnames(TI_input) = c("x", "y", "V1", "V2") #' o1 = image_cokriged(TI_input, ivar="V1") #' o2 = image_cokriged(TI_input, ivar="V2") #' ## simulation grid: #' SimGrid = TI_input #' SimGrid$mask = with(SimGrid, x==1 | x==10 | y==1 | y==7) #' tk = SimGrid$mask #' tk[sample(70, 50)] = TRUE #' SimGrid[tk,3:4]=NA #' image_cokriged(SimGrid, ivar="V1", breaks=o1$breaks, col=o1$col) #' image_cokriged(SimGrid, ivar="V2", breaks=o2$breaks, col=o2$col) #' image_cokriged(SimGrid, ivar="mask", breaks=c(-0.0001, 0.5, 1.001)) #' \dontrun{ #' res = gsi.DS(n=5, f=0.75, t=0.05, n_realiz=2, dim_TI=c(10,7), dim_SimGrid=c(10,7), #' TI_input=as.matrix(TI_input), SimGrid_input=as.matrix(SimGrid), #' ivars_TI = c("V1", "V2"), SimGrid_mask="mask", invertMask=TRUE) #' image_cokriged(cbind(xy_TI, getStackElement(res,1)), ivar="V1", breaks=o1$breaks, col=o1$col) #' image_cokriged(cbind(xy_TI, getStackElement(res,2)), ivar="V1", breaks=o1$breaks, col=o1$col) #' image_cokriged(cbind(xy_TI, getStackElement(res,1)), ivar="V2", breaks=o2$breaks, col=o2$col) #' image_cokriged(cbind(xy_TI, getStackElement(res,2)), ivar="V2", breaks=o2$breaks, col=o2$col) #' } gsi.DS <- function(n, f, t, n_realiz, dim_TI, dim_SimGrid, TI_input, SimGrid_input, ivars_TI = 3:ncol(TI_input), SimGrid_mask = ncol(SimGrid_input), invertMask = TRUE ){ if(!requireNamespace("FNN", quietly = TRUE)) stop("direct sampling requires package 'KNN' installed") ## constants: nx_TI=dim_TI[1] ny_TI=dim_TI[2] if(length(dim_TI)>2){nz_TI=dim_TI[3]}else{nz_TI=1} nx_SimGrid=dim_SimGrid[1] ny_SimGrid=dim_SimGrid[2] if(length(dim_SimGrid)>2){nz_SimGrid=dim_SimGrid[3]}else{nz_SimGrid=1} ### extract elements out of TI and simgrid # mask if(length(SimGrid_mask)==1){ mask = as.logical(SimGrid_input[, SimGrid_mask]) if(is.numeric(SimGrid_mask)) SimGrid_input = SimGrid_input[,-SimGrid_mask] if(is.character(SimGrid_mask)) SimGrid_input = SimGrid_input[,setdiff(colnames(SimGrid_input), SimGrid_mask)] }else if(length(SimGrid_mask)==nrow(SimGrid_input)){ mask = as.logical(SimGrid_mask) }else stop("gsi.DS4CoDa: SimGrid_mask not interpretable") if(invertMask) mask = !mask # full grid if(is.numeric(ivars_TI)) fullgrid = SimGrid_input[,-ivars_TI] if(is.character(ivars_TI)) fullgrid = SimGrid_input[,setdiff(colnames(SimGrid_input), ivars_TI)] # nr of variables D=length(ivars_TI) # TI TI <- matrix(data = NA, nrow = nrow(TI_input), ncol = D) TI[which(complete.cases(TI_input)),] <- TI_input[which(complete.cases(TI_input)), ivars_TI] # conditioning data + SimGrid SimGrid <- matrix(data = NA, nrow = nrow(SimGrid_input), ncol = D) tk0 = complete.cases(SimGrid_input) if (sum(tk0)>=1) SimGrid[tk0,] <- SimGrid_input[tk0, ivars_TI] # Array to store realizations SimGrid <- replicate(n_realiz, SimGrid) # If not sufficient conditioning data are provided, complete with some unconditional simulations if (sum(tk0)<n){ for (i in 1:n_realiz){ nmissing = n-sum(tk0) stk = sample(x=which(mask &!tk0), size=nmissing) SimGrid[stk,,i] <- TI[sample(x=which(complete.cases(TI)), size = nmissing),] } } # Data range tkTI = complete.cases(TI) CRange <- max(dist(TI[tkTI,])) # Change TI to an array TI_array <- array(as.vector(TI),dim = c(nx_TI,ny_TI, nz_TI, ncol(TI))) # List to store realization SimGrid_list <- list() for (i in 1:n_realiz){ SimGrid_list[[i]] <- array(as.vector(SimGrid[,,i]), dim = c(nx_SimGrid,ny_SimGrid,nz_SimGrid,D) ) } # matrix of the informed nodes in the training image TIinformed_array <- which(!is.na(TI_array[,,,1]), arr.ind = TRUE) # array of nodes to be simulated mask_array <- array(mask, dim = c(nx_SimGrid,ny_SimGrid,nz_SimGrid)) #pb = list() #myfun = function(ii){ for(ii in 1:n_realiz){ cat(paste("\n Realization number #",ii, "\n")) # Defining a fully random path for simulation list_sim <- which(mask_array[,,,drop=T] & is.na(SimGrid_list[[ii]][,,,1]), arr.ind = TRUE) path_sim <- list_sim[sample(nrow(list_sim)),] # initialize progress bar pb <- utils::txtProgressBar(min = 0, max = nrow(path_sim)*f*nrow(TIinformed_array), style = 3) status <- 0 # Looping simulation nodes for (simnod in 1:nrow(path_sim)){ path_this_sim = path_sim[simnod,] # Finding the n closest compositions (hard or simulated) to build the data event tki = !is.na(SimGrid_list[[ii]][,,,1]) dataevesim_discode <- FNN::get.knnx( data=which(tki,arr.ind = TRUE), t(as.matrix(path_this_sim)), # why t(as.matrix(...))? k=n, algorithm=c("kd_tree") ) dataevesim_loc <- which(tki, arr.ind = TRUE)[c(dataevesim_discode$nn.index),] if(ncol(dataevesim_loc)==3){ G = 3 dataevesim <- mapply(function(i, j, k) SimGrid_list[[ii]][i, j, k, 1:D], dataevesim_loc[,1], dataevesim_loc[,2], dataevesim_loc[,3]) }else{ G = 2 dataevesim <- mapply(function(i, j) SimGrid_list[[ii]][i, j, 1, 1:D], dataevesim_loc[,1], dataevesim_loc[,2]) } dataevesim_vec <- dataevesim_loc - matrix(rep( t(as.matrix(path_this_sim)),each=n),nrow=n) # compute lag constellation # Scanning TI for a close pattern path_TI <- TIinformed_array[sample(nrow(TIinformed_array)),] # Initial best distance is set to inf. Update with every best distance encountered mindist <- Inf # Number of tries in the TI nb_of_tries <- ceiling(nrow(path_TI)*f) # Store best pattern encountered so far BestPoint <- matrix(data = NA, nrow = 1, ncol = G) for (tinod in 1:nb_of_tries){ # update progress bar status = status + 1 utils::setTxtProgressBar(pb, status) # Building training pattern and measuring distance dataeveti_loc <- dataevesim_vec + matrix(rep( t(as.matrix(path_TI[tinod,])),each=n),nrow=n) # place the lag constellation on the training image outwin <- dataeveti_loc[,1] <= nx_TI & dataeveti_loc[,2] <= ny_TI & dataeveti_loc[,1] > 0 & dataeveti_loc[,2] > 0 if(G==3) outwin <- outwin & dataeveti_loc[,3] <= nz_TI & dataeveti_loc[,3] > 0 if(sum(outwin)==0){next} if(G==3){ dataeveti <- mapply(function(i, j, k) TI_array[i, j, k, 1:ncol(TI)], dataeveti_loc[outwin,1], dataeveti_loc[outwin,2], dataeveti_loc[outwin,3]) }else{ dataeveti <- mapply(function(i, j) TI_array[i, j, 1, 1:ncol(TI)], dataeveti_loc[outwin,1], dataeveti_loc[outwin,2]) } if(sum(is.na(dataeveti[1,]))>=ncol(dataeveti)){next} mydist <- mean(sqrt(colSums((dataevesim[,outwin] - dataeveti)^2))/CRange,na.rm = TRUE) # Checking for the minimum distance found so far if (mydist < mindist){ mindist <- mydist BestPoint <- t(as.matrix(path_TI[tinod,])) } # break the loop if the distance is less than t if (mindist <= t){break} } # update status bar status = simnod* f*nrow(TIinformed_array) utils::setTxtProgressBar(pb, status) # pasting the whole composition if(G==3){ SimGrid_list[[ii]][path_this_sim[1],path_this_sim[2],path_this_sim[3],] <- TI_array[BestPoint[,1],BestPoint[,2],BestPoint[,3],] }else{ SimGrid_list[[ii]][path_this_sim[1],path_this_sim[2],1,] <- TI_array[BestPoint[,1],BestPoint[,2],1,] } #return(SimGrid_ilr_list[[ii]]) } } # set as DataFrameStack if(is.numeric(ivars_TI)){ varnames_out = tryCatch(colnames(TI_input)[ivars_TI]) }else if(is.character(ivars_TI)){ varnames_out = ivars_TI } if(length(varnames_out)!=D | inherits(varnames_out,"try-error")) varnames_out = paste("v", 1:D, sep="") dm = list(loc=1:length(mask), var=varnames_out, sim=paste("sim", 1:n_realiz, sep="") ) SimGrid_list = lapply(SimGrid_list, function(x){ dim(x) = c(length(x)/D,D) rownames(x) = 1:length(mask) colnames(x) = varnames_out x }) SimGrid=DataFrameStack(SimGrid_list, stackDimName="sim", Dimnames=dm) return(SimGrid) }