Skip to content
Snippets Groups Projects
gmSimulation.R 17.7 KiB
Newer Older
  • Learn to ignore specific revisions
  • Raimon Tolosana-Delgado's avatar
    Raimon Tolosana-Delgado committed
    ##### simulation algorithms ------------
    
    ### turning bands -----------
    
    ## move from geostats.R here
    
    
    ### LU Decomposition -----------
    
    ## TODO
    
    ### direct sampling ------
    
    
    
    
    gsi.DS4CoDa <- function(n, f, t, n_realiz, nx_TI, ny_TI, nx_SimGrid, ny_SimGrid, TI_input, SimGrid_input,
                            V = "ilr", ivars_TI = 3:ncol(TI_input), 
                            W = t(gsi.produceV(V=V, D=length(ivars_TI), giveInv = TRUE)@W), 
                            SimGrid_mask = ncol(SimGrid_input), invertMask = TRUE
                            ){
      .Deprecated(new = "gsi.DS", 
    
                  msg="gsi.DS4CoDa is deprecated; use gsi.DS or go via make.gm*-functions followed by DSpars and predict for gmSpatialModel")
    
    Raimon Tolosana-Delgado's avatar
    Raimon Tolosana-Delgado committed
      ### extract elements
      # mask
      if(length(SimGrid_mask)==1){
        mask = as.logical(SimGrid_input[, SimGrid_mask])
        if(is.numeric(SimGrid_mask)) SimGrid_input = SimGrid_input[,-SimGrid_mask]
        if(is.character(SimGrid_mask)) SimGrid_input = SimGrid_input[,setdiff(SimGrid_mask,colnames(SimGrid_input))]
      }else if(length(SimGrid_mask)==nrow(SimGrid_input)){
        mask = as.logical(SimGrid_mask)
      }else stop("gsi.DS4CoDa: SimGrid_mask not interpretable")
      if(invertMask) mask = !mask
      
      ### Prepare data matrices and eventually, transform
      if(V=="I"){
        ## case "no transformation"
        D=length(ivars_TI)
        # TI
        TI_ilr <- matrix(data = NA, nrow = nrow(TI_input), ncol = D)
        TI_ilr[which(complete.cases(TI_input)),] <- TI_input[which(complete.cases(TI_input)), ivars_TI]
        # conditioning data  + simgrid
        SimGrid_ilr <- matrix(data = NA, nrow = nrow(SimGrid_input), ncol = D)
        if (length(which(complete.cases(SimGrid_input)))>=1){
          SimGrid_ilr[which(complete.cases(SimGrid_input)),] <- 
            SimGrid_input[which(complete.cases(SimGrid_input)), ivars_TI]
          }
      }else{
        ## case "transform"
        # interpret V
        V = gsi.produceV(V=V, D=length(ivars_TI))
        # make space
        TI_ilr <- matrix(data = NA,nrow = nrow(TI_input),ncol = ncol(V))
        SimGrid_ilr <- matrix(data = NA, nrow = nrow(SimGrid_input), ncol = length(ivars_TI)-1)
        # transform TI
        TI_ilr[which(complete.cases(TI_input)),] <- ilr(TI_input[which(complete.cases(TI_input)),ivars_TI], V=V)
        # transform conditioning data
        if (length(which(complete.cases(SimGrid_input)))>=1){
          SimGrid_ilr[which(complete.cases(SimGrid_input)),] <- 
            ilr(SimGrid_input[which(complete.cases(SimGrid_input)), ivars_TI])
          }
      }
      
      # Array to store realizations
      SimGrid_ilr <- replicate(n_realiz, SimGrid_ilr)
      
      # If conditioning data is not provided, proceed with nonconditional simulation   
      tk0 = complete.cases(SimGrid_input)
      if (sum(tk0)<n){
        for (i in 1:n_realiz){
          nmissing = n-sum(tk0)
          stk = sample(x=which(mask &!tk0), size=nmissing)
          SimGrid_ilr[stk,,i] <- TI_ilr[sample(x=which(complete.cases(TI_ilr)),size = nmissing),]
        }
      }
      
      # Compositional range
      tkTI = complete.cases(TI_ilr)
      CRange <- max(dist(TI_ilr[tkTI,]))
      
      # Change TI to an array
      TI_ilr_array <- array(as.vector(TI_ilr),dim = c(nx_TI,ny_TI, ncol(TI_ilr)))
      
      # List to store realization
      SimGrid_ilr_list <- list()
      for (i in 1:n_realiz){
        SimGrid_ilr_list[[i]] <- array(as.vector(SimGrid_ilr[,,i]), dim = c(nx_SimGrid,ny_SimGrid,ncol(SimGrid_ilr)))
      }
      
      # matrix of the informed nodes in the training image
      mInformedTI  <- which(!is.na(TI_ilr_array[,,1]), arr.ind = TRUE)
      
      # array of nodes to be simulated
      maskArray <- array(mask, dim = c(nx_SimGrid,ny_SimGrid,1))
      
      #pb = list()
      #myfun = function(ii){
      for(ii in 1:n_realiz){
        cat(paste("\n Realization number #",ii, "\n"))
        
        # Defining a fully random path for simulation
        list_sim <- which(maskArray[,,1] & is.na(SimGrid_ilr_list[[ii]][,,1]), arr.ind = TRUE)
        path_sim <- list_sim[sample(nrow(list_sim)),]
        
        # initialize progress bar
        pb <- utils::txtProgressBar(min = 0, max = nrow(path_sim)*f*nrow(mInformedTI), style = 3)
        status <- 0
        # Looping simulation nodes
        for (simnod in 1:nrow(path_sim)){
          path_this_sim = path_sim[simnod,]
          
          # Finding the n closest compositions (hard or simulated) to build the data event
          tki = !is.na(SimGrid_ilr_list[[ii]][,,1])
          dataevesim_discode <- FNN::get.knnx(
              data=which(tki,arr.ind = TRUE), t(as.matrix(path_this_sim)), 
              k=n, 
              algorithm=c("kd_tree")
            )
          dataevesim_loc <- which(tki, arr.ind = TRUE)[dataevesim_discode$nn.index,]
          dataevesim <- mapply(function(i, j) SimGrid_ilr_list[[ii]][i, j, 1:ncol(TI_ilr)], dataevesim_loc[,1], dataevesim_loc[,2])
          dataevesim_vec <- dataevesim_loc - matrix(rep( t(as.matrix(path_this_sim)),each=n),nrow=n)
          
          # Scanning TI for a close pattern
          path_TI <- mInformedTI[sample(nrow(mInformedTI)),]
          
          # Initial best distance is set to inf. Update with every best distance encountered
          mindist <- Inf
          
          # Number of tries in the TI
          nb_of_tries <- ceiling(nrow(path_TI)*f)
          # Store best pattern encountered so far
          BestPoint <- matrix(data = NA,nrow = 1,ncol = 2)
          
          for (tinod in 1:nb_of_tries){
            # update progress bar
            status = status + 1
            utils::setTxtProgressBar(pb, status)
            
            # Building training pattern and measuring distance
            dataeveti_loc <- dataevesim_vec + matrix(rep( t(as.matrix(path_TI[tinod,])),each=n),nrow=n) 
            outwin <- dataeveti_loc[,1] <= nx_TI & dataeveti_loc[,2] <= ny_TI & dataeveti_loc[,1] > 0 & dataeveti_loc[,2] > 0 
            if(sum(outwin)==0){next}
            dataeveti <- mapply(function(i, j) TI_ilr_array[i, j, 1:ncol(TI_ilr)], dataeveti_loc[outwin,1], dataeveti_loc[outwin,2])
            if(sum(is.na(dataeveti[1,]))>=ncol(dataeveti)){next}
            mydist <- mean(sqrt(colSums((dataevesim[,outwin] - dataeveti)^2))/CRange,na.rm = TRUE)
            
            # Checking for the minimum distance found so far
            if (mydist < mindist){
              mindist <- mydist
              BestPoint <- t(as.matrix(path_TI[tinod,]))
            }
            # break the loop if the distance is less than t
            if (mindist <= t){break}
          }
          # update status bar
          status = simnod* f*nrow(mInformedTI)
          utils::setTxtProgressBar(pb, status)
          
      
          # pasting the whole composition
          SimGrid_ilr_list[[ii]][path_this_sim[1],path_this_sim[2],] <- TI_ilr_array[BestPoint[,1],BestPoint[,2],]
          #return(SimGrid_ilr_list[[ii]])
        }
      }
      
      #SimGrid_ilr_list = foreach(ii=1:n_realiz,.combine = list) %dopar% myfun(ii) 
      
        
      # Empty array to store the backtransfomed realizations
      SimGrid <- array(data = NA, dim = c(nrow(SimGrid_ilr),ncol(SimGrid_ilr)+1, n_realiz))
      
      # Backtransform to compositional space
      if(is.matrix(W)){
        for (i in 1:n_realiz){
          SimGrid_ilr[,,i] <- matrix(as.vector(SimGrid_ilr_list[[i]]),nrow = nrow(SimGrid_ilr),ncol = ncol(SimGrid_ilr))
          SimGrid[mask,,i] <- ilrInv(SimGrid_ilr[mask,,i], V=W)
          varnames_out = rownames(W)
        } 
      }else{
        for (i in 1:n_realiz){
          SimGrid[mask,,i] <- matrix(as.vector(SimGrid_ilr_list[[i]]),nrow = nrow(SimGrid_ilr),ncol = ncol(SimGrid_ilr))
          varnames_out = colnames(TI_input)[ivars_TI]
        }
      }
      
    
    Raimon Tolosana-Delgado's avatar
    Raimon Tolosana-Delgado committed
    
    
    Raimon Tolosana-Delgado's avatar
    Raimon Tolosana-Delgado committed
      # addition by Raimon 20200402
      ddd = dim(SimGrid)[2]
      if(length(varnames_out)!=ddd)   
        varnames_out[is.null(varnames_out)] = paste("v", 1:ddd, sep="")[is.null(varnames_out)]
      dimnames(SimGrid) = list(loc=1:nrow(SimGrid_ilr),
                               var=varnames_out,
                               sim=paste("sim", 1:n_realiz, sep="")
      )
      SimGrid=DataFrameStack(SimGrid, stackDim="sim")
      
      return(SimGrid)  
    }
    
    
    
    
    
    #' Workhorse function for direct sampling
    #' 
    #' This function implements in R the direct sampling algorithm
    #'
    #' @param n size of the conditioning data event (integer)
    #' @param f fraction of the training image to scan (numeric between 0 and 1)
    #' @param t maximal acceptable discrepance between conditioning data event and TI event (numeric between 0 and 1)
    #' @param n_realiz number of simulations desired
    #' @param dim_TI dimensions of the grid of the training image (ie. either \eqn{(n_x, n_y)} 
    #' for dimension \eqn{k=2} or \eqn{(n_x, n_y, n_z)} for dimension \eqn{k=3})
    #' @param dim_SimGrid  dimensions of the simulation grid (ie. either \eqn{(m_x, m_y)} or 
    #' \eqn{(m_x, m_y, m_z)})
    #' @param TI_input training image, as a matrix of \eqn{(n_x\cdot n_y\cdot n_z, k+D)} 
    #' elements; WITH NAMED COLUMNS and including spatial coordinates
    #' @param SimGrid_input simulation grid with conditioning data, as a matrix of 
    #' \eqn{(m_x\cdot m_y\cdot m_z, k+D)} elements; with same columns as `TI_input`
    #' @param ivars_TI which colnames of `TI_input` and `SimGrid_input` identify variables to consider in the data event
    #' @param SimGrid_mask either a logical vector of length \eqn{m_x\cdot m_y\cdot m_z}, or else a column name of `SimGrid_input` 
    #' giving a logical column
    #' @param invertMask logical, does `SimGrid_mask` identify with TRUE the data OUTSIDE the simulation area?
    #'
    #' @return A [sp::SpatialPixelsDataFrame()] or  [sp::SpatialGridDataFrame()], depending on whether the whole
    #' grid is simulated. The '@data' slot of these objects contains a [DataFrameStack()] with the stacking dimension
    #' running through the realisations. It is safer to use this functionality through the interface
    #' [make.gmCompositionalMPSSpatialModel()], then request a direct simulation with [DSpars()] and
    
    #' finally run it with [predict_gmSpatialModel].
    
    Raimon Tolosana-Delgado's avatar
    Raimon Tolosana-Delgado committed
    #' @export
    #' @importFrom utils txtProgressBar setTxtProgressBar
    #' @importFrom stats complete.cases lm 
    
    Raimon Tolosana-Delgado's avatar
    Raimon Tolosana-Delgado committed
    #' @author Hassan Talebi (copyright holder), Raimon Tolosana-Delgado
    
    Raimon Tolosana-Delgado's avatar
    Raimon Tolosana-Delgado committed
    #' @examples
    #' ## training image:
    #' x = 1:10
    #' y = 1:7
    #' xy_TI = expand.grid(x=x, y=y)
    #' TI_input = cbind(xy_TI, t(apply(xy_TI, 1, function(x) c(sum(x), abs(x[2]-x[1]))+rnorm(2, sd=0.01))))
    #' colnames(TI_input) = c("x", "y", "V1", "V2")
    #' o1 = image_cokriged(TI_input, ivar="V1")
    #' o2 = image_cokriged(TI_input, ivar="V2")
    #' ## simulation grid:
    #' SimGrid = TI_input
    #' SimGrid$mask = with(SimGrid, x==1 | x==10 | y==1 | y==7)
    #' tk = SimGrid$mask
    #' tk[sample(70, 50)] = TRUE 
    #' SimGrid[tk,3:4]=NA
    #' image_cokriged(SimGrid, ivar="V1", breaks=o1$breaks, col=o1$col)
    #' image_cokriged(SimGrid, ivar="V2", breaks=o2$breaks, col=o2$col)
    #' image_cokriged(SimGrid, ivar="mask", breaks=c(-0.0001, 0.5, 1.001))
    
    #' res = gsi.DS(n=5, f=0.75, t=0.05, n_realiz=100, dim_TI=c(10,7),  dim_SimGrid=c(10,7), 
    
    Raimon Tolosana-Delgado's avatar
    Raimon Tolosana-Delgado committed
    #'        TI_input=as.matrix(TI_input), SimGrid_input=as.matrix(SimGrid), 
    #'        ivars_TI = c("V1", "V2"), SimGrid_mask="mask", invertMask=TRUE)
    #' image_cokriged(cbind(xy_TI, getStackElement(res,1)), ivar="V1", breaks=o1$breaks, col=o1$col)
    #' image_cokriged(cbind(xy_TI, getStackElement(res,2)), ivar="V1", breaks=o1$breaks, col=o1$col)
    #' image_cokriged(cbind(xy_TI, getStackElement(res,1)), ivar="V2", breaks=o2$breaks, col=o2$col)
    #' image_cokriged(cbind(xy_TI, getStackElement(res,2)), ivar="V2", breaks=o2$breaks, col=o2$col)
    gsi.DS <- function(n, f, t, n_realiz, 
                       dim_TI, dim_SimGrid,  
                       TI_input, SimGrid_input,
                       ivars_TI = 3:ncol(TI_input), 
                       SimGrid_mask = ncol(SimGrid_input), invertMask = TRUE
    ){
      if(!requireNamespace("FNN", quietly = TRUE)) stop("direct sampling requires package 'KNN' installed")
      ## constants:
      nx_TI=dim_TI[1]
      ny_TI=dim_TI[2]
      if(length(dim_TI)>2){nz_TI=dim_TI[3]}else{nz_TI=1}
      nx_SimGrid=dim_SimGrid[1]
      ny_SimGrid=dim_SimGrid[2]
      if(length(dim_SimGrid)>2){nz_SimGrid=dim_SimGrid[3]}else{nz_SimGrid=1}
    
      ### extract elements out of TI and simgrid
      # mask
      if(length(SimGrid_mask)==1){
        mask = as.logical(SimGrid_input[, SimGrid_mask])
        if(is.numeric(SimGrid_mask)) SimGrid_input = SimGrid_input[,-SimGrid_mask]
        if(is.character(SimGrid_mask)) SimGrid_input = SimGrid_input[,setdiff(colnames(SimGrid_input), SimGrid_mask)]
      }else if(length(SimGrid_mask)==nrow(SimGrid_input)){
        mask = as.logical(SimGrid_mask)
      }else stop("gsi.DS4CoDa: SimGrid_mask not interpretable")
      if(invertMask) mask = !mask
      # full grid
      if(is.numeric(ivars_TI)) fullgrid = SimGrid_input[,-ivars_TI]
      if(is.character(ivars_TI)) fullgrid = SimGrid_input[,setdiff(colnames(SimGrid_input), ivars_TI)]
      # nr of variables
      D=length(ivars_TI)
      # TI
      TI <- matrix(data = NA, nrow = nrow(TI_input), ncol = D)
      TI[which(complete.cases(TI_input)),] <- TI_input[which(complete.cases(TI_input)), ivars_TI]
      # conditioning data  + SimGrid
      SimGrid <- matrix(data = NA, nrow = nrow(SimGrid_input), ncol = D)
      tk0 = complete.cases(SimGrid_input)
      if (sum(tk0)>=1)  SimGrid[tk0,] <- SimGrid_input[tk0, ivars_TI]
      
      # Array to store realizations
      SimGrid <- replicate(n_realiz, SimGrid)
      
      # If not sufficient conditioning data are provided, complete with some unconditional simulations   
      if (sum(tk0)<n){
        for (i in 1:n_realiz){
          nmissing = n-sum(tk0)
          stk = sample(x=which(mask &!tk0), size=nmissing)
          SimGrid[stk,,i] <- TI[sample(x=which(complete.cases(TI)), size = nmissing),]
        }
      }
      
      # Data range
      tkTI = complete.cases(TI)
      CRange <- max(dist(TI[tkTI,]))
      
      # Change TI to an array
      TI_array <- array(as.vector(TI),dim = c(nx_TI,ny_TI, nz_TI, ncol(TI)))
      
      # List to store realization
      SimGrid_list <- list()
      for (i in 1:n_realiz){
        SimGrid_list[[i]] <- array(as.vector(SimGrid[,,i]), dim = c(nx_SimGrid,ny_SimGrid,nz_SimGrid,D) )
      }
      
      # matrix of the informed nodes in the training image
      TIinformed_array  <- which(!is.na(TI_array[,,,1]), arr.ind = TRUE)
      
      # array of nodes to be simulated
      mask_array <- array(mask, dim = c(nx_SimGrid,ny_SimGrid,nz_SimGrid))
      
      #pb = list()
      #myfun = function(ii){
      for(ii in 1:n_realiz){
        cat(paste("\n Realization number #",ii, "\n"))
        
        # Defining a fully random path for simulation
        list_sim <- which(mask_array[,,,drop=T] & is.na(SimGrid_list[[ii]][,,,1]), arr.ind = TRUE)
        path_sim <- list_sim[sample(nrow(list_sim)),]
        
        # initialize progress bar
        pb <- utils::txtProgressBar(min = 0, max = nrow(path_sim)*f*nrow(TIinformed_array), style = 3)
        status <- 0
        # Looping simulation nodes
        for (simnod in 1:nrow(path_sim)){
          path_this_sim = path_sim[simnod,]
          
          # Finding the n closest compositions (hard or simulated) to build the data event
          tki = !is.na(SimGrid_list[[ii]][,,,1])
          dataevesim_discode <- FNN::get.knnx(
            data=which(tki,arr.ind = TRUE), t(as.matrix(path_this_sim)),  # why t(as.matrix(...))?
            k=n, 
            algorithm=c("kd_tree")
          )
          dataevesim_loc <- which(tki, arr.ind = TRUE)[c(dataevesim_discode$nn.index),]
          if(ncol(dataevesim_loc)==3){
            G = 3
            dataevesim <- mapply(function(i, j, k) SimGrid_list[[ii]][i, j, k, 1:D], dataevesim_loc[,1], dataevesim_loc[,2], dataevesim_loc[,3])        
          }else{
            G = 2
            dataevesim <- mapply(function(i, j) SimGrid_list[[ii]][i, j, 1, 1:D], dataevesim_loc[,1], dataevesim_loc[,2])
          }
          dataevesim_vec <- dataevesim_loc - matrix(rep( t(as.matrix(path_this_sim)),each=n),nrow=n) # compute lag constellation 
          
          # Scanning TI for a close pattern
          path_TI <- TIinformed_array[sample(nrow(TIinformed_array)),]
          
          # Initial best distance is set to inf. Update with every best distance encountered
          mindist <- Inf
          
          # Number of tries in the TI
          nb_of_tries <- ceiling(nrow(path_TI)*f)
          # Store best pattern encountered so far
          BestPoint <- matrix(data = NA, nrow = 1, ncol = G)
          
          for (tinod in 1:nb_of_tries){
            # update progress bar
            status = status + 1
            utils::setTxtProgressBar(pb, status)
            
            # Building training pattern and measuring distance
            dataeveti_loc <- dataevesim_vec + matrix(rep( t(as.matrix(path_TI[tinod,])),each=n),nrow=n)  # place the lag constellation on the training image
            outwin <- dataeveti_loc[,1] <= nx_TI & dataeveti_loc[,2] <= ny_TI &  dataeveti_loc[,1] > 0 & dataeveti_loc[,2] > 0  
            if(G==3) outwin <- outwin & dataeveti_loc[,3] <= nz_TI  & dataeveti_loc[,3] > 0 
            if(sum(outwin)==0){next}
            if(G==3){
              dataeveti <- mapply(function(i, j, k) TI_array[i, j, k, 1:ncol(TI)], dataeveti_loc[outwin,1], dataeveti_loc[outwin,2], dataeveti_loc[outwin,3])
            }else{
              dataeveti <- mapply(function(i, j) TI_array[i, j, 1, 1:ncol(TI)], dataeveti_loc[outwin,1], dataeveti_loc[outwin,2])
            }
            if(sum(is.na(dataeveti[1,]))>=ncol(dataeveti)){next}
            mydist <- mean(sqrt(colSums((dataevesim[,outwin] - dataeveti)^2))/CRange,na.rm = TRUE)
            
            # Checking for the minimum distance found so far
            if (mydist < mindist){
              mindist <- mydist
              BestPoint <- t(as.matrix(path_TI[tinod,]))
            }
            # break the loop if the distance is less than t
            if (mindist <= t){break}
          }
          # update status bar
          status = simnod* f*nrow(TIinformed_array)
          utils::setTxtProgressBar(pb, status)
          
          
          # pasting the whole composition
          if(G==3){
            SimGrid_list[[ii]][path_this_sim[1],path_this_sim[2],path_this_sim[3],] <- TI_array[BestPoint[,1],BestPoint[,2],BestPoint[,3],]        
          }else{
            SimGrid_list[[ii]][path_this_sim[1],path_this_sim[2],1,] <- TI_array[BestPoint[,1],BestPoint[,2],1,]
          }
    
          #return(SimGrid_ilr_list[[ii]])
        }
      }
      
      # set as DataFrameStack
      if(is.numeric(ivars_TI)){
        varnames_out = tryCatch(colnames(TI_input)[ivars_TI])
      }else if(is.character(ivars_TI)){
         varnames_out = ivars_TI
      }
      if(length(varnames_out)!=D | class(varnames_out)=="try-error") varnames_out = paste("v", 1:D, sep="")
      dm = list(loc=1:length(mask), var=varnames_out, sim=paste("sim", 1:n_realiz, sep="") )
      
      SimGrid_list = lapply(SimGrid_list, function(x){
        dim(x) = c(length(x)/D,D)
        rownames(x) = 1:length(mask)
        colnames(x) = varnames_out
        x
      })
      
      SimGrid=DataFrameStack(SimGrid_list, stackDimName="sim", Dimnames=dm)
      
    
      return(SimGrid)  
    }