diff --git a/.Rbuildignore b/.Rbuildignore index 19be397dd53b1b37d5f638d062da28a39d54e8c2..f0a1a1845dd6bf0333b24a9df5e003765d47f479 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -13,3 +13,4 @@ svn ^cran-comments\.md$ jurapred ^README\.Rmd$ +^.*\.vscode$ diff --git a/DESCRIPTION b/DESCRIPTION index a2f6fde531c4d1f5f09fe97a0c43de320787e786..3a9b1611d95f42823781c1f0ac9bbf1a1a92964f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: gmGeostats -Version: 0.10-7.9005 +Version: 0.10-7.9006 Date: 2020-10-05 Title: Geostatistics for Compositional Analysis Authors@R: c(person(given = "Raimon", diff --git a/NAMESPACE b/NAMESPACE index 98271219ce69c0f9b152e345b78ac09091d3c50e..1268eb1da8a9adcdf17839a26caa94003a018dcf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -99,10 +99,7 @@ S3method(plot,gmEVario) S3method(plot,logratioVariogramAnisotropy) S3method(plot,swarmPlot) S3method(precision,accuracy) -S3method(predict,LMCAnisCompo) -S3method(predict,genDiag) S3method(predict,gmCgram) -S3method(predict,gmSpatialModel) S3method(print,mask) S3method(setMask,DataFrameStack) S3method(setMask,GridTopology) @@ -134,6 +131,7 @@ S3method(variogramModelPlot,gstatVariogram) S3method(variogramModelPlot,logratioVariogram) S3method(xvErrorMeasures,DataFrameStack) S3method(xvErrorMeasures,data.frame) +S3method(xvErrorMeasures,default) export("stackDim<-") export(CholeskyDecomposition) export(DSpars) @@ -194,6 +192,9 @@ export(noSpatCorr.test) export(noStackDim) export(pairsmap) export(precision) +export(predict) +export(predict.LMCAnisCompo) +export(predict.genDiag) export(pwlrmap) export(setCgram) export(setGridOrder) @@ -233,6 +234,7 @@ exportClasses(gmUnconditionalSpatialModel) exportClasses(gmValidationStrategy) exportMethods(as.gstat) exportMethods(logratioVariogram) +exportMethods(predict) exportMethods(stackDim) exportMethods(variogram) import(RColorBrewer) diff --git a/NEWS.md b/NEWS.md index ffb8bdec8e8984a4eff5cc9473a0842abaf6b71f..acbde3e5b03786d4cddef07c30f4bbbc4dfdfb8e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# gmGeostats 0.10.7.9006 + +* new vignette "How to register new layer datatypes", with a howto adapt gmGeostats to deal with your very special data type (with an example for regionalized circular data) +* (2021-07-06) new option to extract univariate accuracy() from multivariate kriging results +* NEWS.md and vignettes/gmGeostats.Rmd polished +* "predict"-methods for objects of class gmSpatialModel now programmed with multiple dispatching, to enable further extensions + # gmGeostats 0.10.7.9005 * (2021-06-30) bugs in xvErrorMeasures() for simulated data corrected diff --git a/R/accuracy.R b/R/accuracy.R index 272759201b192366976fb17bb13ed85c3366b00a..742265aa3013163422a5e3a87fa3fa9073afec69 100644 --- a/R/accuracy.R +++ b/R/accuracy.R @@ -72,12 +72,27 @@ accuracy <- function(x,...) UseMethod("accuracy", x) #' c("simulation", "mahalanobis", "flow") for `x` of class [DataFrameStack()]. #' @param outMahalanobis if TRUE, do not do the final accuracy calculations and return the Mahalanobis #' norms of the residuals; if FALSE do the accuracy calculations +#' @param ivar if `method`="kriging" or "cokriging" you can also specify here one single variable name +#' to consider for univariate accuracy; this variable name must exist both in `x` +#' (including "pred" and "var" prefixes or suffixes in the column names) and in `observed`; +#' this might require renaming the columns of these files! 
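+#' @examples
+#' ## minimal sketch with made-up names: `xv` would be a data.frame of multivariate
+#' ## cokriging cross-validation output with columns "Zn.pred" and "Zn.var", and
+#' ## `obs` the matching true values, with a column "Zn"
+#' # acc = accuracy(xv, observed=obs, ivar="Zn")
+#' # plot(acc)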
#' @export
accuracy.data.frame <- function(x, observed=x$observed,
                                prob = seq(from=0, to=1, by=0.05),
-                               method="kriging", outMahalanobis=FALSE, ...){
+                               method="kriging", outMahalanobis=FALSE,
+                               ivar, ...){
  methods = c("kriging", "cokriging", "simulation")
  mm = methods[pmatch(method, methods)]
+  if(!missing(ivar)){
+    iTrue = grep(ivar, colnames(observed))
+    iPred = intersect(grep(ivar, colnames(x)), grep("pred", colnames(x)))
+    iVar = intersect(grep(ivar, colnames(x)), grep("var", colnames(x)))
+    if(any(sapply(list(iTrue, iPred, iVar), length)!=1))
+      stop("accuracy: specifying `ivar` for the univariate case requires the named variable to occur ONCE in `observed` and ONCE in `x`, there prefixed or suffixed with `pred` and `var` to identify kriging predictions and variances")
+    mm = "kriging"
+    observed = observed[,iTrue]
+    x = x[, c(iPred, iVar)]
+  }
  if(mm=="kriging"){
    mynorms = xvErrorMeasures(x, observed=observed, output = "Mahalanobis", univariate=TRUE)
    D = 1
@@ -121,11 +136,11 @@ accuracy.data.frame <- function(x, observed=x$observed,

#' @describeIn accuracy Compute accuracy and precision
-#' @param vars in multivariate cases, a vector of names of the variables to analyse
+#' @param ivars in multivariate cases, a vector of names of the variables to analyse (or one single variable name)
#' @method accuracy DataFrameStack
#' @export
accuracy.DataFrameStack <- function(x, observed,
-                 vars = intersect(colnames(observed), dimnames(x)[[noStackDim(x)]]),
+                 ivars = intersect(colnames(observed), dimnames(x)[[noStackDim(x)]]),
                  prob = seq(from=0, to=1, by=0.05),
-                 method = ifelse(length(vars)==1, "simulation", "Mahalanobis"),
+                 method = ifelse(length(ivars)==1, "simulation", "Mahalanobis"),
                  outMahalanobis=FALSE, ...){
@@ -146,8 +161,8 @@ accuracy.DataFrameStack <- function(x, observed,
  if(nrow(x)!=length(observed))
    if(nrow(x)!=nrow(observed))
      stop("accuracy: dimensions of x and observed do not match")
-  sims = as.matrix(gmApply(x, FUN=function(xx)xx[,vars]))
-  res = sapply(1:nrow(sims), function(i) oneAcc.sim(sims[i,], observed[i, vars]))
+  sims = as.matrix(gmApply(x, FUN=function(xx)xx[,ivars, drop=FALSE]))
+  res = sapply(1:nrow(sims), function(i) oneAcc.sim(sims[i,], observed[i, ivars, drop=FALSE]))
  aa = outer(res, prob, "<=")
  a = colMeans(aa)
  erg = data.frame(p=prob, accuracy=a)
@@ -157,8 +172,8 @@ accuracy.DataFrameStack <- function(x, observed,
    sim_array = as.array(x)
    permidx = c(ifelse(is.numeric(stackDim(x)),1,names(dimnames(x))[1]),stackDim(x),noStackDim(x))
    sim_array = aperm(sim_array, perm = permidx)
-    sim_array = sim_array[,,vars]
-    observed = as.matrix(unclass(observed)[,vars])
+    sim_array = sim_array[,,ivars, drop=FALSE]
+    observed = as.matrix(unclass(observed)[,ivars, drop=FALSE])
    N = dim(sim_array)[1]
    S = dim(sim_array)[stackDim(x)]
    D = dim(sim_array)[noStackDim(x)]
@@ -305,6 +320,36 @@ plot.accuracy <- function(x, xlim=c(0,1), ylim=c(0,1), xaxs="i", yaxs="i", type=
xvErrorMeasures <- function(x,...)   UseMethod("xvErrorMeasures", x)

+
+#' Cross-validation error measures
+#'
+#' Compute one or more error measures from cross-validation output
+#'
+#' @param x a vector containing the predicted values
+#' @param krigVar a vector containing the kriging variances
+#' @param observed a vector containing the true values
+#' @param output which output do you want?
+#'    a vector of one or several of c("ME","MSE","MSDR","Mahalanobis"); for this
+#'    univariate default method, "MSDR1" and "MSDR2" are accepted as synonyms of "MSDR"
+#'
+#' @return the requested error measure, or a named vector of them if `output` has
+#'    several elements; for `output="Mahalanobis"`, the vector of standardised
+#'    squared residuals
+#'
+#' @export
+#' @method xvErrorMeasures default
+#' @family accuracy functions
+xvErrorMeasures.default <- function(x, krigVar, observed, output="MSDR1"){
+  if(length(output)>1)
+    return(sapply(output, function(o) xvErrorMeasures(x, krigVar, observed, output=o)))
+
+  outputs = c("ME","MSE","MSDR","MSDR1","MSDR2","Mahalanobis")
+  output = outputs[pmatch(output, outputs, duplicates.ok = TRUE)]
+
+  resids = x - observed
+  mynorms = resids^2/krigVar
+  if(output=="ME") return(mean(resids, na.rm=TRUE))
+  if(output=="MSE") return(mean(resids^2, na.rm=TRUE))
+  # in the univariate case MSDR, MSDR1 and MSDR2 coincide
+  if(output %in% c("MSDR","MSDR1","MSDR2")) return(mean(mynorms, na.rm=TRUE))
+  if(output=="Mahalanobis") return(mynorms)
+}
+
#' Cross-validation errror measures
#'
#' Compute one or more error measures from cross-validation output
diff --git a/R/gmSpatialModel.R b/R/gmSpatialModel.R
index a2719bec80e34afa91bb4ec165c81c18953e6281..c489ab1ff5d87830a14f94014718c3dfd606b385 100644
--- a/R/gmSpatialModel.R
+++ b/R/gmSpatialModel.R
@@ -286,10 +286,10 @@ as.gstat.gmSpatialModel <- function(object, ...){
  # data elements
  coords = sp::coordinates(object)
  X = compositions::rmult(object@data, V= gsi.getV(object@data), orig=gsi.orig(object@data))
-  compo = backtransform(X)
  V = gsi.getV(X)
  if(!is.null(V)){
    # compositional case
+    compo = backtransform(X)
    Vinv = t(gsiInv(V))
    prefix = sub("1","",colnames(V)[1])
    if(is.null(prefix) | length(prefix)==0) prefix = sub("1","",colnames(object@data)[1])
@@ -313,7 +313,16 @@ as.gstat.gmSpatialModel <- function(object, ...){
    return(res)
  }else{
    # non-compositional case
-
+    prefix = sub("1","",colnames(object@data)[1])
+    vgLMC <- object@model@structure
+    formulaterm = paste(as.character(object@model@formula), collapse="")
+    beta = object@model@beta
+    if(any(is.infinite(beta))) beta = NULL
+    # neighbourhood
+    ng = object@parameters
+    res = rmult2gstat(coords=coords, data=X, V=V, vgLMC=vgLMC,
+                      nscore=FALSE, formulaterm = formulaterm, prefix=prefix, beta=beta,
+                      nmax=ng$nmax, nmin=ng$nmin, omax=ng$omax, maxdist=ng$maxdist, force=ng$force)
  }
}
@@ -356,7 +365,8 @@ as.gmSpatialModel.gstat = function(object, V=NULL, ...){
#' @param newdata a collection of locations where a prediction/simulation is desired; this is typically
#' a [sp::SpatialPoints()], a data.frame or similar of X-Y(-Z) coordinates; or perhaps for gridded data
#' an object of class [sp::GridTopology()], [sp::SpatialGrid()] or [sp::SpatialPixels()]
-#' @param pars parameters describing the method to use, *encloded in an object of appropriate class* (see below)
+#' @param pars parameters describing the method to use, *enclosed in an object of appropriate class*
+#' (according to the method; see below)
#' @param ... further parameters for generic functionality, currently ignored
#'
#' @return Depending on the nature of `newdata`, the result will be a data container of the same kind,
@@ -383,114 +393,168 @@ as.gmSpatialModel.gstat = function(object, V=NULL, ...){
#' Conversely, to run a multipoint geostatistics algorithm, the first condition is that `object@model` contains a
#' training image. Additionally, `pars` must describe the characteristics of the algorithm to use. Currently, only
#' direct sampling is available: it can be obtained by providing some parameter object created with a call to
-#' [DirectSamplingParameters()]. Currently it is also necessary that `newdata` is a gridded set of locations.
+#' [DirectSamplingParameters()].
This method requires `newdata` to be on a gridded set of locations (acceptable +#' object classes are `sp::gridTopology`, `sp::SpatialGrid`, `sp::SpatialPixels`, `sp::SpatialPoints` or `data.frame`, +#' for the last two a forced conversion to a grid will be attempted). +#' @family gmSpatialModel +#' @name predict_gmSpatialModel +NULL + +#' @rdname predict_gmSpatialModel #' @export -#' @family gmSpatialModel -predict.gmSpatialModel <- function(object, newdata=NULL, pars=object@parameters, ...){ - # deal with (co)kriging - if(is(pars, "gmNeighbourhoodSpecification")){ - cat("starting cokriging \n") - object@parameters = pars - return(predict(as.gstat(object), newdata=newdata, ...)) - } - # probe on simulation methods - if(is(pars, "gmDirectSamplingParameters")){ - cat("starting direct sampling \n") - # extract training image - gt.ti = sp::getGridTopology(object@model) - dt.ti = as(object@model,"SpatialGridDataFrame")@data - # extract newdata mask - mask = getMask(newdata) - # fuse newdata, conditioning data and mask - if(is.null(newdata)){ - gt.nw = NULL # object@grid - newdata = sp::SpatialPixelsDataFrame(points = as(object, "SpatialPoints"), data=object@data) - # tolerance = sqrt(sum(gt.nw@cellsize^2))/2 , grid=gt.nw) - }else if(is(newdata, "GridTopology")){ - gt.nw = newdata - newdata = sp::SpatialPixelsDataFrame(points = as(object, "SpatialPoints"), data=object@data, - tolerance = sqrt(sum(gt.nw@cellsize^2))/2 , grid=gt.nw) - }else if(is(newdata, "SpatialGrid")){ - gt.nw = sp::getGridTopology(newdata) - newdata = sp::SpatialPixelsDataFrame(points = as(object, "SpatialPoints"), data=object@data, - tolerance = sqrt(sum(gt.nw@cellsize^2))/2 , grid=gt.nw) - }else if(is(newdata, "SpatialPixels")){ - gt.nw = sp::getGridTopology(newdata) - cc = rbind(sp::coordinates(newdata), sp::coordinates(object)) - ### WARNING: This is how it works!! COrrect from SpatialPoints and data.frame - aux = matrix(NA, nrow=nrow(sp::coordinates(newdata)), ncol=ncol(object@data)) - colnames(aux) = colnames(object@data) - dt = rbind( aux , object@data) - newdata = sp::SpatialPixelsDataFrame(points = sp::SpatialPoints(cc), data=dt, - tolerance = sqrt(sum(gt.nw@cellsize^2))/2 , grid=gt.nw) - }else if(is(newdata, "SpatialPoints")){ - gt.nw = object@model@grid - cc = rbind(sp::coordinates(object), sp::coordinates(newdata)) - ### WARNING: NOT YET TESTED WHAT HAPPENS WITH DUPLICATE LOCATIONS!!! - aux = matrix(NA, nrow=nrow(sp::coordinates(newdata)), ncol=ncol(object@data)) - colnames(aux) = colnames(object@data) - dt = rbind( object@data, aux ) - newdata = sp::SpatialPixelsDataFrame(points = sp::SpatialPoints(cc), data=dt, - tolerance = sqrt(sum(gt.nw@cellsize^2))/2) # , grid=gt.nw) - }else if(is.data.frame(newdata)){ - gt.nw = NULL # object@grid - cc = rbind(sp::coordinates(object), newdata[,1:2]) - if( all( colnames(object@data) %in% colnames(newdata) ) ){ - aux = as.matrix(newdata[,colnames(object@data)]) - }else{ - aux = matrix(NA, nrow=nrow(sp::coordinates(newdata)), ncol=ncol(object@data)) - } - colnames(aux) = colnames(object@data) - ### WARNING: NOT YET TESTED WHAT HAPPENS WITH DUPLICATE LOCATIONS!!! 
-    dt = rbind( object@data, aux )
-    newdata = sp::SpatialPixelsDataFrame(points = sp::SpatialPoints(cc), data=dt)
-    # tolerance = sqrt(sum(gt.nw@cellsize^2))/2 , grid=gt.nw)
-  }
-  if(is.null(gt.nw)) gt.nw = sp::getGridTopology(newdata)
-  dt.nw = as(newdata,"SpatialGridDataFrame")@data
-  dt.ti = as(object@model,"SpatialGridDataFrame")@data
-  if(all(gt.nw@cellsize!=gt.ti@cellsize))
-    stop("predict.gmSpatialModel with gmDirectSamplingParameters: inferred grid topologies for newdata, conditioning data and model do not coincide")
-  if(is.null(mask)) mask = rep(TRUE, nrow(dt.nw))
-  #erg = gsi.DS4CoDa(n=pars$patternSize, f=pars$scanFraction, t=pars$gof, n_realiz=pars$nsim,
-  #           nx_TI=gt.ti@cells.dim[1], ny_TI=gt.ti@cells.dim[2],
-  #           nx_SimGrid= gt.nw@cells.dim[1], ny_SimGrid=gt.nw@cells.dim[2],
-  #           TI_input=as.matrix(dt.ti),
-  #           SimGrid_input=as.matrix(dt.nw),
-  #           V = "I", W=gsi.getV(object@data),
-  #           ivars_TI = colnames(dt.ti),
-  #           SimGrid_mask = mask,
-  #           invertMask = FALSE)
-  erg = gsi.DS(n=pars$patternSize, f=pars$scanFraction, t=pars$gof, n_realiz=pars$nsim,
-               dim_TI=gt.ti@cells.dim, dim_SimGrid=gt.nw@cells.dim,
-               TI_input=as.matrix(dt.ti), SimGrid_input=as.matrix(dt.nw),
-               ivars_TI = colnames(dt.ti),
-               SimGrid_mask = mask,
-               invertMask = FALSE)
-  erg = gmApply(erg, FUN=ilrInv, V=gsi.getV(object@data), orig=gsi.orig(object@data))
-  erg = sp::SpatialGridDataFrame(grid = gt.nw, data=erg)
-  return(erg)
-  }else if(is(pars, "gmSequentialSimulation")){
-    cat("starting SGs \n")
-    object@parameters = pars$ng
-    erg = predict(as.gstat(object), newdata=newdata, nsim=pars$nsim, debug.level=pars$debug.level, ...)
-    Dg = ncol(object@coords)
-    erg = DataFrameStack(erg[,-(1:Dg)],
-                         dimnames=list(
-                           loc=1:nrow(erg), sim=1:pars$nsim,
-                           var=colnames(object@data)
-                         ),
-                         stackDim="sim")
-    attr(erg, "coords") = newdata
-    return(erg)
-  }else if(is(pars, "gmTurningBands")){
-    cat("starting turning bands \n")
-  }else if(is(pars, "gmCholeskyDecomposition")){
-    cat("starting Choleski decomposition \n")
-  }
-
-}
+setGeneric("predict", function(object, newdata, pars, ...){
+  standardGeneric("predict")
+})
+
+
+#' @rdname predict_gmSpatialModel
+#' @export
+setMethod("predict",signature(object="gmSpatialModel", newdata="ANY", pars="missing"),
+          function(object, newdata, pars, ...){
+            pars = object@parameters
+            if(is.null(pars)) pars = KrigingNeighbourhood()
+            predict(object, newdata, pars = pars, ...)
+ } +) + +#' @rdname predict_gmSpatialModel +#' @export +setMethod("predict",signature(object="gmSpatialModel", newdata="ANY", pars="gmNeighbourhoodSpecification"), + function(object, newdata, pars, ...){ + cat("starting cokriging \n") + object@parameters = pars + out = gstat:::predict.gstat(as.gstat(object), newdata=newdata) + return(out) + } +) + + + +#' @rdname predict_gmSpatialModel +#' @export +setMethod("predict",signature(object="gmSpatialModel", newdata="ANY", pars="gmTurningBands"), + function(object, newdata, pars, ...){ + stop("Turning Bands method not yet interfaced here; use") + cat("starting turning bands \n") + } +) + + +#' @rdname predict_gmSpatialModel +#' @export +setMethod("predict",signature(object="gmSpatialModel", newdata="ANY", pars="gmCholeskyDecomposition"), + function(object, newdata, pars, ...){ + stop("Choleski decomposition method not yet implemented") + cat("starting Choleski decomposition \n") + } +) + + + + +#' @rdname predict_gmSpatialModel +#' @export +setMethod("predict",signature(object="gmSpatialModel", newdata="ANY", pars="gmSequentialSimulation"), + function(object, newdata, pars, ...){ + cat("starting SGs \n") + object@parameters = pars$ng + erg = predict(as.gstat(object), newdata=newdata, nsim=pars$nsim, debug.level=pars$debug.level, ...) + Dg = ncol(object@coords) + erg = DataFrameStack(erg[,-(1:Dg)], + dimnames=list( + loc=1:nrow(erg), sim=1:pars$nsim, + var=colnames(object@data) + ), + stackDim="sim") + attr(erg, "coords") = newdata + return(erg) + } +) + + + +#' @rdname predict_gmSpatialModel +#' @export +setMethod("predict",signature(object="gmSpatialModel", newdata="ANY", pars="gmDirectSamplingParameters"), + function(object, newdata, pars, ...){ + + cat("starting direct sampling \n") + # extract training image + gt.ti = sp::getGridTopology(object@model) + dt.ti = as(object@model,"SpatialGridDataFrame")@data + # extract newdata mask + mask = getMask(newdata) + # fuse newdata, conditioning data and mask + if(is.null(newdata)){ + gt.nw = NULL # object@grid + newdata = sp::SpatialPixelsDataFrame(points = as(object, "SpatialPoints"), data=object@data) + # tolerance = sqrt(sum(gt.nw@cellsize^2))/2 , grid=gt.nw) + }else if(is(newdata, "GridTopology")){ + gt.nw = newdata + newdata = sp::SpatialPixelsDataFrame(points = as(object, "SpatialPoints"), data=object@data, + tolerance = sqrt(sum(gt.nw@cellsize^2))/2 , grid=gt.nw) + }else if(is(newdata, "SpatialGrid")){ + gt.nw = sp::getGridTopology(newdata) + newdata = sp::SpatialPixelsDataFrame(points = as(object, "SpatialPoints"), data=object@data, + tolerance = sqrt(sum(gt.nw@cellsize^2))/2 , grid=gt.nw) + }else if(is(newdata, "SpatialPixels")){ + gt.nw = sp::getGridTopology(newdata) + cc = rbind(sp::coordinates(newdata), sp::coordinates(object)) + ### WARNING: This is how it works!! COrrect from SpatialPoints and data.frame + aux = matrix(NA, nrow=nrow(sp::coordinates(newdata)), ncol=ncol(object@data)) + colnames(aux) = colnames(object@data) + dt = rbind( aux , object@data) + newdata = sp::SpatialPixelsDataFrame(points = sp::SpatialPoints(cc), data=dt, + tolerance = sqrt(sum(gt.nw@cellsize^2))/2 , grid=gt.nw) + }else if(is(newdata, "SpatialPoints")){ + gt.nw = object@model@grid + cc = rbind(sp::coordinates(object), sp::coordinates(newdata)) + ### WARNING: NOT YET TESTED WHAT HAPPENS WITH DUPLICATE LOCATIONS!!! 
+ aux = matrix(NA, nrow=nrow(sp::coordinates(newdata)), ncol=ncol(object@data)) + colnames(aux) = colnames(object@data) + dt = rbind( object@data, aux ) + newdata = sp::SpatialPixelsDataFrame(points = sp::SpatialPoints(cc), data=dt, + tolerance = sqrt(sum(gt.nw@cellsize^2))/2) # , grid=gt.nw) + }else if(is.data.frame(newdata)){ + gt.nw = NULL # object@grid + cc = rbind(sp::coordinates(object), newdata[,1:2]) + if( all( colnames(object@data) %in% colnames(newdata) ) ){ + aux = as.matrix(newdata[,colnames(object@data)]) + }else{ + aux = matrix(NA, nrow=nrow(sp::coordinates(newdata)), ncol=ncol(object@data)) + } + colnames(aux) = colnames(object@data) + ### WARNING: NOT YET TESTED WHAT HAPPENS WITH DUPLICATE LOCATIONS!!! + dt = rbind( object@data, aux ) + newdata = sp::SpatialPixelsDataFrame(points = sp::SpatialPoints(cc), data=dt) + # tolerance = sqrt(sum(gt.nw@cellsize^2))/2 , grid=gt.nw) + } + if(is.null(gt.nw)) gt.nw = sp::getGridTopology(newdata) + dt.nw = as(newdata,"SpatialGridDataFrame")@data + dt.ti = as(object@model,"SpatialGridDataFrame")@data + if(all(gt.nw@cellsize!=gt.ti@cellsize)) + stop("predict.gmSpatialModel with gmDirectSamplingParameters: inferred grid topologies for newdata, conditioning data and model do not coincide") + if(is.null(mask)) mask = rep(TRUE, nrow(dt.nw)) + #erg = gsi.DS4CoDa(n=pars$patternSize, f=pars$scanFraction, t=pars$gof, n_realiz=pars$nsim, + # nx_TI=gt.ti@cells.dim[1], ny_TI=gt.ti@cells.dim[2], + # nx_SimGrid= gt.nw@cells.dim[1], ny_SimGrid=gt.nw@cells.dim[2], + # TI_input=as.matrix(dt.ti), + # SimGrid_input=as.matrix(dt.nw), + # V = "I", W=gsi.getV(object@data), + # ivars_TI = colnames(dt.ti), + # SimGrid_mask = mask, + # invertMask = FALSE) + erg = gsi.DS(n=pars$patternSize, f=pars$scanFraction, t=pars$gof, n_realiz=pars$nsim, + dim_TI=gt.ti@cells.dim, dim_SimGrid=gt.nw@cells.dim, + TI_input=as.matrix(dt.ti), SimGrid_input=as.matrix(dt.nw), + ivars_TI = colnames(dt.ti), + SimGrid_mask = mask, + invertMask = FALSE) + erg = gmApply(erg, FUN=ilrInv, V=gsi.getV(object@data), orig=gsi.orig(object@data)) + erg = sp::SpatialGridDataFrame(grid = gt.nw, data=erg) + return(erg) + } +) #' @describeIn gmSpatialModel Compute a variogram, see [variogram_gmSpatialModel()] for details diff --git a/R/grids.R b/R/grids.R index b1d396e63a1b0971bac22a5d6543b7318a16bc8e..86515410163b5436c03ba03bb95627287676e760 100644 --- a/R/grids.R +++ b/R/grids.R @@ -267,7 +267,7 @@ gsi.gstatCokriging2rmult.data.frame = function(COKresult, # output of predict.gs D = length(prednames) noms = sub(".pred", "", prednames) - prediccions = COKresult[,prednames, drop=FALSE] + prediccions = rmult(COKresult[,prednames, drop=FALSE]) if(nscore){ ## space to back-transform the predictions #if(is.null(gg))stop("To apply a nscore backtransformation, the gstat object must be provided!") @@ -304,7 +304,7 @@ gsi.gstatCokriging2rmult.data.frame = function(COKresult, # output of predict.gs } attr(rg,"krigVar") = cvmat } - class(rg) = c("spatialGridRmult","rmult","data.frame") + class(rg) = c("spatialGridRmult","rmult") return(rg) } diff --git a/R/gstatCompatibility.R b/R/gstatCompatibility.R index 16f756209c64b69e5b17a5da2a555db747e910a1..8b4e981f5d3c6db0b6cafb6f33b8ded4996a876a 100644 --- a/R/gstatCompatibility.R +++ b/R/gstatCompatibility.R @@ -114,7 +114,7 @@ compo2gstatLR = function(coords, compo, V=ilrBase(compo), prefix = o$prefix V = o$V # compute data (in lrs or in normal scores), set variable names - Zlr = compositions::ilr(compo, V=V) + Zlr = compositions::idt(compo, V=V) if(nscore){ 
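+    # NOTE: nscore() used below is assumed to come from the external nscore.R
+    # script sourced here (a classical normal-scores transform); it is not
+    # shipped with "gmGeostats" itself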
source("nscore.R") # load the nscore.R functions prefix= paste("NS",prefix,sep="") @@ -145,6 +145,44 @@ compo2gstatLR = function(coords, compo, V=ilrBase(compo), return(gg) } +## version for rmult +rmult2gstat = function(coords, data, V="cdt", + vgLMC=NULL, nscore=FALSE, + formulaterm = "~1", prefix=NULL, ...){ + + P = ncol(data) + if(nscore){ + source("nscore.R") # load the nscore.R functions + prefix= paste("NS",prefix,sep="") + Z = sapply(1:P, function(i){ + rs = nscore(data[,i]) + aux = rs$nscore + attr(aux,"trn.table") = rs$trn.table # this ensures that the backtransformation is stored in the object + return(data.frame(aux)) + }) + Z = as.data.frame(Z) + }else{ + Z = data + } + if(is.null(colnames(Z))) colnames(Z) = paste(prefix, 1:P, sep="") + # create gstat object + spatdescr = paste("~",c(paste(colnames(coords),collapse=" + ")), sep="") + gg = NULL + for(i in 1:P){ + id = colnames(Z)[i] + frm = paste(id, formulaterm, sep="") + gg = gstat::gstat(gg, id=id, formula = stats::as.formula(frm), locations=stats::as.formula(spatdescr), data=data.frame(coords, Z), ...) + } + # if a logratio LMC was provided, convert it to gstat variogramModelList + # and attach it + if(!is.null(vgLMC) & !nscore){ + # space for a future conversion of variation-variogram models to gstat-LR-variograms + gg$model = as.variogramModel(vgLMC, prefix=prefix) + } + # return + return(gg) +} + getGstatData = function(gg # gstat object diff --git a/README.Rmd b/README.Rmd index 7054ff338d1e0c50a420474f9c1c57b43f4538f2..9a5ccfe5729066334db70c49e6faeb5caeeeead6 100644 --- a/README.Rmd +++ b/README.Rmd @@ -36,7 +36,7 @@ install.packages("gmGeostats") Read the package vignette for an extended scheme of the package functionality. The fundamental steps are: ```{r example} -## load the package and its dependencies +## load the package (NOTE: do not load "compositions" or "gstat" afterwards!) library(gmGeostats) ## read your data, identify coordinates and sets of variables @@ -70,16 +70,25 @@ vm = gstat::vgm(model="Sph", range=25, nugget=1, psill=1) gsm.f = fit_lmc(v = vge, g = gsm, model = vm) ## plot -variogramModelPlot(vge, model = gsm.f) +variogramModelPlot(vge, model = gsm.f, col="red") ``` +The resulting variogram model (`gsm.f$model` in case of a "gstat" object) can the be appended to the +spatial model, with +``` +gsm = make.gmCompositionalGaussianSpatialModel( + data = Zc, coords = X, V = "alr", formula = ~1, + model = gsm.f$model +) +``` +Other empirical structural functions (e.g. logratio variograms from package "compositions") and their theoretical counterparts (e.g. "CompLinModCoReg" objects from "compositions" or "LMCAnisCompo" from "gmGeostats") can also be estimated resp. fitted and given to the argument `model` of this call to `make.gmCompositionalGaussianSpatialModel`. Other additional arguments are the mean value in case of simple (co)kriging, or descriptors of the local neighbourhood (see `?make.gmCompositionalGaussianSpatialModel` for more information), both using the same parameter names as in `?gstat::gstat` for ease of use. -This model can then be validated, interpolated and/or simulated. The workflow for each of these tasks is always: +The resulting geostatistical model (including conditioning data and structural model) can then be validated, interpolated and/or simulated. The workflow for each of these tasks is always: -1.- define some method parameters with a tailored function, e.g. 
`LeaveOneOut()` for validation, `KrigingNeighbourhood()` for cokriging or `SequentialSimulation()` for sequential Gaussian Simulation
+1.- define some method parameters with a tailored function, e.g. `LeaveOneOut()` for validation, `KrigingNeighbourhood()` for cokriging (the neighbourhood can also be appended to `make.*SpatialModel()`-calls) or `SequentialSimulation()` for sequential Gaussian Simulation

-2.- if desired, define some new locations where to interpolate or simulate, using `expand.grid()` or `sp::GridTopology()` or similar
+2.- if desired, define some new locations where to interpolate or simulate, using `expand.grid()`, `sp::GridTopology()` or alternatives from other packages

-3.- call an appropriate function, specifying the model, potential new data, and the parameters created in the preceding steps; e.g. `validate(model, pars)` for validation, or `predict(model, newdata, pars)` for interpolation or simulation
+3.- call an appropriate analysis function, specifying the model, potential new data, and the parameters created in the preceding steps; e.g. `validate(model, pars)` for validation, or `predict(model, newdata, pars)` for interpolation or simulation

More information can be found in the package vignette.
\ No newline at end of file
diff --git a/README.md b/README.md
index 7d8eb2dc824361d75fe4dae0670f666f53fb9eda..88d53314e0a6d21a0ac5727e8e836bb3511d07ff 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@
Read the package vignette for an extended scheme of the package
functionality. The fundamental steps are:

``` r
-## load the package and its dependencies
+## load the package (NOTE: do not load "compositions" or "gstat" afterwards!)
library(gmGeostats)
#> Welcome to 'gmGeostats', a package for multivariate geostatistical analysis.
#> Note: use 'fit_lmc' instead of fit.lmc
@@ -81,24 +81,46 @@ vm = gstat::vgm(model="Sph", range=25, nugget=1, psill=1)
gsm.f = fit_lmc(v = vge, g = gsm, model = vm)

## plot
-variogramModelPlot(vge, model = gsm.f)
+variogramModelPlot(vge, model = gsm.f, col="red")
```

-<img src="man/figures/README-structural-1.png" width="100%" />
-
-This model can then be validated, interpolated and/or simulated. The
-workflow for each of these tasks is always:
-
-1.- define some method parameters with a tailored function, e.g.
-`LeaveOneOut()` for validation, `KrigingNeighbourhood()` for cokriging
-or `SequentialSimulation()` for sequential Gaussian Simulation
+<img src="man/figures/README-structural-1.png" width="100%" /> The
+resulting variogram model (`gsm.f$model` in case of a “gstat” object)
+can then be appended to the spatial model, with
+
+    gsm = make.gmCompositionalGaussianSpatialModel(
+      data = Zc, coords = X, V = "alr", formula = ~1,
+      model = gsm.f$model
+    )
+
+Other empirical structural functions (e.g. logratio variograms from
+package “compositions”) and their theoretical counterparts
+(e.g. “CompLinModCoReg” objects from “compositions” or “LMCAnisCompo”
+from “gmGeostats”) can also be estimated and fitted, respectively, and
+given to the argument `model` of this call to
+`make.gmCompositionalGaussianSpatialModel`. Other additional arguments
+are the mean value in case of simple (co)kriging, or descriptors of the
+local neighbourhood (see `?make.gmCompositionalGaussianSpatialModel` for
+more information), both using the same parameter names as in
+`?gstat::gstat` for ease of use.
+
+The resulting geostatistical model (including conditioning data and
+structural model) can then be validated, interpolated and/or simulated.
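+
+For instance, a leave-one-out validation of `gsm` could be sketched as
+(hypothetical call; see `?validate` and `?xvErrorMeasures` for the exact
+arguments):
+
+    xv = validate(gsm, strategy = LeaveOneOut())
+    xvErrorMeasures(xv, observed = Zc, output = "Mahalanobis")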
+The workflow for each of these tasks is always:
+
+1.- define some method parameters with a tailored function,
+e.g. `LeaveOneOut()` for validation, `KrigingNeighbourhood()` for
+cokriging (the neighbourhood can also be appended to
+`make.*SpatialModel()`-calls) or `SequentialSimulation()` for sequential
+Gaussian Simulation

2.- if desired, define some new locations where to interpolate or
-simulate, using `expand.grid()` or `sp::GridTopology()` or similar
+simulate, using `expand.grid()`, `sp::GridTopology()` or alternatives
+from other packages

-3.- call an appropriate function, specifying the model, potential new
-data, and the parameters created in the preceding steps; e.g.
-`validate(model, pars)` for validation, or `predict(model, newdata,
+3.- call an appropriate analysis function, specifying the model,
+potential new data, and the parameters created in the preceding steps;
+e.g. `validate(model, pars)` for validation, or `predict(model, newdata,
pars)` for interpolation or simulation

More information can be found in the package vignette.
diff --git a/man/figures/README-structural-1.png b/man/figures/README-structural-1.png
index 834c1310700442142e8495bb62c6a2627021a5ed..bdf6d08d04195495b4e4cea749bccbff316069f7 100644
Binary files a/man/figures/README-structural-1.png and b/man/figures/README-structural-1.png differ
diff --git a/vignettes/gmGeostats.Rmd b/vignettes/gmGeostats.Rmd
index 03e74d4b80c24d1bf187c1fbf3a0dfe25268a6d6..fef86b67920c128911b33fd55fe2d96de599f949 100644
--- a/vignettes/gmGeostats.Rmd
+++ b/vignettes/gmGeostats.Rmd
@@ -17,7 +17,7 @@ knitr::opts_chunk$set(

## The basics

-"gmGeostats" is a package for multivariate geostatistics, focusing in the usage of data from restricted sampling spaces. Such data include positive data, compositional data, distributional data and the like. Most of the times, the geostatistical analysis of such data includes three steps:
+"gmGeostats" is a package for multivariate geostatistics, focusing on the usage of data from multivariate restricted sampling spaces. Such data include positive data, compositional data, distributional data and the like. Most of the time, the geostatistical analysis of such data includes three steps:

1. express your data as some vectors of real values, through a mapping. Such mappings can be isomorphisms (for Euclidean spaces) or embeddings (for regular manifolds)
2. analyse the resulting multivariate data with vectorial methods (i.e. using cross-variograms, cokriging, cosimulation or distance based methods modified in such a way that they are rotation-invariant resp. affine equivariant)

The package is loaded, as usual with
```{r setup}
library(gmGeostats)
```
-and it fundamentally depends on packages "compositions", "gstat" and "sp". Other dependencies are more instrumental and less fundamental.
+and it fundamentally depends on packages "compositions", "gstat" and "sp". Other dependencies are more instrumental and less fundamental. NOTE: if you separately need "compositions" or "gstat", load these packages first, then load "gmGeostats". This will ensure that the overloaded functions work properly for all three packages. Alternatively, use fully qualified names (e.g. `pkg::foo()`).

This vignette very briefly presents the steps to follow in several analysis and modelling routes, illustrated with the case of compositional data. No explanations, theory or discussion is included.
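+
+As a tiny illustration of the three steps above for a composition (a sketch
+with made-up numbers; `cdt()`/`cdtInv()` are the default representation pair
+from "compositions"):
+
+```
+Z = acomp(data.frame(A=c(1,2), B=c(3,3), C=c(6,5))) # two 3-part compositions
+z = cdt(Z)          # 1. map to real vectors (clr scores)
+# 2. apply any rotation-invariant vectorial method to `z`
+cdtInv(z, orig=Z)   # 3. represent the results back in the original space
+```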
@@ -38,15 +38,15 @@ This vignette very briefly presents the steps to follow in several analysis and

The data can be visually inspected with scatterplots, in raw representation (`plot()`, `pairs()`), in ternary diagrams (`compositions::plot.acomp()`), and in scatterplots of logratio transformed data (use the transformations `pwlr()`, `alr()`, `clr()` or `ilr()` from package "compositions", then `pairs()`). Function `pairs()` can be given panel functions such as e.g. `vp.lrdensityplot()`, `vp.kde2dplot()` or `vp.lrboxplot()` resp. for creating histograms of pairwise logratios, 2D kernel density maps on the scatterplots or boxplots of the pairwise logratios. Package "compositions" provides the class "acomp" to directly deal with the right representation in the several methods.

-Principal component analysis is also a strong help. For this, you need an isometry (not just an isomorphism). Transformation `clr()` is the best for this, and is actually automatically used when you do `princomp(acomp(YOURDATA))`. "gmGeostats" provides generalised diagonalisation methods, see `?genDiag` for details.
+Principal component analysis is also a strong help. For this, you need an isometry (not just an isomorphism). Transformation `clr()` is the best for this, and is actually automatically used when you do `princomp(acomp(YOURDATA))`. "gmGeostats" provides generalised diagonalisation methods to account for the spatial dependence, see `?genDiag` for details.

### Spatial analysis

-Create your spatial objects by connecting the spatial coordinates to the multivariate observations via the functions `sp:SpatialPointsDataFrame()` or better the "gmGeostats" functions `make.gmMultivariateGaussianSpatialModel()` for multivariate data and `make.gmCompositionalGaussianSpatialModel()` for compositional data. This produces objects of spatial data container class "gmSpatialModel", that are necessary for the rest of the analysis and modelling.
+Create your spatial objects by connecting the spatial coordinates to the multivariate observations via the functions `sp::SpatialPointsDataFrame()` or better the "gmGeostats" functions `make.gmMultivariateGaussianSpatialModel()` for multivariate data and `make.gmCompositionalGaussianSpatialModel()` for compositional data. The functions `make.gm******SpatialModel()` produce objects of the spatial data container class "gmSpatialModel", which are necessary for the rest of the analysis and modelling.

Swath plots are available with command `swath()`. If you give it an "acomp" object you will obtain a matrix of logratio swath plots. Otherwise you will get an set of swath plots, one for each variable. Function `pairsmap()` works similarly, but produces bubble maps (you can control size and color of the symbols).

-Empirical variograms can be obtained with function `variogram()` out of the spatial data container. You can also use the function `logratioVariogram()` for compositional data. Both accept anisotropy. Their output can be plotted with `plot()`, which has specific methods for compositional and non-compositional data. In the case of anisotropic data, you can also use `image()` to visualise the variogram maps.
+Empirical variograms can be obtained with function `variogram()` out of the spatial data container. You can also use the function `logratioVariogram()` for compositional data. Both accept anisotropy. Their output can be plotted with `plot()`, which has specific methods for compositional and non-compositional data.
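+
+For instance (a sketch, assuming `gsm` is a "gmSpatialModel" built as above):
+
+```
+vg = variogram(gsm)  # empirical variogram system of the stored data
+plot(vg)             # dispatches on the appropriate plotting method
+```
+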
+In the case of anisotropic data, you can also use a method of `image()` to visualise the variogram maps, see `?image.logratioVariogramAnisotropy` for details.

Finally you can also check for the strength of the spatial dependence with the test `noSpatCorr.test()`. This is a permutations test, which null hypothesis is that the data do not exhibit spatial autocorrelation.
@@ -60,15 +60,15 @@ Plotting of LMCs against their empirical variograms can be done with function `v

### Variogram and neighbourhood validation

-Neighbourhood descriptions are created with function `KrigingNeighbourhood()`. Kriging neighbourhoods and LMC variogram models and can be attached to the "gmSpatialModel" objects at the moment of creation via `make.*()` functions, using arguments `ng` and `model`.
+Neighbourhood descriptions are created with function `KrigingNeighbourhood()`. Kriging neighbourhoods and LMC variogram models can be attached to the "gmSpatialModel" objects at the moment of creation via `make.gm*()` functions, using arguments `ng` and `model` (this last one strictly requiring you to also specify the `formula` argument).

-Validation of the model or of the neighbourhood can then be obtained with the function `validate()`. This requires an `object` (the complete "gmSpatialModel") and a `strategy`. Validation strategies are small S3-objects describing what will exactly be done in the validation. They can be quickly defined by means of configuration functions as `LeaveOneOut()` or `NfoldCrossValidation()`. The call to `validate()` will provide some output that can be evaluated with functions such as `xvErrorMeasures()`
+Validation of the model or of the neighbourhood can then be obtained with the function `validate()`. This requires an `object` (the complete "gmSpatialModel") and a `strategy`. Validation strategies are small S3-objects describing what exactly will be done in the validation. They can be quickly defined by means of configuration functions such as `LeaveOneOut()` or `NfoldCrossValidation()`. The call to `validate()` will provide some output that can be evaluated with functions such as `xvErrorMeasures()`, `accuracy()` or `precision()`.

### Cokriging and mapping

-This way of working is common to the package. You always build a model (with a `make.*()` function), define a method parameter object (created with a specific, verbose, helper function), and feed both to a common umbrella function describing what do you want to do: `validate()` or `predict()`, which also requires an argument `newdata` as is standard in R. The output can then be postprocessed by specific functions.
+This way of working is common to the package. You always build a model (with a `make.*()` function), define a method parameter object (created with a specific, verbose, helper function), and feed both to a common umbrella function describing what you want to do: `validate()` or `predict()`, the latter also requiring an argument `newdata`, as is standard in R. The output can then be postprocessed by specific functions.

The method parameter for cokriging is actually the neighbourhood. So, you can give `predict()` an object resulting from `KrigingNeighbourhood()`, otherwise it will take the standard one stored in the "gmSpatialModel", or produce a global neighbourhood cokriging if no neighbourhood description is found.
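+
+A sketch of this route (made-up neighbourhood and grid; `gsm` as above):
+
+```
+ng = KrigingNeighbourhood(nmax=20, omax=7, maxdist=1)
+xx = expand.grid(x=seq(0, 10, 0.5), y=seq(0, 10, 0.5))
+prd = predict(gsm, newdata=xx, pars=ng)
+```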
@@ -89,13 +89,13 @@ Cosimulation method parameters are created by calls to one of the functions `Seq

### Postprocessing

-Multivariate cosimulation output can be seen analogue to a 3-dimensional array, with one dimension running along the simulated locations, one dimension along the realisations and one dimension along the variables. This structure is captured in "gmGeostats" with an object of class "DataFrameStack" (extending "data.frame" and mimicking arrays). Point-wise, simulation-wise and variable-wise ransformations on this array can be computed with function `gmApply()`, a wrapper on `base::apply()` allowing for an easier management of the dimensions of the simulation stack. Maps can also be produced by function `image_cokriged()`.
+Multivariate cosimulation output can be seen as analogous to a 3-dimensional array, with one dimension running along the simulated locations, one dimension along the realisations and one dimension along the variables. This structure is captured in "gmGeostats" with an object of class "DataFrameStack" (extending "data.frame" and mimicking arrays). Point-wise, simulation-wise and variable-wise transformations on this array can be computed with function `gmApply()`, a wrapper on `base::apply()` allowing for an easier management of the dimensions of the simulation stack. Maps can also be produced by function `image_cokriged()`.

## Multipoint simulation

-Multipoint cosimulation is available with the same strategy than Gaussian based simulation. One needs first to define a "gmSpatialModel" containing the conditioning data (the original data) and the stochastic model (the training image). Second, a simulation grid must be created, and provided to `predict()` as the `newdata` argument. And third, we must provide some method parameters defining the simulation algorithm to use: currently, only direct sampling is available, and its parameters can be obtained calling `DSpars()`.
+Multipoint cosimulation is available with the same strategy as Gaussian-based simulation. One needs first to define a "gmSpatialModel" containing the conditioning data (the original data) and the stochastic model (the training image). Second, a simulation grid must be created, and provided to `predict()` as the `newdata` argument. And third, we must provide some method parameters defining the simulation algorithm to use: currently, only direct sampling is available, and its parameters can be constructed calling `DSpars()`.

Training images are currently objects of class "SpatialPixelsDataFrame" or "SpatialGridDataFrame", from package "sp". The conditioning data will be migrated to the simulation grid internally; the grid topologies for simulation and training image will be checked for consistency.
diff --git a/vignettes/register_new_layer_datatype.Rmd b/vignettes/register_new_layer_datatype.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..8b4c0a9b59642fa2db6821087d8db8c414bdb218
--- /dev/null
+++ b/vignettes/register_new_layer_datatype.Rmd
@@ -0,0 +1,159 @@
+---
+title: "How to register new layer datatypes"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{How to register new layer datatypes}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+## Purpose of this vignette
+
+This vignette explains how to tweak "gmGeostats" to declare new datatypes for data layers (e.g.
for data with special characteristics besides compositional data), and associate creation and prediction functions to it. If you are looking for a general introduction to the package, see [this other vignette](../doc/gmGeostats.html).
+
+```{r setup}
+library(compositions)
+library(gstat)
+library(gmGeostats)
+library(RandomFields)
+library(magrittr)
+```
+
+
+
+## Statistical scale and representation functions
+
+The statistical scale of a data layer is a subjective assessment of the way in which pairs of values of that layer need to be compared. Classical statistical scales after Stevens (XXXX) are the nominal (two values are either equal or they are different), ordinal (two values are either equal, or one is larger than the other), interval (values can be meaningfully compared by the mathematical operation of subtraction) and ratio (values are strictly positive and can be meaningfully compared by the operation of quotient). Other scales have been introduced, such as several compositional scales for data about the amounts and proportions of components forming a system (Aitchison, 1986; van den Boogaart and Tolosana-Delgado, 2013; van den Boogaart, Tolosana-Delgado and Bren, XXXX); for circular and spherical data; for distributional data; for positive definite matrices; etc.
+
+A scale $s$ is then typically described by a way of computing the difference between two values $d_s(\cdot, \cdot)$, coupled with a description of the set $\mathcal{E}_s$ of values of that layer that are at all possible. In the case of circular data, the set of possible values is $[-\pi, \pi)$, and given the periodicity condition, the way to compare two values $a$ and $b$ is $(a-b)$ modulo $2\pi$.
+
+To ease the computation with observations $x$ on such layers we want to define a transformation $R(x)$ that delivers a *representation* $z=R(x)$ of the data, such that: (i) $R^{-1}(z)$ exists for all values of the linear span of $R(x)$, (ii) it can be ensured that $R^{-1}(z)\in \mathcal{E}_s$, *and* (iii) that $d_s(x_1, x_2)\approx R(x_1)-R(x_2)$. A classical representation strategy of circular data is through an embedding into $R^2$, the bivariate real space, by means of the transformation
+$$
+R(\theta)=[\sin(\theta), \cos(\theta)]=\mathbf{z}
+$$
+with inverse operation
+$$
+\theta = \tan^{-1} \frac{z_1}{z_2}
+$$
+
+For our purposes, the absolute minimum you need to program is:
+
+1. choose a class name for your data type (e.g. "circular"), and create a function with that name taking the values as they might be in your case study, converting them into a datamatrix and giving them the class of your choice
+```{r}
+circular = function(x, varname ="theta", conversion=pi/180){
+  # output to be a (N, 1)-datamatrix
+  if(length(dim(x))!=2){ # case `x` is a vector
+    y = t(t(x))
+    colnames(y) = varname
+  }else{ # case `x` is a matrix: keep only the relevant column
+    y = x[, varname, drop=FALSE]
+  }
+  y = y * conversion
+  class(y) = "circular"
+  return(y)
+}
+```
+(The function can do other things, like in this case allowing a potential conversion from degrees to radians, or managing several input cases.)
+
+2. create a method of the function `compositions::cdt()` implementing the representation for data of your type and returning an rmult object
+```{r}
+cdt.circular = function(x, ...){
+  z = cbind(sin(x), cos(x))
+  colnames(z) = c("z1", "z2")
+  return(rmult(z, orig=x))
+}
+```
+
+3.
create a method of the function `compositions::cdtInv()` implementing the back-representation for data of your type (argument `z` expects the representation, and `orig` must be exactly what is set in this example; NOTICE the three dots at `compositions:::gsi.orig`)
+```{r}
+cdtInv.circular = function(z, orig=compositions:::gsi.orig(x),...){
+  #z = unclass(z)
+  x = atan2(z[,1], z[,2]) # angle from (sin, cos)
+  return(circular(x, varname=colnames(orig), conversion = 1))
+}
+```
+
+## Geostatistical analysis of scaled data, quick and dirty
+
+With these few lines of programming you should already be able to use "gmGeostats" for your data. To show how, we will first generate a univariate, real-valued random field, take it as if it were circular data (in radians), extract some components out of it, and do a geostatistical analysis with the output.
+```{r data_creation, fig.height=7, fig.width=7}
+# simulate a random function
+set.seed(333275)
+model <- RMexp()
+x <- seq(0, 10, 0.1)
+z <- RFsimulate(model, x, x, n=1)
+# extract components
+X = coordinates(z)
+Z = z@data
+# select some of them
+tk = sample(1:nrow(X), 1000)
+Xdt = X[tk,]
+Zdt = Z[tk,1]
+Zdtc = circular(Zdt,varname = "theta", conversion = 1)
+pairsmap(Zdtc, loc=Xdt)
+```
+
+Now we can proceed with the analysis. First we create the "gmSpatialModel" containing the transformed data
+```{r}
+theta.gg =
+  make.gmMultivariateGaussianSpatialModel(
+    data=cdt(Zdtc), coords = Xdt, # always set V="clr" in such cases!
+    formula = ~1 # for ordinary (co)kriging
+  )
+```
+compute and plot the variogram
+```{r, fig.width=7, fig.height=5}
+theta.vg = gmGeostats::variogram(theta.gg)
+plot(theta.vg)
+```
+and model it, in this case with an exponential of effective range approximately 3, a sill of 0.5, and a nugget close to zero. All ways of modelling variograms are allowed, for instance with "gstat" variograms
+```{r, fig.width=7, fig.height=5}
+theta.md = gstat::vgm(model="Exp", range=1, psill=0.5)
+theta.gs = fit_lmc(v=theta.vg, g = theta.gg, model = theta.md)
+plot(theta.vg, model=theta.gs$model)
+```
+Finally we extend the original data container with this model
+```{r}
+theta.gg =
+  make.gmMultivariateGaussianSpatialModel(
+    data=cdt(Zdtc), coords = Xdt, # always set V="clr" in such cases!
+    formula = ~1, # for ordinary (co)kriging
+    model = theta.gs$model
+  )
+```
+The outcome can then be used for validation, prediction or simulation. Here we do cokriging on the same grid we simulated above
+```{r}
+xx = expand.grid(x,x)
+colnames(xx) = colnames(Xdt)
+ng = KrigingNeighbourhood(nmax = 20, omax=7, maxdist=1)
+theta.prds = predict(theta.gg, newdata = xx, pars=ng)
+```
+and the result can be reordered and backtransformed
+```{r}
+theta.prds.grid = gsi.gstatCokriging2rmult(theta.prds)
+theta.prds.back = backtransform(theta.prds.grid, as = cdt(Zdtc))
+summary(theta.prds.back)
+```
+Note that the function `backtransform()` is available in package "compositions" from version 1.0.1-9002. To plot the result we might have to program a method for `image_cokriged` that should take care to fictionally reclass the backtransformed data to "spatialGridRmult" and choose a color sequence appropriate for the periodic nature of the data
+```{r, fig.width=6, fig.height=8.5}
+image_cokriged.circular = function(x, ...){
+  class(x) = c("spatialGridRmult", "rmult")
+  image_cokriged(x, breaks=40, col=rainbow(10), ...)
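+  # NOTE: rainbow() is just a quick stand-in for a properly cyclic palette,
+  # which would better honour the periodic nature of the variable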
+}
+image_cokriged(theta.prds.back, ivar="theta")
+```
+
+
+## Future work
+
+In future extensions of this vignette we will discuss the way to create your own structural functions (variograms) and estimation models/methods adapted to the nature of the data, and register them with the package.