From 26ccbe0f702a84eab6d686f578fadb267c10dce0 Mon Sep 17 00:00:00 2001 From: Raimon Tolosana-Delgado <tolosa53@fwg206.ad.fz-rossendorf.de> Date: Fri, 5 Nov 2021 13:28:48 +0100 Subject: [PATCH] gmGeostats variogram estimation improved --- DESCRIPTION | 5 +- NAMESPACE | 1 + NEWS.md | 4 + R/variograms.R | 206 +++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 189 insertions(+), 27 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3cf35e0..0fe1f73 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: gmGeostats -Version: 0.11.0-9000 -Date: 2021-10-20 +Version: 0.11.0-9001 +Date: 2021-11-05 Title: Geostatistics for Compositional Analysis Authors@R: c(person(given = "Raimon", family = "Tolosana-Delgado", @@ -60,6 +60,7 @@ VignetteBuilder: knitr LazyData: true Collate: 'Anamorphosis.R' + '__preliminary_3D_Rosie.R' 'preparations.R' 'gstatCompatibility.R' 'gmAnisotropy.R' diff --git a/NAMESPACE b/NAMESPACE index 1d5250e..b17e337 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -179,6 +179,7 @@ export(gridOrder_gstat) export(gridOrder_sp) export(gsi.DS) export(gsi.EVario2D) +export(gsi.EVario3D) export(gsi.TurningBands) export(gsi.gstatCokriging2compo) export(gsi.gstatCokriging2rmult) diff --git a/NEWS.md b/NEWS.md index 92a92cc..eae19e3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# gmGeostats 0.11.0-9001 + +* (2021-11-05) bugs corrected in internal 2D empirical variogram function `gsi.EVario2D()` . First version of 3D internal variogram function `gsi.EVario3D()` available. Usage of these functions is only for specialists foreseen. In the near future a user-friendlier wrapper will be provided. + # gmGeostats 0.11.0-9000 * (2021-10-20) section on the vignette "register_new_layer_datatype.Rmd" about the definition and registration of empirical covariance for circular data. diff --git a/R/variograms.R b/R/variograms.R index 5ff48e0..1f68ea5 100644 --- a/R/variograms.R +++ b/R/variograms.R @@ -627,31 +627,30 @@ gsi.EVario2D = function(X,Z,Ff=rep(1, nrow(X)), colnames(azimuths) = c("minaz","maxaz") }else stop("azimuths can be either a vector of lags or a data.frame, see ?gsi.EVario2D") - # compute residuals + # compute pairs of locations + ij = expand.grid(1:nrow(X), 1:nrow(X))# indices + XX = X[ij[,1],]-X[ij[,2],] # locations + XXabs = gmApply(XX, 1, function(x) sqrt(sum(x^2))) + XXaz = gmApply(XX, 1, function(x) pi/2-atan2(x[2],x[1])) +2*pi + XXaz = XXaz %% pi # residual to 180?? + + # compute residuals and pairs of variables appropriate to the structural function if(cov){ - kk = 1 - if(dim(Z)==dim(Ff)){ - Z = Z-Ff - }else if(ncol(Z)==length(c(unlist(Ff)))){ - Z = sweep(Z, 2, Ff, "-") + if(all(dim(Z)==dim(Ff))){ + Z = as.matrix(Z-Ff) + }else if(Dv==length(c(unlist(Ff)))){ + Z = as.matrix(sweep(Z, 2, Ff, "-")) } + ZZ = outer(Z,Z) # variables + ZZ = aperm(ZZ, c(1,3,2,4)) + dim(ZZ) = c(N*N, Dv, Dv) }else{ - kk = 1 # 2 # we consider each pair only once Z = lm(as.matrix(Z)~as.matrix(Ff)+0)$residuals ## ideally this should be a GLS fit + ZZ = Z[ij[,1],]-Z[ij[,2],] } - # compute pairs - ij = expand.grid(1:nrow(X), 1:nrow(X))# indices - op = ifelse(cov, "*", "-") - ZZ = outer(as.matrix(Z),as.matrix(Z), op) # variables - ZZ = aperm(ZZ, c(1,3,2,4)) - dim(ZZ) = c(N*N, Dv, Dv) - XX = X[ij[,1],]-X[ij[,2],] # locations - XXabs = gmApply(XX, 1, function(x) sqrt(sum(x^2))) - XXaz = gmApply(XX, 1, function(x) pi/2-atan2(x[2],x[1])) +2*pi - XXaz = XXaz %% pi # residual to 180° + # output - ## ATTENTION: needs to be changed to return a structure (3,Na)-matrix of objects, # like logratioVariogramAnisotropy Nh = nrow(lags) @@ -671,8 +670,15 @@ gsi.EVario2D = function(X,Z,Ff=rep(1, nrow(X)), } n[,i] = colSums(tk_h) for(j in 1:Nh){ - if(n[j,i]>minpairs){ - vg[j,,,i] = gmApply(zz[tk_h[,j],,], c(2,3),"sum")/(kk*n[j,i]) + if(!is.na(n[j,i]) && n[j,i]>minpairs){ + if(cov){ # covariance function + vg[j,,,i] = gmApply((ZZ[tk_a,,][tk_h[,j],,]), c(2,3),"sum")/(n[j,i]) + }else{ # semi-variogram + aux = rowSums( + gmApply(X=ZZ[tk_a,][tk_h[,j],], 1, function(x)outer(x,x,"*")) + )/(2*n[j,i]) + vg[j,,,i] = aux + } }else{ vg[j,,,i]=NA } @@ -688,6 +694,156 @@ gsi.EVario2D = function(X,Z,Ff=rep(1, nrow(X)), return(res) } +#' Empirical variogram or covariance function in 3D +#' +#' compute the empirical variogram or covariance function in a 3D case study +#' +#' @param X matrix of Nx3 columns with the geographic coordinates +#' @param Z matrix or data.frame of data with dimension (N,Dv) +#' @param Ff for variogram, matrix of basis functions with nrow(Ff)=N +#' (can be a N-vector of 1s; should include the vector of 1s!!); +#' for covariance function, a (N,Dv)-matrix or a Dv-vector giving the mean values +#' @param maxdist maximum lag distance to consider +#' @param lagNr number of lags to consider +#' @param lags if maxdist and lagNr are not specified, either: (a) a matrix of 2 columns giving +#' minimal and maximal lag distance defining the lag classes to consider, or (b) a vector of lag breaks +#' @param dirvecs matrix which rows are the director vectors along which variograms will be computed (these will be normalized!) +#' @param angtol scalar, angular tolerance applied (in degrees; defaults to 90??, ie. isotropic) +#' @param maxbreadth maximal breadth (in lag units) orthogonal to the lag direction (defaults to `Inf`, ie. not used) +#' @param minpairs minimal number of pairs falling in each class to consider the calculation sufficient; defaults to 10 +#' @param cov boolean, is covariance (TRUE) or variogram (FALSE) desired? defaults to variogram +#' +#' @return An empirical variogram for the provided data. NOTE: avoid using directly gsi.* functions! They +#' represent either internal functions, or preliminary, not fully-tested functions. Use \code{\link{variogram}} instead. +#' +#' +#' @export +#' @family gmEVario functions +#' +# @examples +# dt <- readr::read_csv("~/Geochem_sum_imp2.csv") +# +# X = as.matrix(dt[,c("X","Y","Z")]) +# Z = as.matrix(compositions::clr(dt[,c("Cu","Zn","Pb", "As", "Cd", "In", "Other")])) +# dirvecs = rbind( c(1,0,0), c(1,1,0), c(0,1,0), c(-1,1,0), c(0,0,1)) +# vge = gsi.EVario3D(X, Z, dirvecs=dirvecs, maxdist=50, angtol=10, maxbreadth=5, +# cov = TRUE, Ff = colMeans(Z)) +# dim(vge) +# dimnames(vge) +# class(vge["gamma",1]) +# dim(vge["gamma",1][[1]]) +# vge["npairs",1] +# vge["lags",1] +# plot.gmEVario(vge, varnames = colnames(Z), commonAxis=FALSE, +# vdir.up = 1:4, vdir.lo=5, xlim.up=c(0,50), xlim.lo=c(0,15) +# ) +## ie. the 4 planar directions on the upper triangle, +## the downhole direction in the lower triangle +gsi.EVario3D = function(X,Z,Ff=rep(1, nrow(X)), + maxdist= max(dist(X[sample(nrow(X),min(nrow(X),1000)),]))/2, + lagNr = 15, lags = seq(from=0, to=maxdist, length.out=lagNr+1), + dirvecs=t(c(1,0,0)), angtol=90, + maxbreadth=Inf, minpairs=10, cov=FALSE){ + # dimensions + N = nrow(X) + Dv = ncol(Z) + Dg = ncol(X) + stopifnot(N==nrow(Z)) + if(length(dim(Ff))==0){ + stopifnot(length(Ff) %in% c(N, Dv)) + }else{ + stopifnot(nrow(Ff) %in% c(N, 1) ) + } + ## expand the information given into a set of columns stating conditions + # prepare lags + if(length(dim(lags))==0){ + lags = data.frame(minlag=lags[-length(lags)], maxlag=lags[-1]) + # if(maxbreadth!=Inf) + lags[,"maxbreadth"] = maxbreadth + }else if(dim(lags)==2){ + lags = data.frame(lags) + colnames(lags) = c("minlag","maxlag","maxbreadth")[1:ncol(lags)] + }else stop("lags can be either a vector of lags or a data.frame, see ?gsi.EVario3D") + lagsSq = lags^2 + + # prepare directions with tolerance + if(length(dim(dirvecs))==0){ + if(length(dirvecs)!=3) stop("dirvecs must be a matrix with 3 columns!") + dirvecs = t(dirvecs) + } + dirvecs = dirvecs / sqrt(rowSums(dirvecs^2)) + if(length(angtol) %in% c(1, nrow(dirvecs))){ + dirvecs = cbind(dirvecs, tol=cos(angtol*pi/180) ) + }else stop("angtol can be either a scalar or a vector of length=nrow(dirvecs), see ?gsi.EVario3D") + + # compute pairs of locations + ij = expand.grid(1:nrow(X), 1:nrow(X))# indices + XX = X[ij[,1],]-X[ij[,2],] # locations + XXmodSq = gmApply(XX, 1, function(x) sum(x^2)) + XXmod = sqrt(XXmodSq) + + # compute residuals and pairs of variables appropriate to the structural function + if(cov){ + if(all(dim(Z)==dim(Ff))){ + Z = as.matrix(Z-Ff) + }else if(Dv==length(c(unlist(Ff)))){ + Z = as.matrix(sweep(Z, 2, Ff, "-")) + } + ZZ = outer(Z,Z) # variables + ZZ = aperm(ZZ, c(1,3,2,4)) + dim(ZZ) = c(N*N, Dv, Dv) + }else{ + Z = lm(as.matrix(Z)~as.matrix(Ff)+0)$residuals ## ideally this should be a GLS fit + ZZ = Z[ij[,1],]-Z[ij[,2],] + } + + # output + ## ATTENTION: needs to be changed to return a structure (3,Na)-matrix of objects, + # like logratioVariogramAnisotropy + Nh = nrow(lags) + Na = nrow(dirvecs) + vg = array(0, dim=c(Nh, Dv, Dv, Na), + dimnames=list(rownames(lags), colnames(Z), colnames(Z), rownames(dirvecs)) + ) + n = array(0, dim=c(Nh, Na)) + res = sapply(1:Na, function(i){ + projections = abs(XX %*% dirvecs[i,1:3]) + cosinus = ifelse(XXmod==0,0,projections/XXmod) + tk_a = cosinus > dirvecs[i,"tol"] + xxabs = XXmod[tk_a] + tk_h = outer(xxabs, lags[,1],">=") & outer(xxabs, lags[,2],"<=") + if(ncol(lags)>2){ + residualsSq = XXmodSq[tk_a]-projections[tk_a]^2 + tk_b = outer(residualsSq, lagsSq[,3], "<=") + tk_h = tk_h & tk_b + } + n[,i] = colSums(tk_h) + for(j in 1:Nh){ + if(!is.na(n[j,i]) && n[j,i]>minpairs){ + if(cov){ # covariance function + vg[j,,,i] = gmApply((ZZ[tk_a,,][tk_h[,j],,]), c(2,3),"sum")/(n[j,i]) + }else{ # semi-variogram + aux = rowSums( + gmApply(X=ZZ[tk_a,][tk_h[,j],], 1, function(x)outer(x,x,"*")) + )/(2*n[j,i]) + vg[j,,,i] = aux + } + }else{ + vg[j,,,i]=NA + } + } + return(list(gamma=vg[,,,i], lags=gmGeostats:::gsi.lagClass(lags), npairs =n[,i])) + }) + + # output + attr(res, "directions") = gmGeostats:::gsi.directorVector(dirvecs[,1:3]) + # attr(res, "lags") = gsi.lagClass(lags) + attr(res, "type") = ifelse(cov, "covariance","semivariogram") + class(res) = "gmEVario" + return(res) +} + + @@ -756,7 +912,7 @@ plot.gmEVario = function(x, xlim.up=NULL, xlim.lo=NULL, vdir.up= NULL, vdir.lo= xlim.lo = c(0, maxdist) } } - + opar = par() opar = par_remove_readonly(opar) @@ -772,8 +928,8 @@ plot.gmEVario = function(x, xlim.up=NULL, xlim.lo=NULL, vdir.up= NULL, vdir.lo= for(j in 1:Dv){ if((i>=j)&(!is.null(vdir.lo))){ par(mfg=c(i+1,j,Dv+1,Dv+1)) - ylim = range(sapply(vdir.lo, function(kk) x["gamma",kk][[1]][,i,j])) - if(commonAxis) ylim=range(sapply(vdir.lo, function(kk) x["gamma",kk][[1]][,,j])) + ylim = range(sapply(vdir.lo, function(kk) x["gamma",kk][[1]][,i,j]), na.rm = TRUE) + if(commonAxis) ylim=range(sapply(vdir.lo, function(kk) x["gamma",kk][[1]][,,j]), na.rm=TRUE) myplot( sapply(vdir.lo, function(kk) gsi.midValues.lagClass(x["lags",kk][[1]])), sapply(vdir.lo, function(kk) x["gamma",kk][[1]][,i,j]), ylim=ylim, ...) @@ -785,8 +941,8 @@ plot.gmEVario = function(x, xlim.up=NULL, xlim.lo=NULL, vdir.up= NULL, vdir.lo= } if((i<=j)&(!is.null(vdir.up))){ par(mfg=c(i,j+1,Dv+1,Dv+1)) - ylim = range(sapply(vdir.up, function(kk) x["gamma",kk][[1]][,i,j])) - if(commonAxis) ylim=range(sapply(vdir.up, function(kk) x["gamma",kk][[1]][,,j])) + ylim = range(sapply(vdir.up, function(kk) x["gamma",kk][[1]][,i,j]), na.rm = TRUE) + if(commonAxis) ylim=range(sapply(vdir.up, function(kk) x["gamma",kk][[1]][,,j]), na.rm=TRUE) myplot( sapply(vdir.up, function(kk) gsi.midValues.lagClass(x["lags",kk][[1]])), sapply(vdir.up, function(kk) x["gamma",kk][[1]][,i,j]), ylim=ylim, ...) -- GitLab