From 26ccbe0f702a84eab6d686f578fadb267c10dce0 Mon Sep 17 00:00:00 2001
From: Raimon Tolosana-Delgado <tolosa53@fwg206.ad.fz-rossendorf.de>
Date: Fri, 5 Nov 2021 13:28:48 +0100
Subject: [PATCH] gmGeostats variogram estimation improved

---
 DESCRIPTION    |   5 +-
 NAMESPACE      |   1 +
 NEWS.md        |   4 +
 R/variograms.R | 206 +++++++++++++++++++++++++++++++++++++++++++------
 4 files changed, 189 insertions(+), 27 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 3cf35e0..0fe1f73 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: gmGeostats
-Version: 0.11.0-9000
-Date: 2021-10-20
+Version: 0.11.0-9001
+Date: 2021-11-05
 Title: Geostatistics for Compositional Analysis
 Authors@R: c(person(given = "Raimon", 
         family = "Tolosana-Delgado", 
@@ -60,6 +60,7 @@ VignetteBuilder: knitr
 LazyData: true
 Collate: 
     'Anamorphosis.R'
+    '__preliminary_3D_Rosie.R'
     'preparations.R'
     'gstatCompatibility.R'
     'gmAnisotropy.R'
diff --git a/NAMESPACE b/NAMESPACE
index 1d5250e..b17e337 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -179,6 +179,7 @@ export(gridOrder_gstat)
 export(gridOrder_sp)
 export(gsi.DS)
 export(gsi.EVario2D)
+export(gsi.EVario3D)
 export(gsi.TurningBands)
 export(gsi.gstatCokriging2compo)
 export(gsi.gstatCokriging2rmult)
diff --git a/NEWS.md b/NEWS.md
index 92a92cc..eae19e3 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+# gmGeostats 0.11.0-9001
+
+* (2021-11-05) bugs corrected in internal 2D empirical variogram function `gsi.EVario2D()` . First version of 3D internal variogram function `gsi.EVario3D()` available. Usage of these functions is only for specialists foreseen. In the near future a user-friendlier wrapper will be provided.
+
 # gmGeostats 0.11.0-9000
 
 * (2021-10-20) section on the vignette "register_new_layer_datatype.Rmd" about the definition and registration of empirical covariance for circular data.
diff --git a/R/variograms.R b/R/variograms.R
index 5ff48e0..1f68ea5 100644
--- a/R/variograms.R
+++ b/R/variograms.R
@@ -627,31 +627,30 @@ gsi.EVario2D = function(X,Z,Ff=rep(1, nrow(X)),
     colnames(azimuths) = c("minaz","maxaz")
   }else stop("azimuths can be either a vector of lags or a data.frame, see ?gsi.EVario2D")
   
-  # compute residuals
+  # compute pairs of locations
+  ij = expand.grid(1:nrow(X), 1:nrow(X))# indices
+  XX = X[ij[,1],]-X[ij[,2],] # locations
+  XXabs = gmApply(XX, 1, function(x) sqrt(sum(x^2))) 
+  XXaz = gmApply(XX, 1, function(x) pi/2-atan2(x[2],x[1])) +2*pi
+  XXaz = XXaz %% pi  # residual to 180??
+  
+  # compute residuals and pairs of variables appropriate to the structural function
   if(cov){ 
-    kk = 1
-    if(dim(Z)==dim(Ff)){
-      Z = Z-Ff
-    }else if(ncol(Z)==length(c(unlist(Ff)))){
-      Z = sweep(Z, 2, Ff, "-")
+    if(all(dim(Z)==dim(Ff))){
+      Z = as.matrix(Z-Ff)
+    }else if(Dv==length(c(unlist(Ff)))){
+      Z = as.matrix(sweep(Z, 2, Ff, "-"))
     }
+    ZZ = outer(Z,Z) # variables
+    ZZ = aperm(ZZ, c(1,3,2,4))
+    dim(ZZ) = c(N*N, Dv, Dv)
   }else{
-    kk = 1 # 2 # we consider each pair only once
     Z = lm(as.matrix(Z)~as.matrix(Ff)+0)$residuals ## ideally this should be a GLS fit
+    ZZ = Z[ij[,1],]-Z[ij[,2],]
   }
-  # compute pairs
-  ij = expand.grid(1:nrow(X), 1:nrow(X))# indices
-  op = ifelse(cov, "*", "-")
-  ZZ = outer(as.matrix(Z),as.matrix(Z), op) # variables
-  ZZ = aperm(ZZ, c(1,3,2,4))
-  dim(ZZ) = c(N*N, Dv, Dv)
-  XX = X[ij[,1],]-X[ij[,2],] # locations
-  XXabs = gmApply(XX, 1, function(x) sqrt(sum(x^2))) 
-  XXaz = gmApply(XX, 1, function(x) pi/2-atan2(x[2],x[1])) +2*pi
-  XXaz = XXaz %% pi  # residual to 180Â°
+  
   
   # output
-
   ## ATTENTION: needs to be changed to return a structure (3,Na)-matrix of objects,
   #     like logratioVariogramAnisotropy
   Nh = nrow(lags)
@@ -671,8 +670,15 @@ gsi.EVario2D = function(X,Z,Ff=rep(1, nrow(X)),
     } 
     n[,i] = colSums(tk_h)
     for(j in 1:Nh){
-      if(n[j,i]>minpairs){
-        vg[j,,,i] = gmApply(zz[tk_h[,j],,], c(2,3),"sum")/(kk*n[j,i])
+      if(!is.na(n[j,i]) && n[j,i]>minpairs){
+        if(cov){ # covariance function
+          vg[j,,,i] = gmApply((ZZ[tk_a,,][tk_h[,j],,]), c(2,3),"sum")/(n[j,i])
+        }else{   # semi-variogram
+          aux = rowSums(
+            gmApply(X=ZZ[tk_a,][tk_h[,j],], 1, function(x)outer(x,x,"*")) 
+          )/(2*n[j,i])
+          vg[j,,,i] = aux
+        }
       }else{
         vg[j,,,i]=NA
       }
@@ -688,6 +694,156 @@ gsi.EVario2D = function(X,Z,Ff=rep(1, nrow(X)),
   return(res)
 }
 
+#' Empirical variogram or covariance function in 3D
+#' 
+#' compute the empirical variogram or covariance function in a 3D case study 
+#' 
+#' @param X matrix of Nx3 columns with the geographic coordinates
+#' @param Z matrix or data.frame of data with dimension (N,Dv)
+#' @param Ff for variogram, matrix of basis functions with nrow(Ff)=N 
+#' (can be a N-vector of 1s; should include the vector of 1s!!); 
+#' for covariance function, a (N,Dv)-matrix or a Dv-vector giving the mean values
+#' @param maxdist maximum lag distance to consider
+#' @param lagNr number of lags to consider
+#' @param lags if maxdist and lagNr are not specified, either: (a) a matrix of 2 columns giving 
+#' minimal and maximal lag distance defining the lag classes to consider, or (b) a vector of lag breaks 
+#' @param dirvecs matrix which rows are the director vectors along which variograms will be computed (these will be normalized!) 
+#' @param angtol scalar, angular tolerance applied (in degrees; defaults to 90??, ie. isotropic) 
+#' @param maxbreadth maximal breadth (in lag units) orthogonal to the lag direction (defaults to `Inf`, ie. not used)
+#' @param minpairs minimal number of pairs falling in each class to consider the calculation sufficient; defaults to 10
+#' @param cov boolean, is covariance (TRUE) or variogram (FALSE) desired? defaults to variogram
+#'
+#' @return An empirical variogram for the provided data. NOTE: avoid using directly gsi.* functions! They 
+#' represent either internal functions, or preliminary, not fully-tested functions. Use \code{\link{variogram}} instead.
+#' 
+#' 
+#' @export
+#' @family gmEVario functions
+#'
+# @examples
+# dt <- readr::read_csv("~/Geochem_sum_imp2.csv")
+# 
+# X = as.matrix(dt[,c("X","Y","Z")])
+# Z = as.matrix(compositions::clr(dt[,c("Cu","Zn","Pb", "As", "Cd", "In", "Other")]))
+# dirvecs = rbind( c(1,0,0), c(1,1,0), c(0,1,0), c(-1,1,0), c(0,0,1))
+# vge = gsi.EVario3D(X, Z, dirvecs=dirvecs, maxdist=50, angtol=10, maxbreadth=5,
+# cov = TRUE, Ff = colMeans(Z))
+# dim(vge)
+# dimnames(vge)
+# class(vge["gamma",1])
+# dim(vge["gamma",1][[1]])
+# vge["npairs",1]
+# vge["lags",1]
+# plot.gmEVario(vge, varnames = colnames(Z), commonAxis=FALSE,
+#    vdir.up = 1:4, vdir.lo=5, xlim.up=c(0,50), xlim.lo=c(0,15)
+#    )  
+## ie. the 4 planar directions on the upper triangle,
+## the downhole direction in the lower triangle
+gsi.EVario3D = function(X,Z,Ff=rep(1, nrow(X)),
+                        maxdist= max(dist(X[sample(nrow(X),min(nrow(X),1000)),]))/2, 
+                        lagNr = 15, lags = seq(from=0, to=maxdist, length.out=lagNr+1),
+                        dirvecs=t(c(1,0,0)), angtol=90,
+                        maxbreadth=Inf, minpairs=10, cov=FALSE){
+  # dimensions
+  N = nrow(X)
+  Dv = ncol(Z)
+  Dg = ncol(X)
+  stopifnot(N==nrow(Z))
+  if(length(dim(Ff))==0){
+    stopifnot(length(Ff) %in% c(N, Dv))
+  }else{
+    stopifnot(nrow(Ff) %in% c(N, 1) )
+  }
+  ## expand the information given into a set of columns stating conditions
+  # prepare lags
+  if(length(dim(lags))==0){
+    lags = data.frame(minlag=lags[-length(lags)], maxlag=lags[-1]) 
+    # if(maxbreadth!=Inf) 
+    lags[,"maxbreadth"] = maxbreadth
+  }else if(dim(lags)==2){
+    lags = data.frame(lags)
+    colnames(lags) = c("minlag","maxlag","maxbreadth")[1:ncol(lags)]
+  }else stop("lags can be either a vector of lags or a data.frame, see ?gsi.EVario3D")
+  lagsSq = lags^2
+  
+  # prepare directions with tolerance
+  if(length(dim(dirvecs))==0){
+    if(length(dirvecs)!=3) stop("dirvecs must be a matrix with 3 columns!")
+    dirvecs = t(dirvecs)
+  }
+  dirvecs = dirvecs / sqrt(rowSums(dirvecs^2))
+  if(length(angtol) %in% c(1, nrow(dirvecs))){
+    dirvecs = cbind(dirvecs, tol=cos(angtol*pi/180) )
+  }else stop("angtol can be either a scalar or a vector of length=nrow(dirvecs), see ?gsi.EVario3D")
+  
+  # compute pairs of locations
+  ij = expand.grid(1:nrow(X), 1:nrow(X))# indices
+  XX = X[ij[,1],]-X[ij[,2],] # locations
+  XXmodSq = gmApply(XX, 1, function(x) sum(x^2))
+  XXmod = sqrt(XXmodSq)
+  
+  # compute residuals and pairs of variables appropriate to the structural function
+  if(cov){ 
+    if(all(dim(Z)==dim(Ff))){
+      Z = as.matrix(Z-Ff)
+    }else if(Dv==length(c(unlist(Ff)))){
+      Z = as.matrix(sweep(Z, 2, Ff, "-"))
+    }
+    ZZ = outer(Z,Z) # variables
+    ZZ = aperm(ZZ, c(1,3,2,4))
+    dim(ZZ) = c(N*N, Dv, Dv)
+  }else{
+    Z = lm(as.matrix(Z)~as.matrix(Ff)+0)$residuals ## ideally this should be a GLS fit
+    ZZ = Z[ij[,1],]-Z[ij[,2],]
+  }
+  
+  # output
+  ## ATTENTION: needs to be changed to return a structure (3,Na)-matrix of objects,
+  #     like logratioVariogramAnisotropy
+  Nh = nrow(lags)
+  Na = nrow(dirvecs)
+  vg = array(0, dim=c(Nh, Dv, Dv, Na),
+             dimnames=list(rownames(lags), colnames(Z), colnames(Z), rownames(dirvecs))
+  )
+  n = array(0, dim=c(Nh, Na))
+  res = sapply(1:Na, function(i){
+    projections = abs(XX %*% dirvecs[i,1:3])
+    cosinus = ifelse(XXmod==0,0,projections/XXmod)
+    tk_a = cosinus > dirvecs[i,"tol"]
+    xxabs = XXmod[tk_a]
+    tk_h = outer(xxabs, lags[,1],">=") & outer(xxabs, lags[,2],"<=")
+    if(ncol(lags)>2){
+      residualsSq = XXmodSq[tk_a]-projections[tk_a]^2
+      tk_b = outer(residualsSq, lagsSq[,3], "<=")
+      tk_h = tk_h & tk_b
+    } 
+    n[,i] = colSums(tk_h)
+    for(j in 1:Nh){
+      if(!is.na(n[j,i]) && n[j,i]>minpairs){
+        if(cov){ # covariance function
+          vg[j,,,i] = gmApply((ZZ[tk_a,,][tk_h[,j],,]), c(2,3),"sum")/(n[j,i])
+        }else{   # semi-variogram
+          aux = rowSums(
+            gmApply(X=ZZ[tk_a,][tk_h[,j],], 1, function(x)outer(x,x,"*")) 
+          )/(2*n[j,i])
+          vg[j,,,i] = aux
+        }
+      }else{
+        vg[j,,,i]=NA
+      }
+    }
+    return(list(gamma=vg[,,,i], lags=gmGeostats:::gsi.lagClass(lags), npairs =n[,i]))
+  })
+  
+  # output
+  attr(res, "directions") = gmGeostats:::gsi.directorVector(dirvecs[,1:3])
+  # attr(res, "lags") = gsi.lagClass(lags)
+  attr(res, "type") = ifelse(cov, "covariance","semivariogram")
+  class(res) = "gmEVario"
+  return(res)
+}
+
+
 
 
 
@@ -756,7 +912,7 @@ plot.gmEVario = function(x, xlim.up=NULL, xlim.lo=NULL, vdir.up= NULL, vdir.lo=
       xlim.lo = c(0, maxdist)
     }
   }
-
+  
   opar = par()
   opar = par_remove_readonly(opar)
   
@@ -772,8 +928,8 @@ plot.gmEVario = function(x, xlim.up=NULL, xlim.lo=NULL, vdir.up= NULL, vdir.lo=
     for(j in 1:Dv){
       if((i>=j)&(!is.null(vdir.lo))){
         par(mfg=c(i+1,j,Dv+1,Dv+1))
-        ylim = range(sapply(vdir.lo, function(kk) x["gamma",kk][[1]][,i,j]))
-        if(commonAxis) ylim=range(sapply(vdir.lo, function(kk) x["gamma",kk][[1]][,,j]))
+        ylim = range(sapply(vdir.lo, function(kk) x["gamma",kk][[1]][,i,j]), na.rm = TRUE)
+        if(commonAxis) ylim=range(sapply(vdir.lo, function(kk) x["gamma",kk][[1]][,,j]), na.rm=TRUE)
         myplot(
           sapply(vdir.lo, function(kk) gsi.midValues.lagClass(x["lags",kk][[1]])), 
           sapply(vdir.lo, function(kk) x["gamma",kk][[1]][,i,j]),  ylim=ylim, ...)
@@ -785,8 +941,8 @@ plot.gmEVario = function(x, xlim.up=NULL, xlim.lo=NULL, vdir.up= NULL, vdir.lo=
       }
       if((i<=j)&(!is.null(vdir.up))){
         par(mfg=c(i,j+1,Dv+1,Dv+1))
-        ylim = range(sapply(vdir.up, function(kk) x["gamma",kk][[1]][,i,j]))
-        if(commonAxis) ylim=range(sapply(vdir.up, function(kk) x["gamma",kk][[1]][,,j]))
+        ylim = range(sapply(vdir.up, function(kk) x["gamma",kk][[1]][,i,j]), na.rm = TRUE)
+        if(commonAxis) ylim=range(sapply(vdir.up, function(kk) x["gamma",kk][[1]][,,j]), na.rm=TRUE)
         myplot(
           sapply(vdir.up, function(kk) gsi.midValues.lagClass(x["lags",kk][[1]])), 
           sapply(vdir.up, function(kk) x["gamma",kk][[1]][,i,j]),  ylim=ylim, ...)
-- 
GitLab