diff --git a/NAMESPACE b/NAMESPACE index 77c6ff85b492887c8c2fbc6fdce1bc117bddb0dd..f13569a86b89efe41974bef011ac4a4d705f3785 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -290,8 +290,10 @@ importFrom(sp,proj4string) importFrom(stats,complete.cases) importFrom(stats,cov.wt) importFrom(stats,lm) +importFrom(stats,loadings) importFrom(stats,loess) importFrom(stats,predict) +importFrom(stats,princomp) importFrom(stats,quantile) importFrom(utils,download.file) importFrom(utils,methods) diff --git a/R/compositionsCompatibility.R b/R/compositionsCompatibility.R index 3e5990871c33f8f5896c2044694e856f8e589544..21d413229a78764335f718032399897b081084ed 100644 --- a/R/compositionsCompatibility.R +++ b/R/compositionsCompatibility.R @@ -1076,7 +1076,14 @@ variogramModelPlot.logratioVariogram <- function(vg, model = NULL, # gstat or stop("not yet available") if(!is.null(model)){ model = as.LMCAnisCompo(model) - } + } + if(!("logratioVariogramAnisotropy" %in% class(vg))){ + dim(vg) = c(3,1) + attr(vg, "lags") = + colnames(lrvg) = "0N" + + + } } diff --git a/R/genDiag.R b/R/genDiag.R index ef218374ed1c4e06e77b0b5f54460b0c611f770d..80f3493b381fc9cf2b49d78b6908f33878ac7729 100644 --- a/R/genDiag.R +++ b/R/genDiag.R @@ -1,481 +1,482 @@ -#### MAF calculations (2 versions) ---------------- -# compute the MAF basis from two matrices of (alr/ilr/clr)-covariance -gsi.getMAFbasis = function(C0, CT, tol=1e-12){ - svdT = svd(CT) - A1 = with(svdT, u %*% diag(ifelse(d>tol, 1/sqrt(d), 0)) ) - B = t(A1) %*% C0 %*% A1 - svdB = svd(B) - o = nrow(svdB$u):1 - A = A1[,o] %*% svdB$u[o,] - return(A) -} - - -gsi.computeExplainedVariance <- function(X, Winv){ - rs = sapply(1:ncol(X), function(i) mvar(rmult(outer(X[,i], Winv[i,])))) - return(rs) -} - - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @param ... generic functionality arguments -#' @export -Maf = function(x,...) UseMethod("Maf",x) - -#' Generalised diagonalisations -#' Calculate several generalized diagonalisations out of a data set and its empirical -#' variogram -#' @param x a data set, typically of class "data.frame" or of a compositional class -#' @param vg empirical variogram, of a kind fitting to the data provided -#' @param i a slicer for the variogram, typically this will be one or more indices of -#' the lag distance to take. %For other options see code{getEmpVariogramSlice}. -#' -#' @return An object extending \code{c("princomp.CLASSOF(x)",}"\code{\link{princomp}}") -#' with classes "\code{genDiag}", and an extra class argument depending on the -#' diagonalisation method chosen. -#' -#' Function \code{Maf} results carry the extra class "\code{maf}", and they correspond -#' to minimum/maximum autocorrelation factors (MAF) as proposed by Switzer and Green -#' (XXXX) or XXXXX (XXXX). In this case, the -#' slicer is typically just the index of one lag distance (defaults to i=2). MAF -#' provides the analytical solution to the joint diagonalisation of two matrices, -#' the covariance of increments provided by the slicing and the conventional covariance -#' matrix (of the idt transformed values, if appropriate). Resulting factors are ordered -#' in decreasing order of spatial continuity, which might produce surprising -#' scree-plots for those who are used to see a screeplot of a principal component analysis. -#' -#' Function \code{UWEDGE} (Uniformly Weighted Exhaustive Diagonalization with Gauss -#' iterations) results carry the extra class "\code{uwedge}". The function -#' is a wrapper on \code{jointDiag::uwedge} from package \code{jointDiag}. In this case the -#' slicer is typically just a subset of indices of lag distances to consider -#' (defaults to the nearest indexes to mininum, maximum and mean lag distances of the -#' variogram). UWEDGE iteratively seeks for a pair of matrices (a mixing and a -#' demixing matrices) diagonalises the set of matrices \eqn{M_1, M_2, \ldots, M_K} -#' given, by minimizing the target quantity -#' \deqn{Q_{uwedge}(A, W) = \sum_{k=1}^K Tr[E_k^t\cdot E_k],} -#' where \eqn{E_k = (W^t\cdot M_k \cdot W- A\cdot \Lambda_k\cdot A^t)} and -#' \eqn{\Lambda_k = diag(W^t\cdot M_k \cdot W)} is the resulting diagonalized version of -#' each matrix. Obtained factors are ordered -#' in decreasing order of spatial continuity, which might produce surprising -#' scree-plots for those who are used to see a screeplot of a principal component analysis. -#' -#' Function \code{RJD} results carry the extra class "\code{rjd}". The function -#' is a wrapper on \code{JADE::rjd}, implementing the Rotational joint diagonalisation method . In this case the -#' slicer is typically just a subset of indices of lag distances to consider -#' (defaults to the nearest indexes to mininum, maximum and mean lag distances). -#' This algorithm also served for quasi-diagonalising a set of matrices as in UWEDGE, -#' just that in this case the quantity to minimise is the sum of sequares of all off-diagonal -#' elements of \eqn{A^t\cdot M_k\cdot A} for all \eqn{k=1, 2, \ldots K}. -#' -#' All these functions produce output mimicking \code{\link{princomp}}, i.e. with -#' elements -#' \describe{ -#' \item{sdev}{contrary to the output in PCA, this contains the square root of the -#' metric variance of the predictions obtained for each individual factor; this is the -#' quantity needed for \code{\link{screeplot}} to create plots of explained variance -#' by factor} -#' \item{loadings}{matrix of contributions of each (cdt-transformed) original variable to the new factors} -#' \item{center}{center of the data set (eventually, represented through \code{\link{cdt}}), -#' in compositional methods} -#' \item{scale}{the scalings applied to each original variable} -#' \item{n.obs}{number of observations} -#' \item{scores}{the scores of the supplied data on the new factors} -#' \item{call}{the call to the function (attention: it actually may come much later)} -#' } -#' and additionally some of the following arguments, in different order -#' \describe{ -#' \item{invLoadings}{matrix of contributions of each factor onto each original variable} -#' \item{Center}{compositional methods return here the cdt-backtransformed center} -#' \item{InvLoadings}{compositional methods return here the clr-backtransformed inverse loadings, so that -#' each column of this matrix can be understood as a composition on itself} -#' \item{DownInvLoadings}{compositional methods return here the clr-backtransformed "minus inverse loadings", so that -#' each column of this matrix can be understood as a composition on itself; details in -#' \code{\link{princomp.acomp}} } -#' \item{C1, C2}{Maf returns the two matrices that were diagonalised} -#' \item{eigenvalues}{Maf returns the generalized eigenvalues of the diagonalisation of C1 and C2} -#' \item{gof}{UWEDGE returns the values of the goodness of fit criterion across sweeps} -#' \item{diagonalized}{RJD returns the diagonalized matrices, in an array of (K,D,D)-dimensions, being -#' D the number of variables in \code{x}} -#' \item{type}{a string describing which package and which function was used as a workhorse for -#' the calculation} -#' } -#' -#' -#' NOTE: if the arguments you provide to RJD and UWEDGE are not of the appropriate type -#' (i.e. data.frames or equivalent) the default method of these functions will just attempt -#' to call the basis functions JADE:rjd and JointDiag:uwedge respectively. -#' This will be the case if you provide \code{x} as a "\code{matrix}", or as -#' an "\code{array}". In those cases, the output will NOT be structured as an extension -#' to princomp results; instead they will be native output from those functions. -#' -#' -#' @export -#' @method Maf data.frame -#' @aliases genDiag -#' -#' @family generalised Diagonalisations -#' -#' @examples -#' require("magrittr") -#' require("gstat") -#' require("compositions") -#' data("jura", package="gstat") -#' gs = gstat(id="Cd", formula=log(Cd)~1, locations=~Xloc+Yloc, data=jura.pred) %>% -#' gstat(id="Co", formula=log(Cd)~1, locations=~Xloc+Yloc, data=jura.pred) %>% -#' gstat(id="Cr", formula=log(Cr)~1, locations=~Xloc+Yloc, data=jura.pred) %>% -#' gstat(id="Cu", formula=log(Cu)~1, locations=~Xloc+Yloc, data=jura.pred) %>% -#' gstat(id="Ni", formula=log(Ni)~1, locations=~Xloc+Yloc, data=jura.pred) %>% -#' gstat(id="Pb", formula=log(Pb)~1, locations=~Xloc+Yloc, data=jura.pred) %>% -#' gstat(id="Zn", formula=log(Zn)~1, locations=~Xloc+Yloc, data=jura.pred) -#' vg = variogram(gs) -#' mf = Maf(aplus(jura.pred[, -(1:6)]), vg=vg) -#' mf -#' mf$loadings -#' biplot(mf) -Maf.data.frame <- function(x, vg, i=2,...){ - cl <- match.call() - C1 = unclass(var(idt(x))) - mn = colMeans(cdt(x)) - C2 = 0*C1 - ## BEGIN: to change in the future to use gmEVario - vg = as.gstatVariogram(vg) - #dists = unique(vg$dist) - #vv = vg[abs(dists[i]-vg$dist)<1e-9*dists[i],] - vv = sapply(split(vg$gamma, vg$id), function(x)x[i]) - vv = vv[order(names(vv))] - C2[lower.tri(C2,diag=TRUE)]=vv - dd = diag(diag(C2)) - C2 = unclass(C2 + t(C2) - dd) - ## END: to change in the future to use gmEVario - - - dt.centred = scale(x, center = TRUE, scale=FALSE) - eT = eigen(C1) - A1 = with(eT, vectors %*% diag(ifelse(values>1e-12, 1/sqrt(values), 0)) ) ## C1^{-0.5} - B = t(A1) %*% C2 %*% A1 - eB = eigen(B) - A2 = eB$vectors - - eigenvalues = eB$values - ord = order(eigenvalues) - A2 = A2[, ord] - W = unclass(A1) %*% unclass(A2) - rownames(W) = colnames(x) - # do not normalize - # W = unclass(t(normalize(rmult(t(W))))) - W = unclass(W) - Winv = solve(W) - colnames(Winv) = colnames(W) - facscores = unclass(dt.centred) %*% W - colnames(facscores) = paste("maf", 1:ncol(facscores), sep="") - sdevs = sqrt(gsi.computeExplainedVariance(facscores, Winv)) - res = list(sdev=sdevs, loadings = W, center=mn, scale=rep(1, ncol(x)), - n.obs=nrow(x), scores = facscores, C1=C1, C2=C2, eigenvalues = eigenvalues[ord], - invLoadings=Winv) - if(class(x)!="data.frame"){ - res$Center = cdtInv(mn) - res$InvLoadings = cdtInv(Winv, orig=x) - res$InvDownLoadings = cdtInv(-Winv, orig=x) - } - res$call = cl - res$type="gmGeostats::MAF" - class(res)= c("maf","genDiag",paste("princomp", class(x), sep="."), "princomp") - return(res) -} - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @method Maf rmult -#' @export -Maf.rmult <- Maf.data.frame - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @method Maf aplus -#' @export -Maf.aplus <- Maf.data.frame - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @method Maf rplus -#' @export -Maf.rplus <- Maf.data.frame - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @method Maf ccomp -#' @export -Maf.ccomp <- Maf.data.frame - - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @method Maf rcomp -#' @export -Maf.rcomp <- function(x, vg, i=2,...){ - requireNamespace("compositions", quietly=TRUE) - cl <- match.call() - C1 = unclass(clrvar2ilr(cov(x))) - C2 = unclass(clrvar2ilr(variation2clrvar(vg$vg[i,,]))) - clr.centred = scale(cdt(x), center = TRUE, scale=FALSE) - - eT = eigen(C1) - A1 = with(eT, vectors %*% diag(ifelse(values>1e-12, 1/sqrt(values), 0)) ) ## C1^{-0.5} - B = t(A1) %*% C2 %*% A1 - eB = eigen(B) - A2 = eB$vectors - - eigenvalues = eB$values - ord = order(eigenvalues) - A2 = A2[, ord] - W = unclass(A1) %*% unclass(A2) - # do not normalize - #W = unclass(t(normalize(rmult(t(W))))) - Wclr = unclass(t(ilr2clr(t(W)))) - rownames(Wclr) = colnames(x) - Winv = solve(W) - Wclr.inv = unclass(ilr2clr(Winv)) - colnames(Wclr.inv) = colnames(x) - facscores = unclass(clr.centred) %*% Wclr - colnames(facscores) = paste("maf", 1:ncol(facscores), sep="") - sdevs = sqrt(gsi.computeExplainedVariance(facscores, Wclr.inv)) - res = list(sdev=sdevs, loadings = Wclr, center=mean(cdt(x),...), scale=rep(1, ncol(x)), - n.obs=nrow(x), scores = facscores, - C1=C1, C2=C2, eigenvalues = eigenvalues[ord], invLoadings=Wclr.inv, - Center = mean(x, ...), - InvLoadings = cdtInv(Wclr.inv, orig=x), - InvDownLoadings = cdtInv(-Wclr.inv, orig=x), - call = cl, type="gmGeostats::MAF") - kkk = paste("princomp", class(x), sep=".") - class(res)= c("maf","genDiag", kkk, "princomp") - return(res) -} - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @method Maf acomp -#' @export -Maf.acomp <- Maf.rcomp - - - -#### UWEDGE calculations ---------- - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @export -UWEDGE = function(x,...){ - if(!requireNamespace("jointDiag", quietly=FALSE)) stop("UWEDGE requires package 'jointDiag' installed") - UseMethod("UWEDGE",x) -} - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @method UWEDGE default -#' @export -UWEDGE.default <- function(x, ...) jointDiag::uwedge(M=x, ...) - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @method UWEDGE acomp -#' @export -UWEDGE.acomp <- function(x, vg, i=NULL, ...){ - requireNamespace("compositions", quietly=TRUE) - cl <- match.call() - vg = as.logratioVariogram(vg) - if(is.null(i)){ - i = c(1, floor(c(0.5,1)*dim(vg$vg)[1])) - } - V = loadings(princomp(x)) - C1 = clrvar2ilr(cov(x), V = V) - d = ncol(x)-1 - M = -0.5*apply(vg$vg[c(1,i),,], 1, clrvar2ilr, V=V ) - dim(M)=c(d,d,length(i)+1) - M[,,1]=C1 - res = jointDiag::uwedge(M) - - ord = order(diag(res$B %*% M[,,length(i)] %*% t(res$B))) - res$B = res$B[ord,] - W = t(res$B) - # do not normalize - # W = unclass(t(normalize(rmult(t(W))))) - W = unclass(W) - ilrx = ilr(x, V=V) - mns = mean(ilrx) - ilrxc = ilrx-mns - facscores = unclass(ilrxc) %*% W - Wclr = V %*% W - rownames(Wclr) = colnames(x) - colnames(Wclr) <- paste("uwedge", 1:d, sep="") - Wclr.inv = gsiInv(Wclr) - colnames(Wclr.inv) = colnames(x) - colnames(facscores) <- paste("uwedge", 1:d, sep="") - sdevs = sqrt(gsi.computeExplainedVariance(facscores, Wclr.inv)) - out = list(sdev=sdevs, loadings = Wclr, center=mean(cdt(x)), - scale=rep(1, ncol(x)), - n.obs=nrow(x), scores = facscores, - gof=res$criter, invLoadings=Wclr.inv, - Center = mean(x), - InvLoadings = cdtInv(Wclr.inv, orig=x), - InvDownLoadings = cdtInv(-Wclr.inv, orig=x), - call = cl, type="jointDiag::uwedge") - kkk = paste("princomp", class(x), sep=".") - class(out)= c("uwedge","genDiag", kkk, "princomp") - return(out) -} - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @method UWEDGE rcomp -#' @export -UWEDGE.rcomp <- UWEDGE.acomp - - - - -### RJD calculation ----------------------- - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @export -RJD = function(x,...){ - if(!requireNamespace("JADE", quietly=FALSE)) stop("RJD requires package 'JADE' installed") - UseMethod("RJD",x) -} - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @method RJD default -#' @export -RJD.default <- function(x, ...){ - JADE::rjd(X=x, ...) -} - - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @method RJD acomp -#' @export -RJD.acomp <- function(x, vg, i=NULL, ...){ - requireNamespace("compositions", quietly=TRUE) - cl <- match.call() - vg = as.logratioVariogram(vg) - if(is.null(i)) - i = c(1, floor(c(0.5,1)*dim(vg$vg)[1])) - d = ncol(x)-1 - i = sort(i,decreasing=TRUE) - V = loadings(princomp(x)) - M = -0.5*apply(vg$vg[i ,,], 1, clrvar2ilr, V=V ) - dim(M)=c(d,d,length(i)) - res = JADE::rjd(M) - res$B = t(res$V) - - ord = order(diag(res$B %*% M[,,2] %*% t(res$B))) - res$B = res$B[ord,] - W = t(res$B) - # do not normalize - # W = unclass(t(normalize(rmult(t(W))))) - W = unclass(W) - ilrx = ilr(x, V=V) - mns = mean(ilrx) - ilrxc = ilrx-mns - facscores = unclass(ilrxc) %*% W - Wclr = V %*% W - rownames(Wclr) = colnames(x) - Wclr.inv = t(Wclr) - colnames(Wclr.inv) = colnames(x) - sdevs = sqrt(gsi.computeExplainedVariance(facscores, Wclr.inv)) - o = order(sdevs, decreasing = TRUE) - Wclr = Wclr[,o] - facscores = facscores[,o] - Wclr.inv = Wclr.inv[o,] - colnames(Wclr) <- rownames(Wclr.inv) <- paste("rjd", 1:d, sep="") - colnames(facscores) <- paste("rjd", 1:d, sep="") - out = list(sdev=sdevs[o], loadings = Wclr, center=mean(cdt(x)), - scale=rep(1, ncol(x)), - n.obs=nrow(x), scores = facscores, - diagonalized = res$D[o,o,], invLoadings=Wclr.inv, - Center = mean(x), - InvLoadings = cdtInv(Wclr.inv, orig=x), - InvDownLoadings = cdtInv(-Wclr.inv, orig=x), - call = cl, type="JADE::rjd") - kkk = paste("princomp", class(x), sep=".") - class(out)= c("rjd","genDiag", kkk, "princomp") - return(out) -} - -#' @describeIn Maf.data.frame Generalised diagonalisations -#' @method RJD rcomp -#' @export -RJD.rcomp <- RJD.acomp - - -### genDiag methods ---- - -#' Predict method for generalised diagonalisation objects -#' -#' @param object a generalized diagonalisation object, as obtained from a call to -#' \code{\link{Maf}}, and on the same page, information on the other diagonalisation -#' methods \code{UWEDGE} or \code{RJD} -#' @param newdata a matrix or data.frame of factor scores to convert back to the original -#' scale (default: the scores element from `object`) -#' @param ... not used, kept for generic compatibility -#' -#' @return A data set or compositional object of the nature of the original data -#' used for creating the genDiag object. -#' @export -#' @family generalised Diagonalisations -#' @examples -#' data("jura", package="gstat") -#' juracomp = compositions::acomp(jura.pred[, -(1:6)]) -#' lrvg = logratioVariogram(data=juracomp, loc=jura.pred[,1:2]) -#' mf = Maf(juracomp, vg=lrvg) -#' mf -#' biplot(mf) -#' predict(mf) -#' unclass(predict(mf)) - unclass(juracomp) # predict recovers the original composition -predict.genDiag = function (object, newdata=NULL, ...) { - requireNamespace("compositions", quietly=TRUE) - if(is.null(newdata)) - newdata = object$scores - if("data.frame" %in% class(newdata)) - newdata = as.matrix(newdata) - Z = newdata %*% unclass(object$invLoadings) - if("Center" %in% names(object)){ - Z = cdtInv(Z, orig = object$Center) + object$Center - }else{ - Z = Z + outer(rep(1, nrow(Z)), object$center) - Z = data.frame(Z) - colnames(Z) = names(object$center) - } - return(Z) -} - - - -#' Colored biplot for gemeralised diagonalisations -#' Colored biplot method for objects of class genDiag -#' @param x a generalized diagonalisation object, as obtained from a call to -#' \code{\link{Maf}} (or to \code{UWEDGE} or \code{RJD}, on the help page of \code{\link{Maf}}). -#' @param choices which factors should be represented? vector of 2 indices; defaults to -#' c(1,2) -#' @param scale deprecated, kept for coherence with \code{link{biplot.princomp}} -#' @param pc.biplot same as the homonimous argument from \code{link{biplot.princomp}}: -#' boolean, to scale variables down by sqrt(n) and observations up by the same factor. -#' @param ... further arguments to \code{\link{coloredBiplot}} -#' -#' @return nothing. Function is called exclusively to produce the plot -#' @export -#' @family generalised Diagonalisations -#' @importFrom compositions coloredBiplot -#' @method coloredBiplot genDiag -#' -#' @examples -#' data("jura", package="gstat") -#' juracomp = compositions::acomp(jura.pred[, -(1:6)]) -#' lrvg = logratioVariogram(data=juracomp, loc=jura.pred[,1:2]) -#' mf = Maf(juracomp, vg=lrvg) -#' mf -#' compositions::coloredBiplot(mf, xlabs.col=as.integer(jura.pred$Rock)+2) -coloredBiplot.genDiag <- function(x, choices = 1:2, scale=0, pc.biplot=FALSE, ...){ - if(length(choices) != 2) stop("length of choices must be 2") - if(!length(scores <- x$scores)) - stop(gettextf("object '%s' has no scores", deparse(substitute(x))), - domain = NA) - lam <- rep(1, along.with=choices) - if(is.null(n <- x$n.obs)) n <- 1 - lam <- lam * sqrt(n) - if(scale < 0 || scale > 1) warning("'scale' is outside [0, 1]") - if(scale != 0) lam <- lam^scale else lam <- 1 - if(pc.biplot) lam <- lam / sqrt(n) - coloredBiplot.default(t(t(scores[,choices]) / lam), - t(x$invLoadings[choices,] * lam), ...) - invisible() -} +#### MAF calculations (2 versions) ---------------- +# compute the MAF basis from two matrices of (alr/ilr/clr)-covariance +gsi.getMAFbasis = function(C0, CT, tol=1e-12){ + svdT = svd(CT) + A1 = with(svdT, u %*% diag(ifelse(d>tol, 1/sqrt(d), 0)) ) + B = t(A1) %*% C0 %*% A1 + svdB = svd(B) + o = nrow(svdB$u):1 + A = A1[,o] %*% svdB$u[o,] + return(A) +} + + +gsi.computeExplainedVariance <- function(X, Winv){ + rs = sapply(1:ncol(X), function(i) mvar(rmult(outer(X[,i], Winv[i,])))) + return(rs) +} + + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @param ... generic functionality arguments +#' @export +#' @importFrom stats loadings princomp +Maf = function(x,...) UseMethod("Maf",x) + +#' Generalised diagonalisations +#' Calculate several generalized diagonalisations out of a data set and its empirical +#' variogram +#' @param x a data set, typically of class "data.frame" or of a compositional class +#' @param vg empirical variogram, of a kind fitting to the data provided +#' @param i a slicer for the variogram, typically this will be one or more indices of +#' the lag distance to take. %For other options see code{getEmpVariogramSlice}. +#' +#' @return An object extending \code{c("princomp.CLASSOF(x)",}"\code{\link{princomp}}") +#' with classes "\code{genDiag}", and an extra class argument depending on the +#' diagonalisation method chosen. +#' +#' Function \code{Maf} results carry the extra class "\code{maf}", and they correspond +#' to minimum/maximum autocorrelation factors (MAF) as proposed by Switzer and Green +#' (XXXX) or XXXXX (XXXX). In this case, the +#' slicer is typically just the index of one lag distance (defaults to i=2). MAF +#' provides the analytical solution to the joint diagonalisation of two matrices, +#' the covariance of increments provided by the slicing and the conventional covariance +#' matrix (of the idt transformed values, if appropriate). Resulting factors are ordered +#' in decreasing order of spatial continuity, which might produce surprising +#' scree-plots for those who are used to see a screeplot of a principal component analysis. +#' +#' Function \code{UWEDGE} (Uniformly Weighted Exhaustive Diagonalization with Gauss +#' iterations) results carry the extra class "\code{uwedge}". The function +#' is a wrapper on \code{jointDiag::uwedge} from package \code{jointDiag}. In this case the +#' slicer is typically just a subset of indices of lag distances to consider +#' (defaults to the nearest indexes to mininum, maximum and mean lag distances of the +#' variogram). UWEDGE iteratively seeks for a pair of matrices (a mixing and a +#' demixing matrices) diagonalises the set of matrices \eqn{M_1, M_2, \ldots, M_K} +#' given, by minimizing the target quantity +#' \deqn{Q_{uwedge}(A, W) = \sum_{k=1}^K Tr[E_k^t\cdot E_k],} +#' where \eqn{E_k = (W^t\cdot M_k \cdot W- A\cdot \Lambda_k\cdot A^t)} and +#' \eqn{\Lambda_k = diag(W^t\cdot M_k \cdot W)} is the resulting diagonalized version of +#' each matrix. Obtained factors are ordered +#' in decreasing order of spatial continuity, which might produce surprising +#' scree-plots for those who are used to see a screeplot of a principal component analysis. +#' +#' Function \code{RJD} results carry the extra class "\code{rjd}". The function +#' is a wrapper on \code{JADE::rjd}, implementing the Rotational joint diagonalisation method . In this case the +#' slicer is typically just a subset of indices of lag distances to consider +#' (defaults to the nearest indexes to mininum, maximum and mean lag distances). +#' This algorithm also served for quasi-diagonalising a set of matrices as in UWEDGE, +#' just that in this case the quantity to minimise is the sum of sequares of all off-diagonal +#' elements of \eqn{A^t\cdot M_k\cdot A} for all \eqn{k=1, 2, \ldots K}. +#' +#' All these functions produce output mimicking \code{\link{princomp}}, i.e. with +#' elements +#' \describe{ +#' \item{sdev}{contrary to the output in PCA, this contains the square root of the +#' metric variance of the predictions obtained for each individual factor; this is the +#' quantity needed for \code{\link{screeplot}} to create plots of explained variance +#' by factor} +#' \item{loadings}{matrix of contributions of each (cdt-transformed) original variable to the new factors} +#' \item{center}{center of the data set (eventually, represented through \code{\link{cdt}}), +#' in compositional methods} +#' \item{scale}{the scalings applied to each original variable} +#' \item{n.obs}{number of observations} +#' \item{scores}{the scores of the supplied data on the new factors} +#' \item{call}{the call to the function (attention: it actually may come much later)} +#' } +#' and additionally some of the following arguments, in different order +#' \describe{ +#' \item{invLoadings}{matrix of contributions of each factor onto each original variable} +#' \item{Center}{compositional methods return here the cdt-backtransformed center} +#' \item{InvLoadings}{compositional methods return here the clr-backtransformed inverse loadings, so that +#' each column of this matrix can be understood as a composition on itself} +#' \item{DownInvLoadings}{compositional methods return here the clr-backtransformed "minus inverse loadings", so that +#' each column of this matrix can be understood as a composition on itself; details in +#' \code{\link{princomp.acomp}} } +#' \item{C1, C2}{Maf returns the two matrices that were diagonalised} +#' \item{eigenvalues}{Maf returns the generalized eigenvalues of the diagonalisation of C1 and C2} +#' \item{gof}{UWEDGE returns the values of the goodness of fit criterion across sweeps} +#' \item{diagonalized}{RJD returns the diagonalized matrices, in an array of (K,D,D)-dimensions, being +#' D the number of variables in \code{x}} +#' \item{type}{a string describing which package and which function was used as a workhorse for +#' the calculation} +#' } +#' +#' +#' NOTE: if the arguments you provide to RJD and UWEDGE are not of the appropriate type +#' (i.e. data.frames or equivalent) the default method of these functions will just attempt +#' to call the basis functions JADE:rjd and JointDiag:uwedge respectively. +#' This will be the case if you provide \code{x} as a "\code{matrix}", or as +#' an "\code{array}". In those cases, the output will NOT be structured as an extension +#' to princomp results; instead they will be native output from those functions. +#' +#' +#' @export +#' @method Maf data.frame +#' @aliases genDiag +#' +#' @family generalised Diagonalisations +#' +#' @examples +#' require("magrittr") +#' require("gstat") +#' require("compositions") +#' data("jura", package="gstat") +#' gs = gstat(id="Cd", formula=log(Cd)~1, locations=~Xloc+Yloc, data=jura.pred) %>% +#' gstat(id="Co", formula=log(Cd)~1, locations=~Xloc+Yloc, data=jura.pred) %>% +#' gstat(id="Cr", formula=log(Cr)~1, locations=~Xloc+Yloc, data=jura.pred) %>% +#' gstat(id="Cu", formula=log(Cu)~1, locations=~Xloc+Yloc, data=jura.pred) %>% +#' gstat(id="Ni", formula=log(Ni)~1, locations=~Xloc+Yloc, data=jura.pred) %>% +#' gstat(id="Pb", formula=log(Pb)~1, locations=~Xloc+Yloc, data=jura.pred) %>% +#' gstat(id="Zn", formula=log(Zn)~1, locations=~Xloc+Yloc, data=jura.pred) +#' vg = variogram(gs) +#' mf = Maf(aplus(jura.pred[, -(1:6)]), vg=vg) +#' mf +#' mf$loadings +#' biplot(mf) +Maf.data.frame <- function(x, vg, i=2,...){ + cl <- match.call() + C1 = unclass(var(idt(x))) + mn = colMeans(cdt(x)) + C2 = 0*C1 + ## BEGIN: to change in the future to use gmEVario + vg = as.gstatVariogram(vg) + #dists = unique(vg$dist) + #vv = vg[abs(dists[i]-vg$dist)<1e-9*dists[i],] + vv = sapply(split(vg$gamma, vg$id), function(x)x[i]) + vv = vv[order(names(vv))] + C2[lower.tri(C2,diag=TRUE)]=vv + dd = diag(diag(C2)) + C2 = unclass(C2 + t(C2) - dd) + ## END: to change in the future to use gmEVario + + + dt.centred = scale(x, center = TRUE, scale=FALSE) + eT = eigen(C1) + A1 = with(eT, vectors %*% diag(ifelse(values>1e-12, 1/sqrt(values), 0)) ) ## C1^{-0.5} + B = t(A1) %*% C2 %*% A1 + eB = eigen(B) + A2 = eB$vectors + + eigenvalues = eB$values + ord = order(eigenvalues) + A2 = A2[, ord] + W = unclass(A1) %*% unclass(A2) + rownames(W) = colnames(x) + # do not normalize + # W = unclass(t(normalize(rmult(t(W))))) + W = unclass(W) + Winv = solve(W) + colnames(Winv) = colnames(W) + facscores = unclass(dt.centred) %*% W + colnames(facscores) = paste("maf", 1:ncol(facscores), sep="") + sdevs = sqrt(gsi.computeExplainedVariance(facscores, Winv)) + res = list(sdev=sdevs, loadings = W, center=mn, scale=rep(1, ncol(x)), + n.obs=nrow(x), scores = facscores, C1=C1, C2=C2, eigenvalues = eigenvalues[ord], + invLoadings=Winv) + if(class(x)!="data.frame"){ + res$Center = cdtInv(mn) + res$InvLoadings = cdtInv(Winv, orig=x) + res$InvDownLoadings = cdtInv(-Winv, orig=x) + } + res$call = cl + res$type="gmGeostats::MAF" + class(res)= c("maf","genDiag",paste("princomp", class(x), sep="."), "princomp") + return(res) +} + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @method Maf rmult +#' @export +Maf.rmult <- Maf.data.frame + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @method Maf aplus +#' @export +Maf.aplus <- Maf.data.frame + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @method Maf rplus +#' @export +Maf.rplus <- Maf.data.frame + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @method Maf ccomp +#' @export +Maf.ccomp <- Maf.data.frame + + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @method Maf rcomp +#' @export +Maf.rcomp <- function(x, vg, i=2,...){ + requireNamespace("compositions", quietly=TRUE) + cl <- match.call() + C1 = unclass(clrvar2ilr(cov(x))) + C2 = unclass(clrvar2ilr(variation2clrvar(vg$vg[i,,]))) + clr.centred = scale(cdt(x), center = TRUE, scale=FALSE) + + eT = eigen(C1) + A1 = with(eT, vectors %*% diag(ifelse(values>1e-12, 1/sqrt(values), 0)) ) ## C1^{-0.5} + B = t(A1) %*% C2 %*% A1 + eB = eigen(B) + A2 = eB$vectors + + eigenvalues = eB$values + ord = order(eigenvalues) + A2 = A2[, ord] + W = unclass(A1) %*% unclass(A2) + # do not normalize + #W = unclass(t(normalize(rmult(t(W))))) + Wclr = unclass(t(ilr2clr(t(W)))) + rownames(Wclr) = colnames(x) + Winv = solve(W) + Wclr.inv = unclass(ilr2clr(Winv)) + colnames(Wclr.inv) = colnames(x) + facscores = unclass(clr.centred) %*% Wclr + colnames(facscores) = paste("maf", 1:ncol(facscores), sep="") + sdevs = sqrt(gsi.computeExplainedVariance(facscores, Wclr.inv)) + res = list(sdev=sdevs, loadings = Wclr, center=mean(cdt(x),...), scale=rep(1, ncol(x)), + n.obs=nrow(x), scores = facscores, + C1=C1, C2=C2, eigenvalues = eigenvalues[ord], invLoadings=Wclr.inv, + Center = mean(x, ...), + InvLoadings = cdtInv(Wclr.inv, orig=x), + InvDownLoadings = cdtInv(-Wclr.inv, orig=x), + call = cl, type="gmGeostats::MAF") + kkk = paste("princomp", class(x), sep=".") + class(res)= c("maf","genDiag", kkk, "princomp") + return(res) +} + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @method Maf acomp +#' @export +Maf.acomp <- Maf.rcomp + + + +#### UWEDGE calculations ---------- + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @export +UWEDGE = function(x,...){ + if(!requireNamespace("jointDiag", quietly=FALSE)) stop("UWEDGE requires package 'jointDiag' installed") + UseMethod("UWEDGE",x) +} + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @method UWEDGE default +#' @export +UWEDGE.default <- function(x, ...) jointDiag::uwedge(M=x, ...) + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @method UWEDGE acomp +#' @export +UWEDGE.acomp <- function(x, vg, i=NULL, ...){ + requireNamespace("compositions", quietly=TRUE) + cl <- match.call() + vg = as.logratioVariogram(vg) + if(is.null(i)){ + i = c(1, floor(c(0.5,1)*dim(vg$vg)[1])) + } + V = loadings(princomp(x)) + C1 = clrvar2ilr(cov(x), V = V) + d = ncol(x)-1 + M = -0.5*apply(vg$vg[c(1,i),,], 1, clrvar2ilr, V=V ) + dim(M)=c(d,d,length(i)+1) + M[,,1]=C1 + res = jointDiag::uwedge(M) + + ord = order(diag(res$B %*% M[,,length(i)] %*% t(res$B))) + res$B = res$B[ord,] + W = t(res$B) + # do not normalize + # W = unclass(t(normalize(rmult(t(W))))) + W = unclass(W) + ilrx = ilr(x, V=V) + mns = mean(ilrx) + ilrxc = ilrx-mns + facscores = unclass(ilrxc) %*% W + Wclr = V %*% W + rownames(Wclr) = colnames(x) + colnames(Wclr) <- paste("uwedge", 1:d, sep="") + Wclr.inv = gsiInv(Wclr) + colnames(Wclr.inv) = colnames(x) + colnames(facscores) <- paste("uwedge", 1:d, sep="") + sdevs = sqrt(gsi.computeExplainedVariance(facscores, Wclr.inv)) + out = list(sdev=sdevs, loadings = Wclr, center=mean(cdt(x)), + scale=rep(1, ncol(x)), + n.obs=nrow(x), scores = facscores, + gof=res$criter, invLoadings=Wclr.inv, + Center = mean(x), + InvLoadings = cdtInv(Wclr.inv, orig=x), + InvDownLoadings = cdtInv(-Wclr.inv, orig=x), + call = cl, type="jointDiag::uwedge") + kkk = paste("princomp", class(x), sep=".") + class(out)= c("uwedge","genDiag", kkk, "princomp") + return(out) +} + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @method UWEDGE rcomp +#' @export +UWEDGE.rcomp <- UWEDGE.acomp + + + + +### RJD calculation ----------------------- + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @export +RJD = function(x,...){ + if(!requireNamespace("JADE", quietly=FALSE)) stop("RJD requires package 'JADE' installed") + UseMethod("RJD",x) +} + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @method RJD default +#' @export +RJD.default <- function(x, ...){ + JADE::rjd(X=x, ...) +} + + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @method RJD acomp +#' @export +RJD.acomp <- function(x, vg, i=NULL, ...){ + requireNamespace("compositions", quietly=TRUE) + cl <- match.call() + vg = as.logratioVariogram(vg) + if(is.null(i)) + i = c(1, floor(c(0.5,1)*dim(vg$vg)[1])) + d = ncol(x)-1 + i = sort(i,decreasing=TRUE) + V = loadings(princomp(x)) + M = -0.5*apply(vg$vg[i ,,], 1, clrvar2ilr, V=V ) + dim(M)=c(d,d,length(i)) + res = JADE::rjd(M) + res$B = t(res$V) + + ord = order(diag(res$B %*% M[,,2] %*% t(res$B))) + res$B = res$B[ord,] + W = t(res$B) + # do not normalize + # W = unclass(t(normalize(rmult(t(W))))) + W = unclass(W) + ilrx = ilr(x, V=V) + mns = mean(ilrx) + ilrxc = ilrx-mns + facscores = unclass(ilrxc) %*% W + Wclr = V %*% W + rownames(Wclr) = colnames(x) + Wclr.inv = t(Wclr) + colnames(Wclr.inv) = colnames(x) + sdevs = sqrt(gsi.computeExplainedVariance(facscores, Wclr.inv)) + o = order(sdevs, decreasing = TRUE) + Wclr = Wclr[,o] + facscores = facscores[,o] + Wclr.inv = Wclr.inv[o,] + colnames(Wclr) <- rownames(Wclr.inv) <- paste("rjd", 1:d, sep="") + colnames(facscores) <- paste("rjd", 1:d, sep="") + out = list(sdev=sdevs[o], loadings = Wclr, center=mean(cdt(x)), + scale=rep(1, ncol(x)), + n.obs=nrow(x), scores = facscores, + diagonalized = res$D[o,o,], invLoadings=Wclr.inv, + Center = mean(x), + InvLoadings = cdtInv(Wclr.inv, orig=x), + InvDownLoadings = cdtInv(-Wclr.inv, orig=x), + call = cl, type="JADE::rjd") + kkk = paste("princomp", class(x), sep=".") + class(out)= c("rjd","genDiag", kkk, "princomp") + return(out) +} + +#' @describeIn Maf.data.frame Generalised diagonalisations +#' @method RJD rcomp +#' @export +RJD.rcomp <- RJD.acomp + + +### genDiag methods ---- + +#' Predict method for generalised diagonalisation objects +#' +#' @param object a generalized diagonalisation object, as obtained from a call to +#' \code{\link{Maf}}, and on the same page, information on the other diagonalisation +#' methods \code{UWEDGE} or \code{RJD} +#' @param newdata a matrix or data.frame of factor scores to convert back to the original +#' scale (default: the scores element from `object`) +#' @param ... not used, kept for generic compatibility +#' +#' @return A data set or compositional object of the nature of the original data +#' used for creating the genDiag object. +#' @export +#' @family generalised Diagonalisations +#' @examples +#' data("jura", package="gstat") +#' juracomp = compositions::acomp(jura.pred[, -(1:6)]) +#' lrvg = logratioVariogram(data=juracomp, loc=jura.pred[,1:2]) +#' mf = Maf(juracomp, vg=lrvg) +#' mf +#' biplot(mf) +#' predict(mf) +#' unclass(predict(mf)) - unclass(juracomp) # predict recovers the original composition +predict.genDiag = function (object, newdata=NULL, ...) { + requireNamespace("compositions", quietly=TRUE) + if(is.null(newdata)) + newdata = object$scores + if("data.frame" %in% class(newdata)) + newdata = as.matrix(newdata) + Z = newdata %*% unclass(object$invLoadings) + if("Center" %in% names(object)){ + Z = cdtInv(Z, orig = object$Center) + object$Center + }else{ + Z = Z + outer(rep(1, nrow(Z)), object$center) + Z = data.frame(Z) + colnames(Z) = names(object$center) + } + return(Z) +} + + + +#' Colored biplot for gemeralised diagonalisations +#' Colored biplot method for objects of class genDiag +#' @param x a generalized diagonalisation object, as obtained from a call to +#' \code{\link{Maf}} (or to \code{UWEDGE} or \code{RJD}, on the help page of \code{\link{Maf}}). +#' @param choices which factors should be represented? vector of 2 indices; defaults to +#' c(1,2) +#' @param scale deprecated, kept for coherence with \code{link{biplot.princomp}} +#' @param pc.biplot same as the homonimous argument from \code{link{biplot.princomp}}: +#' boolean, to scale variables down by sqrt(n) and observations up by the same factor. +#' @param ... further arguments to \code{\link{coloredBiplot}} +#' +#' @return nothing. Function is called exclusively to produce the plot +#' @export +#' @family generalised Diagonalisations +#' @importFrom compositions coloredBiplot +#' @method coloredBiplot genDiag +#' +#' @examples +#' data("jura", package="gstat") +#' juracomp = compositions::acomp(jura.pred[, -(1:6)]) +#' lrvg = logratioVariogram(data=juracomp, loc=jura.pred[,1:2]) +#' mf = Maf(juracomp, vg=lrvg) +#' mf +#' compositions::coloredBiplot(mf, xlabs.col=as.integer(jura.pred$Rock)+2) +coloredBiplot.genDiag <- function(x, choices = 1:2, scale=0, pc.biplot=FALSE, ...){ + if(length(choices) != 2) stop("length of choices must be 2") + if(!length(scores <- x$scores)) + stop(gettextf("object '%s' has no scores", deparse(substitute(x))), + domain = NA) + lam <- rep(1, along.with=choices) + if(is.null(n <- x$n.obs)) n <- 1 + lam <- lam * sqrt(n) + if(scale < 0 || scale > 1) warning("'scale' is outside [0, 1]") + if(scale != 0) lam <- lam^scale else lam <- 1 + if(pc.biplot) lam <- lam / sqrt(n) + coloredBiplot.default(t(t(scores[,choices]) / lam), + t(x$invLoadings[choices,] * lam), ...) + invisible() +} diff --git a/src/Makevars b/src/Makevars index a2b25e294eadc4253a8b8e4fb8d041aa612ba96a..534b476314013d5df4b5283ae63af9db2a48f181 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,5 +1,5 @@ -#PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS) -PKG_LIBS = $(FLIBS) -lstdc++ # $(SHLIB_OPENMP_CFLAGS) +PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS) +PKG_LIBS = $(FLIBS) -lstdc++ $(SHLIB_OPENMP_CFLAGS) C_OBJS = gmGeostats.o diff --git a/src/gmGeostats.c b/src/gmGeostats.c index 3ebb9b1b6151f06352775186a27d5d779a8ae716..fce52bfe9ae199ab1b69c8845aa8e6f16ea3eb44 100644 --- a/src/gmGeostats.c +++ b/src/gmGeostats.c @@ -6,12 +6,11 @@ */ // attention: comment this if not compiling #include <stdio.h> -#include <omp.h> #include <Rinternals.h> -#include <R.h> -#include <Rmath.h> -#include <R_ext/BLAS.h> -#include <R_ext/Lapack.h> + +#ifdef _OPENMP +#include <omp.h> +#endif #define inR // attention: this must be uncommented if not compiling @@ -423,10 +422,12 @@ void CMVTurningBands( (*bandSim[typeCgram[ss]])(n,projs,band,1.0,moreCgramData+ss); /* this function takes the projs and returns on band the the curve */ /* ... multiply the eigenvector by the curve, and accumulate */ +#ifdef _OPENMP #pragma omp parallel for \ if(!omp_in_parallel()&&0) \ num_threads(omp_get_num_procs()) \ default(shared) private(i,j,d2) +#endif for(i=0;i<n;i++){ /* location */ for(j=0;j<d;j++){ /* variable */ d2 = sqrtSill[ss + *nCgrams *(j+d*ev)]*band[i]; @@ -523,12 +524,16 @@ void CCondSim( Rprintf("starting conditioning by dual kriging\n"); /* Kriging from simulated using Cinv */ // Create differenes of obs and sim -#pragma omp parallel \ +#ifdef _OPENMP + #pragma omp parallel \ if(!omp_in_parallel()) \ num_threads(omp_get_num_procs()) \ default(shared) private(i,j,sim,shift,k) +#endif { +#ifdef _OPENMP #pragma omp parallel for +#endif for(sim=0;sim<nsim;sim++) { shift=nd*sim; for(i=0;i<nin;i++) @@ -545,10 +550,12 @@ void CCondSim( */ // Dual Kriging preparation +#ifdef _OPENMP #pragma omp parallel for \ if(!omp_in_parallel()&&0) \ num_threads(omp_get_num_procs()) \ default(shared) private(sim) +#endif for(sim=0;sim<nsim;sim++) { F77_NAME(dgemv)(&No, &dmnin, @@ -608,10 +615,12 @@ void CCondSim( //} /* Z[,i] += t(cbuf(d*nin,d)) %*% dbuf(d*nin) */ +#ifdef _OPENMP #pragma omp parallel for \ if(!omp_in_parallel()&&0) \ num_threads(omp_get_num_procs()) \ default(shared) private(sim) +#endif for(sim=0;sim<nsim;sim++) { F77_NAME(dgemv)(&Transposed, &dmnin, @@ -695,7 +704,9 @@ extern void anaForwardC(const int *dimX, const double h=((double)1.0)/steps; if( dimY[0]!=m ) error("anaForwardC: x and y have different number of variables / rows"); +#ifdef _OPENMP #pragma omp parallel for +#endif for(size_t i=0;i<nx;i++) { double v1[m]; double v2[m]; @@ -741,7 +752,9 @@ extern void anaBackwardC(const int *dimX, const double h=((double)1.0)/steps; if( dimY[0]!=m ) error("anaBackwardC: x and y have different number of variables / rows"); +#ifdef _OPENMP #pragma omp parallel for +#endif for(size_t i=0;i<nx;i++) { double v1[m]; double v2[m];