Question: is it possible to generate a reasonable sorting of soil
profiles (a SoilProfileCollection
) using pair-wise
distances computed only from generalized horizon labels (GHL)? I think
that it might, as long as these labels are encoded as ordered
factors. We can use profile_compare
to test this
hypothesis. One might attempt such a comparison within a collection of
similar soils where the only reliable data available are horizon depths
and designations.
We will be using a subset of the the loafercreek
sample
data from the {soilDB} package.
library(aqp)
library(soilDB)
library(sharpshootR)
library(cluster)
library(dendextend)
library(MASS)
library(corrplot)
# example data
data("loafercreek")
hzdesgnname(loafercreek) <- 'hzname'
# focus on profiles 20-40
x <- loafercreek[20:40, ]
# modify default arguments for some flair
par(mar = c(0, 0, 0, 2))
plotSPC(x, width = 0.33, name.style = 'center-center', cex.names = 0.65, print.id = FALSE)
Apply generalized horizon labels using overly-simplistic rules. This is only a demonstration.
# new labels
n <- c('O', 'A', 'B', 'Bt', 'BC', 'Cr', 'R')
# GHL patterns
p <- c('O', 'A', 'B', 'Bt', 'BC', 'Cr', 'R')
# note additional argument
horizons(x)$genhz <- generalize.hz(x$hzname, new = n, pat = p)
# the most important step, genhz must be encoded as an ordered factor
x$genhz <- ordered(x$genhz)
# check frequency of GHL assignment
# note that some are missing
table(x$genhz)
##
## O A B Bt BC Cr R not-used
## 3 22 18 66 11 8 21 2
Remove profiles containing horizons assigned to the “not-used” GHL.
x$flag <- profileApply(x, function(i) {
any(i$genhz == 'not-used')
})
# SPC-aware version of subset()
y <- subset(x, !flag)
# check: ok
table(y$genhz)
##
## O A B Bt BC Cr R not-used
## 3 20 16 59 11 8 19 0
# add short ID for checking our work
site(y)$shortID <- 1:length(y)
par(mar = c(0, 0, 3, 2))
plotSPC(y, color = 'genhz', width = 0.33, name.style = 'center-center', cex.names = 0.65, fixLabelCollisions = FALSE, col.label = 'Generalized Horizon Label', label = 'shortID')
Add empty horizons to bottom of the profiles with bottom-most horizon
depth < 150cm, label with ‘R’ GHL. Truncate deeper profiles to 150cm.
See ?fillHzGaps
and ?trunc
for details.
# extend with empty horizons
z <- fillHzGaps(y, to_top = NA, to_bottom = 150)
# label as 'R'
# important to use one of the GHL specified in `genhz`
z$hzname[is.na(z$genhz)] <- 'R'
z$genhz[is.na(z$genhz)] <- 'R'
# truncate all profiles to 150cm
z <- trunc(z, z1 = 0, z2 = 150)
par(mar = c(0, 0, 3, 2))
plotSPC(z, color = 'genhz', width = 0.33, name.style = 'center-center', cex.names = 0.65, col.label = 'Generalized Horizon Label', label = 'shortID')
Arrange profiles by depth to contact using plot.order
argument to plotSPC
. See ?getSoilDepthClass
and ?estimateSoilDepth
for details on estimating soil
depth.
# add soil depth information to site
sdc <- getSoilDepthClass(z)
site(z) <- sdc
par(mar = c(0, 0, 3, 2))
plotSPC(z, color = 'genhz', width = 0.33, name.style = 'center-center', cex.names = 0.65, plot.order = order(z$depth), col.label = 'Generalized Horizon Label', label = 'shortID')
Compute pair-wise distances between profiles using genhz
(an ordered factor) to a maximum depth of 150cm. See
?profile_compare
and ?daisy
for details.
Divisive hierarchical clustering is applied to the distance matrix, with
a final re-ordering step according to soil depth
(dendextend::rotate)
.
# aqp >= 2.0
d <- NCSP(z, vars = 'genhz', maxDepth = 150, k = 0, rescaleResult = TRUE)
# divisive hierarchical clustering
h <- as.hclust(diana(d))
# attempt sorting dendrogram by soil depth
h <- dendextend::rotate(h, order(z$depth))
As suggested by Shawn Salley, a quick view of the distance matrix as an image. Pair-wise distances are encoded with color.
# expand dist object to full matrix form of the pair-wise distances
m <- as.matrix(d)
# copy short IDs from Soil Profile Collection to full distance matrix
dimnames(m) <- list(z$shortID, z$shortID)
# invert device foreground / background colors for an artistic effect
# use colors from The Life Aquatic
par(bg = 'black', fg = 'white')
corrplot(
m,
col = hcl.colors(n = 25, palette = 'zissou1'),
is.corr = FALSE,
col.lim = c(0, 1),
method = "color",
order = "original",
type = "upper",
# tl.pos = "n",
# cl.pos = "n",
mar = c(0.1, 0, 0, 0.8)
)
Rank, left to right, profiles according to median distance to all other profiles in the collection. Profiles 17, 6, 4, or 13 might be good candidates for typical pedons.
# convert to full matrix representation
m <- as.matrix(d)
# copy shorter IDs from SPC
dimnames(m) <- list(z$shortID, z$shortID)
# mask diagonal and lower triangle with NA
# ignoring these in subsequent steps
m[upper.tri(m, diag = TRUE)] <- NA
# extract distances to all other profiles
dl <- lapply(1:nrow(m), FUN = function(i) {
.r <- c(m[i, ], m[, i])
return(as.vector(na.omit(.r)))
})
# copy names
names(dl) <- dimnames(m)[[1]]
# compute median distance to all other profiles
(med.dist <- sort(sapply(dl, median, na.rm = TRUE)))
## 17 6 4 13 19 14 5 3 15 1
## 0.2615694 0.2635815 0.2726358 0.2756539 0.2877264 0.2907445 0.3138833 0.3138833 0.3259557 0.3269618
## 16 2 10 11 18 12 7 8 9
## 0.3551308 0.3651911 0.3822938 0.3923541 0.4577465 0.4989940 0.5090543 0.5090543 0.6418511
# sorting vector for plotting sketches
idx <- match(names(med.dist), z$shortID)
par(mar = c(4, 0, 3, 2))
plotSPC(z, plot.order = idx, color = 'genhz', width = 0.33, name.style = 'center-center', cex.names = 0.65, col.label = 'Generalized Horizon Label', label = 'shortID')
axis(1, at = 1:length(z), labels = round(med.dist, 3), line = 0, cex.axis = 0.85)
mtext("Median distance to all other profiles (not to scale)", side = 1, line = 2.25)
Another view of median distance to all other profiles, as a set of
connected points. This helps demonstrate the scale of differences within
the collection. Note that values in the distance matrix d
range from 0 to 1. We have to use some tricks to get the vertical
spacing right.
# trick 1: adjust median distances to fit "above" profiles
# shift down 125cm
# rescale from 0 to 125cm
.yoff <- 125
med.dist.rescale <- .yoff - aqp:::.rescaleRange(med.dist, 0, .yoff)
# trick 2: plot profiles, shifting "down" 150cm
# use y.offset argument
par(mar = c(0.25, 1.5, 3, 1.5))
plotSPC(z, plot.order = idx, color = 'genhz', width = 0.33, name.style = 'center-center', cex.names = 0.65, col.label = 'Generalized Horizon Label', label = 'shortID', scaling.factor = 1, y.offset = 150)
# add the scaled + shifted median distances as points connected by lines
lines(x = 1:length(z), y = med.dist.rescale, type = 'b')
# trick 3: make a special axis describing median distance
# compute "pretty" intervals for the axis, on the shifted and scaled values
.p <- pretty(med.dist.rescale)
# use a linear model to convert between shifted/scaled and original values
# slope ~ scale
# intercept ~ shift
.m <- lm(med.dist ~ med.dist.rescale)
# convert pretty axis values back to original scale
.lab <- predict(.m, data.frame(med.dist.rescale = .p))
# add axis
axis(side = 2, at = .p, labels = round(.lab, 2), line = -1.5, las = 1, cex.axis = 0.75)
# annotate
mtext('Median distance to all other profiles (max = 1)', side = 3, at = 1, adj = 0, line = -2)
Use plotProfileDendrogram()
from the sharpshootR package
to align profile sketches with the terminal leaves of a dendrogram. When
applied to other data, you may have to tinker with the
scaling.factor
and y.offset
arguments.
par(mar = c(1, 0, 3, 2))
plotProfileDendrogram(z, clust = h, scaling.factor = 0.0095, y.offset = 0.15, color = 'genhz', width = 0.33, name.style = 'center-center', col.label = 'Generalized Horizon Label', cex.names = 0.65, label = 'shortID')
Use distance from 1st profile as a virtual transect via
alignTransect
, fixOverlap
attempts to
spread-out profiles along the x-axis to reduce overlap.
m <- as.matrix(d)
set.seed(10101)
pos <- alignTransect(m[1, ], x.min = 1, x.max = length(z), thresh = 0.5)
Distance from 1st profile (left-most, marked with a red asterisk) used to order collection along an integer sequence.
par(mar = c(4, 0, 3, 2))
plotSPC(z, plot.order = pos$order, color = 'genhz', width = 0.33, name.style = 'center-center', cex.names = 0.65, col.label = 'Generalized Horizon Label', label = 'shortID')
points(x = 1, y = -15, pch = '*', cex = 3, col = 'firebrick')
axis(1, at = 1:length(z), labels = round(pos$grad, 3), line = 0, cex.axis = 0.85)
mtext("Distance from 1st Profile (not to scale)", side = 1, line = 2.25)
Distance from 1st profile (left-most, marked with a red asterisk) used to place profiles at relative distances along the x-axis.
par(mar = c(4, 0, 3, 2))
plotSPC(z, plot.order = pos$order, relative.pos = pos$relative.pos, color = 'genhz', width = 0.2, name.style = 'center-center', name = NA, col.label = 'Generalized Horizon Label', label = 'shortID')
points(x = 1, y = -15, pch = '*', cex = 3, col = 'firebrick')
axis(1, at = pos$relative.pos, labels = round(pos$grad, 3), line = 0, cex.axis = 0.66)
mtext("Distance from 1st Profile", side = 1, line = 2.25)
Create an ordination from pair-wise distances using non-metric
multidimensional scaling (MASS::sammon
). It is necessary to
add a small amount of distance back to the distance matrix before
ordination for two reasons:
rescaleResult = TRUE
argument to NCSP()
)# add tiny amount for 0-distance pairs (duplicate profiles)
d <- d + 0.001
# extract nMDS scores from result
mds <- sammon(d)
## Initial stress : 0.02655
## stress after 10 iters: 0.00931, magic = 0.500
## stress after 20 iters: 0.00908, magic = 0.500
## stress after 30 iters: 0.00905, magic = 0.500
mds <- mds$points
# ensure ordering is preserved:
# profile IDs -> distance matrix -> nMDS
all(row.names(mds) == profile_id(z))
## [1] TRUE
Re-scale nMDS coordinates to typical coordinate system used by
plotSPC
.
# re-scale nMDS axis 1 to the typical horizontal scale used by plotSPC
xoff <- aqp:::.rescaleRange(mds[, 1], x0 = 1, x1 = length(z))
# re-scale nMDS axis 2 to the typical vertical scale used by plotSPC
yoff <- aqp:::.rescaleRange(mds[, 2], x0 = -10, x1 = max(z))
Place (“hang”) soil profile sketches according to nMDS ordination (points). Overlap is a problem.
par(mar = c(0.25, 0.25, 3.5, 0.25))
# suppress horizon names with `names = NA`
plotSPC(z, y.offset = yoff, relative.pos = xoff, color = 'genhz', width = 0.25, name.style = 'center-center', name = NA, col.label = 'Generalized Horizon Label', label = 'shortID', scaling.factor = 0.75)
points(xoff, yoff, pch = 16, cex = 0.66)
grid(nx = 10, ny = 10)
abline(v = mean(xoff), h = mean(yoff), lty = 2)
box()
Demonstrate the effect of fixOverlap()
on relative
positions along nMDS axis 1.
# use re-scaled nMDS coordinates as virtual transect
# adjust to reduce overlap
# with an expansion of the x-axis out to length(z) + 5
set.seed(10110)
xoff.fixed <- fixOverlap(xoff, thresh = 0.65, min.x = 1, max.x = length(z) + 5, method = 'S')
# x,y coordinates (offsets) from nMDS axes 1,2
par(mar = c(3, 3, 1, 1))
plot(cbind(xoff, yoff), pch = 1, las = 1, axes = FALSE, ylab = '', xlab = '', xlim = c(min(xoff), max(xoff.fixed)))
# annotate with short IDs
text(cbind(xoff, yoff), labels = z$shortID, cex = 0.66, pos = 1, font = 3)
grid(nx= 10, ny = 10)
mtext('nMDS axis 1', side = 1, line = 1)
mtext('nMDS axis 2', side = 2, line = 1)
# adjusted x offsets, thanks to fixOverlap()
points(cbind(xoff.fixed, yoff), pch = 16)
# annotate with short IDs
text(cbind(xoff.fixed, yoff), labels = z$shortID, cex = 0.66, pos = 3, font = 2)
# arrows connect original, scaled nMDS axis 1 -> adjusted-for-overlap nMDS axis 1
arrows(
x0 = xoff, y1 = yoff,
x1 = xoff.fixed, y0 = yoff,
len = 0.05
)
legend('bottomleft', legend = c('original', 'adjusted'), pch = c(1, 16), box.col = 'white', text.font = c(3,2))
box()
Place soil profile sketches according to approximate positions within
nMDS ordination (points). Note that y-offsets (nMDS axis 2 are inverted,
due to “0 is at the top” depth logic used by plotSPC()
). A
very large output device is required for legible text.
par(mar = c(0.25, 0.25, 3.5, 0.25))
plotSPC(z, y.offset = yoff, relative.pos = xoff.fixed, color = 'genhz', width = 0.25, name.style = 'center-center', cex.names = 0.45, hz.depths = TRUE, col.label = 'Generalized Horizon Label', label = 'shortID', scaling.factor = 0.75)
# add adjusted nMDS points
points(xoff.fixed, yoff, pch = 16, cex = 0.75)
box()
Another approach, leaving out all of the text annotation. Morphologic patterns and their similarity are clear.
par(mar = c(0.25, 0.25, 3.5, 0.25))
plotSPC(z, y.offset = yoff, relative.pos = xoff.fixed, color = 'genhz', width = 0.25, name.style = 'center-center', hz.depths = FALSE, name = NA, col.label = 'Generalized Horizon Label', label = 'shortID', scaling.factor = 0.75)
# grid
grid(nx = 10, ny = 10)
# annotate origin of nMDS axes
points(x = mean(xoff), y = mean(yoff), pch = 3, col = 'black', lwd = 2)
box()
This document is based on aqp
version 2.0.3.