Question: is it possible to generate a reasonable sorting of soil profiles (a SoilProfileCollection) using pair-wise distances computed only from generalized horizon labels (GHL)? I think that it might, as long as these labels are encoded as ordered factors. We can use profile_compare to test this hypothesis. One might attempt such a comparison within a collection of similar soils where the only reliable data available are horizon depths and designations.

We will be using a subset of the the loafercreek sample data from the {soilDB} package.

library(aqp)
library(soilDB)
library(sharpshootR)

library(cluster)
library(dendextend)
library(MASS)

library(corrplot)

# example data
data("loafercreek")
hzdesgnname(loafercreek) <- 'hzname'

# focus on profiles 20-40
x <- loafercreek[20:40, ]

# modify default arguments for some flair
par(mar = c(0, 0, 0, 2))
plotSPC(x, width = 0.33, name.style = 'center-center', cex.names = 0.65, print.id = FALSE)

Apply generalized horizon labels using overly-simplistic rules. This is only a demonstration.

# new labels
n <- c('O', 'A', 'B', 'Bt', 'BC', 'Cr', 'R')
# GHL patterns
p <- c('O', 'A', 'B', 'Bt', 'BC', 'Cr', 'R')

# note additional argument
horizons(x)$genhz <- generalize.hz(x$hzname, new = n, pat = p)

# the most important step, genhz must be encoded as an ordered factor
x$genhz <- ordered(x$genhz)

# check frequency of GHL assignment
# note that some are missing
table(x$genhz)
## 
##        O        A        B       Bt       BC       Cr        R not-used 
##        3       22       18       66       11        8       21        2

Remove profiles containing horizons assigned to the “not-used” GHL.

x$flag <- profileApply(x, function(i) {
  any(i$genhz == 'not-used')
})

# SPC-aware version of subset()
y <- subset(x, !flag)

# check: ok
table(y$genhz)
## 
##        O        A        B       Bt       BC       Cr        R not-used 
##        3       20       16       59       11        8       19        0
# add short ID for checking our work
site(y)$shortID <- 1:length(y)

par(mar = c(0, 0, 3, 2))
plotSPC(y, color = 'genhz', width = 0.33, name.style = 'center-center', cex.names = 0.65, fixLabelCollisions = FALSE, col.label = 'Generalized Horizon Label', label = 'shortID')

Add empty horizons to bottom of the profiles with bottom-most horizon depth < 150cm, label with ‘R’ GHL. Truncate deeper profiles to 150cm. See ?fillHzGaps and ?trunc for details.

# extend with empty horizons
z <- fillHzGaps(y, to_top = NA, to_bottom = 150)

# label as 'R'
# important to use one of the GHL specified in `genhz`
z$hzname[is.na(z$genhz)] <- 'R'
z$genhz[is.na(z$genhz)] <- 'R'

# truncate all profiles to 150cm
z <- trunc(z, z1 = 0, z2 = 150)

par(mar = c(0, 0, 3, 2))
plotSPC(z, color = 'genhz', width = 0.33, name.style = 'center-center',  cex.names = 0.65, col.label = 'Generalized Horizon Label', label = 'shortID')

Arrange profiles by depth to contact using plot.order argument to plotSPC. See ?getSoilDepthClass and ?estimateSoilDepth for details on estimating soil depth.

# add soil depth information to site
sdc <- getSoilDepthClass(z)
site(z) <- sdc

par(mar = c(0, 0, 3, 2))
plotSPC(z, color = 'genhz', width = 0.33, name.style = 'center-center', cex.names = 0.65, plot.order = order(z$depth), col.label = 'Generalized Horizon Label', label = 'shortID')