Introduction

Setup R Envionment

If you have never used the aqp or soilDB packages before, you will likely need to install them. This only needs to be done once.

# stable versions + deps
install.packages('aqp', dep=TRUE)
install.packages('soilDB', dep=TRUE)
install.packages('sharpshootR', dep=TRUE)

# latest versions
devtools::install_github("ncss-tech/aqp", dependencies=FALSE, upgrade_dependencies=FALSE)
devtools::install_github("ncss-tech/sharpshootR", dependencies=FALSE, upgrade_dependencies=FALSE)

Once you have all of the R packages on which this document depends, it is a good idea to load them. R packages must be installed anytime you change versions of R (e.g., after an upgrade) and loaded anytime you want to access functions from within those packages.

library(aqp)
library(soilDB)
library(sharpshootR)
library(cluster)
library(RColorBrewer)
library(ape)
library(dendextend)

Simple Example via Soil Texture

# soil textures, sorted according to field book
data("ROSETTA.centroids")

# ordered vs. nominal texture classes
tex.ordered <- ROSETTA.centroids$texture
tex.nominal <- factor(ROSETTA.centroids$texture, levels=sort(as.character(ROSETTA.centroids$texture)), ordered = FALSE)

# assemble into DF, note that stringsAsFactors=FALSE required
x.1 <- data.frame(
  p1=tex.ordered,
  stringsAsFactors = FALSE
  )

x.2 <- data.frame(
  p1=tex.nominal,
  stringsAsFactors = FALSE
)

# copy texture classes to row names for automatic distance matrix / clustering labels
row.names(x.1) <- x.1$p1                  
row.names(x.2) <- x.2$p1

# 1D clustering
d.1 <- daisy(x.1[, 1, drop=FALSE], metric = 'gower') 
d.2 <- daisy(x.2[, 1, drop=FALSE], metric = 'gower')

## note differences
# d.1
# d.2

# check: pair-wise distances should reflect information contained in ranks of ordinal representation
par(mar=c(1,1,3,1), mfcol=c(1,2))
plot(as.phylo(as.hclust(diana(d.1))), main='Ordinal', label.offset=0.01)
plot(as.phylo(as.hclust(diana(d.2))), main='Nominal', label.offset=0.01)

Another reason to use ordered factors when possible: cophenetic correlation.

# ordinal
cor(d.1, cophenetic(as.hclust(diana(d.1))))

## [1] 0.7105597

# nominal
cor(d.2, cophenetic(as.hclust(diana(d.2))))

## Warning in cor(d.2, cophenetic(as.hclust(diana(d.2)))): the standard deviation is zero

## [1] NA

Sample Data

While the methods outlined in this document can be applied to any collection of pedons, it is convenient to work with a standardized set of data. You can follow along with the analysis by copying code from the following blocks and running it in your R session. The sample data used in this document is based on soil profiles that have been correlated to the Loafercreek soil series from the Sierra Nevada Foothill Region of California. Note that the internal structure of the loafercreek data is identical to the structure returned by fetchNASIS() from the soilDB package. All horizon-level values are pulled from the pedon horizon table of the pedons being analyzed.

# load sample data from the soilDB package
data(loafercreek, package = 'soilDB')

# get a subset of profiles to work with
x <- loafercreek[10:20, ]

Methods

Check Data

# check existing textures
table(x$texture_class)

## 
##  br   c  cb  cl  gr   l scl sil  sl 
##   9   4   1  13   2  32   1   5   2

# order field texture according to particle size
# note: leaving out "in-lieu-of" texture classes for organic soil material and bedrock
x$texture_class <- factor(x$texture_class, ordered = TRUE, levels = SoilTextureLevels())
x$texture_class <- droplevels(x$texture_class)

# make a copy same order, but not an ordered factor
x$texture_class_nominal <- factor(x$texture_class, levels=levels(x$texture_class), ordered = FALSE)

# graphical check
par(mar=c(0,1,3,1))
plot(x, color='texture_class', label='pedon_id')

plot(x, color='texture_class_nominal', label='pedon_id')

Horizon Level Attributes

# compute pair-wise distances using texture class (ordered factor)
d.hz <- NCSP(x, vars = c('texture_class'), maxDepth = 100, rescaleResult = TRUE)
# divisive hierarchical clustering
dd.hz <- diana(d.hz)

# plot dendrogram + profiles
par(mar = c(1,0,3,1))
plotProfileDendrogram(x, dd.hz,  width=0.25, color = 'texture_class', label='pedon_id', cex.name=0.65)

# compute pair-wise distances using texture class (nominal factor)
d.hz <- NCSP(x, vars = c('texture_class_nominal'), maxDepth = 100, rescaleResult = TRUE)
# divisive hierarchical clustering
dd.hz <- diana(d.hz)

# plot dendrogram + profiles
par(mar = c(1,0,3,1))
plotProfileDendrogram(x, dd.hz,  width=0.25, color='texture_class_nominal', label='pedon_id', cex.name=0.65)

Compare Dendrograms

d.ordinal <- NCSP(x, vars=c('texture_class'), maxDepth = 100, rescaleResult = TRUE)
d.nominal <- NCSP(x, vars=c('texture_class_nominal'), maxDepth = 100, rescaleResult = TRUE)

dendextend::tanglegram(as.dendrogram(diana(d.ordinal)), as.dendrogram(diana(d.nominal)))

Site and Horizon Level Attributes

# fix some missing data
x$elev_field[x$elev_field == 0] <- 380

# graphical check: order pedons by slope
par(mar=c(0,1,3,1))
plot(x, color='texture_class', label='pedon_id', col.palette=brewer.pal(10, 'Spectral'), plot.order=order(x$slope_field))
addVolumeFraction(x, 'total_frags_pct')
axis(side=1, at=1:length(x), labels=x$slope_field[order(x$slope_field)], line=-3)
mtext(text='Field Described Slope (%)', side=1, line=-4)

# graphical check: order pedons by elevation
par(mar=c(0,1,3,1))
plot(x, color='texture_class', label='pedon_id', col.palette=brewer.pal(10, 'Spectral'), plot.order=order(x$elev_field))
addVolumeFraction(x, 'total_frags_pct')
axis(side=1, at=1:length(x), labels=x$elev_field[order(x$elev_field)], line=-3)
mtext(text='Field Described Elevation (m)', side=1, line=-4)

\(D = (D_{s} * w_{s}) + (D_{h} * w_{h}) / (w_{s} + w_{h})\)

\(\sum_{elements} D{i} * w_{i} / \sum w_{i}\)

# hz attributes: texture class (ordered factor) and rock frag volume

D.hz <- NCSP(x, vars = c('texture_class', 'total_frags_pct'), maxDepth = 100, rescaleResult = TRUE)
w.hz <- 1

# site attributes: surface slope and elevation
D.s <- compareSites(x, vars = c('slope_field', 'elev_field'))
w.s <- 1

# combine
# note special syntaxt to add distance mat. by element
D <- Reduce(`+`, list(D.s * w.s, D.hz * w.hz)) / sum(c(w.s, w.hz))

# divisive hierarchical clustering
dd.hz.site <- diana(D)

# plot dendrogram + profiles
par(mar = c(2,1,3,1))
plotProfileDendrogram(x, dd.hz.site, scaling.factor = 0.008, y.offset = 0.1, width = 0.2, color='texture_class', label='pedon_id', cex.name=0.75, col.palette=brewer.pal(10, 'Spectral'), max.depth = 125)

addVolumeFraction(x, 'total_frags_pct')

This document is based on aqp version 2.0, soilDB version 2.7.8, and sharpshootR version 2.2.

Pair-Wise Distance Between Profiles and Mixed Variable Types

D.E. Beaudette

2023-06-08