Pair-Wise Distance Between Profiles
Mixed Variable Types

D.E. Beaudette
2016-08-17
This document is based on aqp version 1.9.11 and sharpshootR version 0.9.7.

Introduction

Setup R Envionment

If you have never used the aqp or soilDB packages before, you will likely need to install them. This only needs to be done once.

# stable versions + deps
install.packages("aqp", dep = TRUE)
install.packages("soilDB", dep = TRUE)
install.packages("sharpshootR", dep = TRUE)

# latest versions
devtools::install_github("ncss-tech/aqp", dependencies = FALSE, upgrade_dependencies = FALSE)
devtools::install_github("ncss-tech/sharpshootR", dependencies = FALSE, upgrade_dependencies = FALSE)

Once you have all of the R packages on which this document depends, it is a good idea to load them. R packages must be installed anytime you change versions of R (e.g., after an upgrade) and loaded anytime you want to access functions from within those packages.

library(aqp)
library(soilDB)
library(sharpshootR)
library(cluster)
library(RColorBrewer)

Sample Data

While the methods outlined in this document can be applied to any collection of pedons, it is convenient to work with a standardized set of data. You can follow along with the analysis by copying code from the following blocks and running it in your R session. The sample data used in this document is based on soil profiles that have been correlated to the Loafercreek soil series from the Sierra Nevada Foothill Region of California. Note that the internal structure of the loafercreek data is identical to the structure returned by fetchNASIS() from the soilDB package. All horizon-level values are pulled from the pedon horizon table of the pedons being analyzed.

# load sample data from the soilDB package
data(loafercreek, package = "soilDB")

# get a subset of profiles to work with
x <- loafercreek[20:30, ]

Pedons correlated to the Loafercreek soil series.


Methods

Check Data

# check existing texture
table(x$texture_class)
br cl l scl sil spm
14 6 27 3 7 6
# order field texture according to plant available water low -> high note: leaving out 'in-lieu-of'
# texture classes for organic or bedrock
x$texture_class <- factor(x$texture_class, ordered = TRUE, levels = c("sl", "scl", "l", "sil", "c", "sic", 
    "cl", "sicl"))

# graphical check
par(mar = c(0, 1, 3, 1))
plot(x, color = "texture_class", label = "pedon_id", col.palette = brewer.pal(10, "Spectral"))
addVolumeFraction(x, "total_frags_pct")

# make a fake name with texture class + RF volume:
x$fake.name <- ifelse(!is.na(x$texture_class), paste0(x$texture_class, " - ", x$total_frags_pct), "")
plot(x, color = "texture_class", name = "fake.name", label = "pedon_id", cex.name = 0.65, col.palette = brewer.pal(10, 
    "Spectral"))
addVolumeFraction(x, "total_frags_pct")


Horizon Level Attributes

# make an index to soil horizons (anything other than O|Cr|R)
idx <- grep("O|Cr|R", x$hzname, invert = TRUE)

# compute pair-wise distances using texture class (ordered factor) and rock frag volume
d.hz <- profile_compare(x, vars = c("texture_class", "total_frags_pct"), max_d = 100, k = 0, filter = idx, 
    replace_na = TRUE, add_soil_flag = TRUE)
# divisive hierarchical clustering
dd.hz <- diana(d.hz)

# plot dendrogram + profiles
par(mar = c(2, 1, 3, 1))
plotProfileDendrogram(x, dd.hz, scaling.factor = 0.75, y.offset = 5, width = 0.15, color = "texture_class", 
    name = "fake.name", label = "pedon_id", cex.name = 0.75, col.palette = brewer.pal(10, "Spectral"))
addVolumeFraction(x, "total_frags_pct")


Site and Horizon Level Attributes

# fix some missing data
x$elev_field[x$elev_field == 0] <- 380

# graphical check: order pedons by slope
par(mar = c(0, 1, 3, 1))
plot(x, color = "texture_class", label = "pedon_id", col.palette = brewer.pal(10, "Spectral"), plot.order = order(x$slope_field))
addVolumeFraction(x, "total_frags_pct")
axis(side = 1, at = 1:length(x), labels = x$slope_field[order(x$slope_field)], line = -3)
mtext(text = "Field Described Slope (%)", side = 1, line = -4)

# graphical check: order pedons by elevation
par(mar = c(0, 1, 3, 1))
plot(x, color = "texture_class", label = "pedon_id", col.palette = brewer.pal(10, "Spectral"), plot.order = order(x$elev_field))
addVolumeFraction(x, "total_frags_pct")
axis(side = 1, at = 1:length(x), labels = x$elev_field[order(x$elev_field)], line = -3)
mtext(text = "Field Described Slope (m)", side = 1, line = -4)


# compute pair-wise distances using: hz attributes: texture class (ordered factor) and rock frag
# volume site attributes: surface slope and elevation
d.hz.site <- profile_compare(x, vars = c("texture_class", "total_frags_pct", "slope_field", "elev_field"), 
    max_d = 100, k = 0, filter = idx, replace_na = TRUE, add_soil_flag = TRUE)
# divisive hierarchical clustering
dd.hz.site <- diana(d.hz.site)

# plot dendrogram + profiles
par(mar = c(2, 1, 3, 1))
plotProfileDendrogram(x, dd.hz.site, scaling.factor = 0.008, y.offset = 0.1, width = 0.15, color = "texture_class", 
    name = "fake.name", label = "pedon_id", cex.name = 0.75, col.palette = brewer.pal(10, "Spectral"))
addVolumeFraction(x, "total_frags_pct")