Setup

# stable packages from CRAN
install.packages('aqp')
install.packages('soilDB')
install.packages('sharpshootR')
install.packages('FactoMineR')
install.packages('cowplot')

# latest versions from GitHub
remotes::install_github("ncss-tech/aqp", dependencies=FALSE, upgrade_dependencies=FALSE)
remotes::install_github("ncss-tech/soilDB", dependencies=FALSE, upgrade_dependencies=FALSE)
remotes::install_github("ncss-tech/sharpshootR", dependencies=FALSE, upgrade_dependencies=FALSE)

Hillslope Position / Soil Morphology

library(aqp)
library(soilDB)
library(sharpshootR)

# soil of interest
# https://casoilresource.lawr.ucdavis.edu/gmap/?loc=44.10219,-122.38589,z14
s <- 'peavine'
# s <- 'clarion'

# all siblings
sib <- siblings(s)

# all soils of interest
soils <- c(s, sib$sib$sibling)

# get morphology + extended summaries
osds <- fetchOSD(soils, extended = TRUE)

There may be missing series from either / both SPC and geomorphic summaries. TBC.

# there may be records missing from SPC / hill slope position
nm <- intersect(profile_id(osd$SPC), osd$hillpos$series)

# keep only those series that exist in both
sub <- subset(osd$SPC, profile_id(osd$SPC) %in% nm)

## inverse problem: extra records in hill slope summaries
# subset hillpos
hillpos.sub <- subset(osd$hillpos, subset = series %in% profile_id(sub))

## TODO: use subset objects below

res <- vizHillslopePosition(osds$hillpos)

# re-order hillslope proportions according to clustering
hp <- osds$hillpos[res$order, ]
nm <- names(hp[, 2:6])

series	Toeslope	Footslope	Backslope	Shoulder	Summit	n	shannon_entropy
REMOTE	0.00	0.00	0.53	0.13	0.33	30	1.40
DIGGER	0.00	0.00	0.59	0.19	0.22	64	1.38
FORMADER	0.00	0.00	0.43	0.36	0.21	14	1.53
BOHANNON	0.00	0.00	0.46	0.37	0.18	136	1.49
SHIVIGNY	0.00	0.00	0.59	0.34	0.06	32	1.23
MEDA	0.00	0.25	0.25	0.25	0.25	4	2.00
PEAVINE	0.13	0.13	0.31	0.23	0.21	39	2.24
SLICKROCK	0.03	0.25	0.33	0.15	0.24	72	2.08
PREACHER	0.10	0.21	0.40	0.06	0.23	113	2.07
HONEYGROVE	0.19	0.29	0.26	0.00	0.26	80	1.98
HEMCROSS	0.18	0.38	0.21	0.00	0.23	39	1.93
PANTHER	0.18	0.82	0.00	0.00	0.00	146	0.68
MINNIECE	0.57	0.43	0.00	0.00	0.00	7	0.99

print(res$fig)

par(mar=c(0,0,1,1))
SoilTaxonomyDendrogram(osds$SPC, scaling.factor = 0.015, width = 0.28, name.style = 'center-center')

# colors
par(mar = c(1, 1, 1, 1))
hp.cols <- RColorBrewer::brewer.pal(n = 5, name = 'Set1')[c(2, 3, 4, 5, 1)]
soilPalette(hp.cols, lab = nm)

Composite Figures

Version 1: Lines

par(mar = c(0.5, 0, 0, 2))
layout(matrix(c(1,2)), widths = c(1,1), heights = c(2,1))
plotProfileDendrogram(osds$SPC, res$clust, dend.y.scale = 3, scaling.factor = 0.012, y.offset = 0.2, width = 0.32, name.style = 'center-center', cex.names = 0.7, shrink = TRUE, cex.id = 0.55)

matplot(y = hp[, 2:6], type = 'b', lty = 1, pch = 16, axes = FALSE, col = hp.cols, xlab = '', ylab = '', xlim = c(0.5, length(osds$SPC) + 1))
# grid(nx = 0, ny = NULL)
axis(side = 4, line = -1, las = 1, cex.axis = 0.7)
# axis(side = 2, line = -3, las = 1, cex.axis = 0.7)
legend('topleft', legend = rev(nm), col = rev(hp.cols), pch = 16, bty = 'n', cex = 0.8, pt.cex = 2, horiz = TRUE, inset = c(0.01, 0.01))
mtext('Probability', side = 2, line = -2, font = 2)

Version 2: Stacked Bars

par(mar = c(0.5, 0, 0, 2))
layout(matrix(c(1,2)), widths = c(1,1), heights = c(2,1))
plotProfileDendrogram(osds$SPC, res$clust, dend.y.scale = 3, scaling.factor = 0.012, y.offset = 0.2, width = 0.32, name.style = 'center-center', cex.names = 0.7, shrink = TRUE, cex.id = 0.55)

sp <- c(1.5, rep(1, times = length(osds$SPC) - 1))
barplot(height = t(as.matrix(hp[, 2:6])), beside = FALSE, width = 0.5, space = sp, col = hp.cols,  axes = FALSE, xlab = '', ylab = '', xlim = c(0.5, length(osds$SPC) + 1), ylim = c(0, 1.2))

legend(x = 0.75, y = 1.2, legend = rev(nm), col = rev(hp.cols), pch = 15, bty = 'n', cex = 0.8, pt.cex = 1.25, horiz = TRUE)
mtext('Probability', side = 2, line = -2, font = 2)

3D Surface Shape

res <- vizSurfaceShape(osds$shape_across, title = 'Surface Shape (Across)')

# re-order proportions according to clustering
ss <- osds$shape_across[res$order, ]
nm <- names(ss[, 2:6])

series	Concave	Linear	Convex	n	shannon_entropy
SHIVIGNY	0.00	0.00	1.00	32	0.00
FORMADER	0.12	0.12	0.76	17	1.02
DIGGER	0.13	0.09	0.77	53	0.99
BOHANNON	0.16	0.16	0.68	103	1.22
SLICKROCK	0.21	0.19	0.60	53	1.36
REMOTE	0.41	0.07	0.52	27	1.30
PEAVINE	0.04	0.64	0.32	25	1.12
PREACHER	0.28	0.34	0.38	109	1.58
MEDA	0.33	0.47	0.20	15	1.51
HEMCROSS	0.45	0.39	0.16	49	1.48
HONEYGROVE	0.32	0.62	0.06	72	1.18
MINNIECE	0.33	0.67	0.00	9	0.92
PANTHER	0.81	0.19	0.00	286	0.71

print(res$fig)

par(mar = c(0.5, 0, 1.5, 2))
plotProfileDendrogram(osds$SPC, res$clust, dend.y.scale = 3, scaling.factor = 0.012, y.offset = 0.2, width = 0.32, name.style = 'center-center', cex.names = 0.7, shrink = TRUE, cex.id = 0.55)
title('Surface Shape (Across)')

Correspondance Analysis

library(aqp)
library(soilDB)
library(sharpshootR)

library(FactoMineR)
library(cowplot)

# a classic catena
soils <- c('cecil', 'altavista', 'lloyd', 'wickham', 'wilkes',  'chewacla', 'congaree')
s <- fetchOSD(soils, extended = TRUE)

# create visualizations
hs <- vizHillslopePosition(s$hillpos)
gc <- vizGeomorphicComponent(s$geomcomp)

# montage figures
plot_grid(hs$fig, gc$fig, nrow = 2, rel_heights = c(0.5, 0.5))

CA from Soil Series Geomorphic Summaries

Data are from SSURGO components.

# row-wise proportions add to 1
hs.tab <- (s$hillpos[, 2:6])
row.names(hs.tab) <- s$hillpos$series

# row-wise proportions add to 1
gc.tab <- (s$geomcomp[, 2:7])
row.names(gc.tab) <- s$geomcomp$series

## TODO: think about implications:
# * row-wise proportions ignore relationships to other rows
# * table-wise proportions assume equal weights
# * converting to row-wise counts would over-weight series of great extent

# convert row-wise proportions -> table-wise proportions
hs.tab <- hs.tab / sum(hs.tab)
gc.tab <- gc.tab / sum(gc.tab)

# perform CA
hs.ca <- CA(hs.tab, graph = FALSE)
gc.ca <- CA(gc.tab, graph = FALSE)

# plot
plot(hs.ca, autoLab='yes', title='Hillslope Position', cex=0.75, col.col='firebrick', col.row='royalblue')

plot(gc.ca, autoLab='yes', title='Geomorphic Component', cex=0.75, col.col='firebrick', col.row='royalblue')

## combine lattice + ggplot graphics
p2 <- plot(hs.ca, autoLab='yes', title='Hillslope Position', cex=0.75, col.col='firebrick', col.row='royalblue')

plot_grid(hs$fig, p2, nrow = 2, rel_heights = c(0.4, 0.6))

Multiple Factor Analysis

This is close, but doesn’t represent the cross-tabulation of 2D vs. 3D by series. Not sure how to interpret it though.

https://journal.r-project.org/archive/2013/RJ-2013-003/RJ-2013-003.pdf

# prepare 2D table
hs.tab <- (s$hillpos[, 1:6])
# prepare 3D table
gc.tab <- (s$geomcomp[, 1:7])

# inner join
# this is the column-wise concatenation of two cross-tabulations
g <- merge(hs.tab, gc.tab, by = 'series', sort = FALSE)
row.names(g) <- g$series
g$series <- NULL


# ??
# convert to table-wise proportions
g <- g / sum(g)


# multiple factor analysis
m <- MFA(g, group = c(5, 6), type = c('f', 'f'), name.group = c('2D', '3D'), graph = FALSE)

# now... how can this be interpreted?
plot(m, type = 'ind', partial = 'all')

plot(m, type = 'ind')

This document is based on aqp version 2.0, soilDB version 2.7.7, and sharpshootR version 2.0.

Exploring Geomorphic Summaries

D.E. Beaudette

2023-02-26