Setup

# stable packages from CRAN
install.packages('aqp')
install.packages('soilDB')
install.packages('sharpshootR')
install.packages('FactoMineR')
install.packages('cowplot')

# latest versions from GitHub
remotes::install_github("ncss-tech/aqp", dependencies=FALSE, upgrade_dependencies=FALSE)
remotes::install_github("ncss-tech/soilDB", dependencies=FALSE, upgrade_dependencies=FALSE)
remotes::install_github("ncss-tech/sharpshootR", dependencies=FALSE, upgrade_dependencies=FALSE)

Hillslope Position / Soil Morphology

library(aqp)
library(soilDB)
library(sharpshootR)

# soil of interest
# https://casoilresource.lawr.ucdavis.edu/gmap/?loc=44.10219,-122.38589,z14
s <- 'peavine'
# s <- 'clarion'

# all siblings
sib <- siblings(s)

# all soils of interest
soils <- c(s, sib$sib$sibling)

# get morphology + extended summaries
osds <- fetchOSD(soils, extended = TRUE)

There may be missing series from either / both SPC and geomorphic summaries. TBC.

# there may be records missing from SPC / hill slope position
nm <- intersect(profile_id(osds$SPC), osds$hillpos$series)

# keep only those series that exist in both
sub <- subset(osds$SPC, profile_id(osds$SPC) %in% nm)

## inverse problem: extra records in hill slope summaries
# subset hillpos
hillpos.sub <- subset(osds$hillpos, subset = series %in% profile_id(sub))

## TODO: use subset objects below
res <- vizHillslopePosition(osds$hillpos)

# re-order hillslope proportions according to clustering
hp <- osds$hillpos[res$order, ]
nm <- names(hp[, 2:6])
series Toeslope Footslope Backslope Shoulder Summit n shannon_entropy
PEAVINE 0.13 0.13 0.31 0.23 0.21 39 2.24
HONEYGROVE 0.19 0.29 0.26 0.00 0.26 80 1.98
PANTHER 0.18 0.82 0.00 0.00 0.00 146 0.68
MINNIECE 0.57 0.43 0.00 0.00 0.00 7 0.99
print(res$fig)

par(mar=c(0,0,1,1))
SoilTaxonomyDendrogram(osds$SPC, scaling.factor = 0.015, width = 0.28, name.style = 'center-center')

# colors
par(mar = c(1, 1, 1, 1))
hp.cols <- RColorBrewer::brewer.pal(n = 5, name = 'Set1')[c(2, 3, 4, 5, 1)]
soilPalette(hp.cols, lab = nm)

Composite Figures

Version 1: Lines

par(mar = c(0.5, 0, 0, 2))
layout(matrix(c(1,2)), widths = c(1,1), heights = c(2,1))
plotProfileDendrogram(osds$SPC, res$clust, dend.y.scale = 3, scaling.factor = 0.012, y.offset = 0.2, width = 0.32, name.style = 'center-center', cex.names = 0.7, shrink = TRUE, cex.id = 0.55)

matplot(y = hp[, 2:6], type = 'b', lty = 1, pch = 16, axes = FALSE, col = hp.cols, xlab = '', ylab = '', xlim = c(0.5, length(osds$SPC) + 1))
# grid(nx = 0, ny = NULL)
axis(side = 4, line = -1, las = 1, cex.axis = 0.7)
# axis(side = 2, line = -3, las = 1, cex.axis = 0.7)
legend('topleft', legend = rev(nm), col = rev(hp.cols), pch = 16, bty = 'n', cex = 0.8, pt.cex = 2, horiz = TRUE, inset = c(0.01, 0.01))
mtext('Probability', side = 2, line = -2, font = 2)

Version 2: Stacked Bars

par(mar = c(0.5, 0, 0, 2))
layout(matrix(c(1,2)), widths = c(1,1), heights = c(2,1))
plotProfileDendrogram(osds$SPC, res$clust, dend.y.scale = 3, scaling.factor = 0.012, y.offset = 0.2, width = 0.32, name.style = 'center-center', cex.names = 0.7, shrink = TRUE, cex.id = 0.55)

sp <- c(1.5, rep(1, times = length(osds$SPC) - 1))
barplot(height = t(as.matrix(hp[, 2:6])), beside = FALSE, width = 0.5, space = sp, col = hp.cols,  axes = FALSE, xlab = '', ylab = '', xlim = c(0.5, length(osds$SPC) + 1), ylim = c(0, 1.2))

legend(x = 0.75, y = 1.2, legend = rev(nm), col = rev(hp.cols), pch = 15, bty = 'n', cex = 0.8, pt.cex = 1.25, horiz = TRUE)
mtext('Probability', side = 2, line = -2, font = 2)

3D Surface Shape

res <- vizSurfaceShape(osds$shape_across, title = 'Surface Shape (Across)')

# re-order proportions according to clustering
ss <- osds$shape_across[res$order, ]
nm <- names(ss[, 2:6])
series Concave Linear Convex Complex Undulating n shannon_entropy
PEAVINE 0.04 0.64 0.32 0 0 25 1.12
HONEYGROVE 0.32 0.62 0.06 0 0 72 1.18
MINNIECE 0.33 0.67 0.00 0 0 9 0.92
PANTHER 0.81 0.19 0.00 0 0 286 0.71
print(res$fig)

par(mar = c(0.5, 0, 1.5, 2))
plotProfileDendrogram(osds$SPC, res$clust, dend.y.scale = 3, scaling.factor = 0.012, y.offset = 0.2, width = 0.32, name.style = 'center-center', cex.names = 0.7, shrink = TRUE, cex.id = 0.55)
title('Surface Shape (Across)')

Correspondance Analysis

library(aqp)
library(soilDB)
library(sharpshootR)

library(FactoMineR)
library(cowplot)

# a classic catena
soils <- c('cecil', 'altavista', 'lloyd', 'wickham', 'wilkes',  'chewacla', 'congaree')
s <- fetchOSD(soils, extended = TRUE)

# create visualizations
hs <- vizHillslopePosition(s$hillpos)
gc <- vizGeomorphicComponent(s$geomcomp)
# montage figures
plot_grid(hs$fig, gc$fig, nrow = 2, rel_heights = c(0.5, 0.5))

CA from Soil Series Geomorphic Summaries

Data are from SSURGO components.

# row-wise proportions add to 1
hs.tab <- (s$hillpos[, 2:6])
row.names(hs.tab) <- s$hillpos$series

# row-wise proportions add to 1
gc.tab <- (s$geomcomp[, 2:7])
row.names(gc.tab) <- s$geomcomp$series

## TODO: think about implications:
# * row-wise proportions ignore relationships to other rows
# * table-wise proportions assume equal weights
# * converting to row-wise counts would over-weight series of great extent

# convert row-wise proportions -> table-wise proportions
hs.tab <- hs.tab / sum(hs.tab)
gc.tab <- gc.tab / sum(gc.tab)

# perform CA
hs.ca <- CA(hs.tab, graph = FALSE)
gc.ca <- CA(gc.tab, graph = FALSE)
# plot
plot(hs.ca, autoLab='yes', title='Hillslope Position', cex=0.75, col.col='firebrick', col.row='royalblue')

plot(gc.ca, autoLab='yes', title='Geomorphic Component', cex=0.75, col.col='firebrick', col.row='royalblue')

## combine lattice + ggplot graphics
p2 <- plot(hs.ca, autoLab='yes', title='Hillslope Position', cex=0.75, col.col='firebrick', col.row='royalblue')

plot_grid(hs$fig, p2, nrow = 2, rel_heights = c(0.4, 0.6))

Multiple Factor Analysis

The results are interesting, but contaminated by missing data: ALTAVISTA and CHEWACLA have < 4 records in the hills geomorphic component summary. These two soils are better described in the terrace summary.

https://journal.r-project.org/archive/2013/RJ-2013-003/RJ-2013-003.pdf

# prepare 2D table
hs.tab <- (s$hillpos[, 1:6])
# prepare 3D table
gc.tab <- (s$geomcomp[, 1:7])

# inner join
# this is the column-wise concatenation of two cross-tabulations
g <- merge(hs.tab, gc.tab, by = 'series', sort = FALSE)
row.names(g) <- g$series
g$series <- NULL


# multiple factor analysis
m <- MFA(g, group = c(5, 6), type = c('f', 'f'), name.group = c('2D', '3D'), graph = FALSE)

plot(m, choix = 'freq', palette = c('black', 'firebrick', 'royalblue'))

Combined 3D Geomorphic Descriptions

sib <- siblings('drummer', only.major = FALSE)
soils <- c('drummer', sib$sib$sibling)

s <- fetchOSD(soils, extended = TRUE)

# TODO: conditionally convert to counts and join if nrow > 1

hl <- data.frame(series = s$geomcomp$series, s$geomcomp[, 2:7] * s$geomcomp$n)

tr <- data.frame(series = s$terrace$series, s$terrace[, 2:3] * s$terrace$n)

fl <- data.frame(series = s$flats$series, s$flats[, 2:3] * s$flats$n)

g <- merge(hl, tr, by = 'series', sort = FALSE, all.x = TRUE, all.y = TRUE)
g <- merge(g, fl, by = 'series', sort = FALSE, all.x = TRUE, all.y = TRUE)

m <- as.matrix(g[, -1])
m[is.na(m)] <- 0

m <- sweep(m, MARGIN = 1, STATS = rowSums(m), FUN = '/')

g <- data.frame(series = g$series, m)

tab <- g[, -1]
row.names(tab) <- g$series

g.ca <- CA(tab, graph = FALSE)


plot(g.ca, autoLab = 'yes', title = 'Combined 3D Geomorphic Description', cex = 0.75, col.col = 'firebrick', col.row = 'royalblue')

summary(g.ca)
## 
## Call:
## CA(X = tab, graph = FALSE) 
## 
## The chi square of independence between the two variables is equal to 33.23084 (p-value =  1 ).
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7   Dim.8   Dim.9
## Variance               0.759   0.601   0.349   0.340   0.194   0.125   0.006   0.001   0.000
## % of var.             31.958  25.331  14.685  14.321   8.170   5.265   0.236   0.035   0.000
## Cumulative % of var.  31.958  57.289  71.974  86.295  94.465  99.730  99.965 100.000 100.000
## 
## Rows (the 10 first)
##              Iner*1000     Dim.1     ctr    cos2     Dim.2     ctr    cos2     Dim.3     ctr
## DRUMMER    |   110.714 |  -0.132   0.163   0.011 |  -1.099  14.357   0.780 |  -0.011   0.002
## ELBURN     |    76.728 |   0.603   3.428   0.339 |   0.230   0.629   0.049 |   0.702  10.106
## HARPSTER   |   215.212 |  -1.336  16.808   0.592 |   1.048  13.057   0.365 |   0.039   0.032
## MATHERTON  |   294.701 |   0.721   4.894   0.126 |  -0.361   1.551   0.032 |   0.130   0.348
## MILFORD    |    76.577 |  -0.485   2.211   0.219 |  -0.370   1.623   0.127 |  -0.084   0.144
## MUNDELEIN  |    61.980 |   0.166   0.260   0.032 |  -0.441   2.308   0.224 |  -0.298   1.819
## SABLE      |    78.564 |  -0.957   8.626   0.833 |   0.420   2.094   0.160 |  -0.067   0.092
## WAYNETOWN  |   231.568 |   1.096  11.305   0.370 |   0.499   2.962   0.077 |  -0.508   5.282
## PELLA      |    66.508 |  -0.195   0.359   0.041 |  -0.913   9.899   0.895 |   0.056   0.064
## SAWMILL    |    81.243 |  -0.460   1.992   0.186 |  -0.504   3.018   0.223 |  -0.096   0.189
##               cos2  
## DRUMMER      0.000 |
## ELBURN       0.459 |
## HARPSTER     0.001 |
## MATHERTON    0.004 |
## MILFORD      0.007 |
## MUNDELEIN    0.102 |
## SABLE        0.004 |
## WAYNETOWN    0.080 |
## PELLA        0.003 |
## SAWMILL      0.008 |
## 
## Columns
##              Iner*1000     Dim.1     ctr    cos2     Dim.2     ctr    cos2     Dim.3     ctr
## Interfluve |   380.983 |   1.005  13.451   0.268 |   0.607   6.195   0.098 |   1.522  67.210
## Crest      |     6.747 |   0.693   0.033   0.037 |   0.297   0.008   0.007 |   1.189   0.211
## Head.Slope |   148.395 |   0.828   1.031   0.053 |  -0.466   0.412   0.017 |   0.221   0.159
## Nose.Slope |    98.905 |   0.828   0.687   0.053 |  -0.466   0.275   0.017 |   0.221   0.106
## Side.Slope |   217.571 |   1.069   5.794   0.202 |   0.291   0.540   0.015 |  -0.371   1.521
## Base.Slope |   301.037 |  -0.015   0.004   0.000 |  -1.166  31.904   0.637 |   0.074   0.222
## Tread      |   408.953 |   1.147  28.799   0.534 |   0.663  12.154   0.179 |  -0.758  27.379
## Riser      |    23.493 |   1.525   0.554   0.179 |   1.123   0.379   0.097 |  -2.286   2.709
## Dip        |   532.101 |  -1.276  47.820   0.682 |   0.847  26.570   0.300 |  -0.010   0.006
## Talf       |   255.448 |  -0.212   1.826   0.054 |  -0.648  21.563   0.508 |  -0.073   0.478
##               cos2  
## Interfluve   0.615 |
## Crest        0.109 |
## Head.Slope   0.004 |
## Nose.Slope   0.004 |
## Side.Slope   0.024 |
## Base.Slope   0.003 |
## Tread        0.233 |
## Riser        0.402 |
## Dip          0.000 |
## Talf         0.007 |
dimdesc(g.ca)
## $`Dim 1`
## $`Dim 1`$row
##                 coord
## PEOTONE    -1.4455315
## HARPSTER   -1.3360293
## SABLE      -0.9570895
## MILFORD    -0.4845874
## SAWMILL    -0.4599926
## PELLA      -0.1952321
## DRUMMER    -0.1316016
## ELPASO     -0.1046382
## MUNDELEIN   0.1663111
## ELBURN      0.6034048
## MATHERTON   0.7209480
## WAYNETOWN   1.0957302
## BARRINGTON  1.2002449
## THACKERY    1.3280631
## 
## $`Dim 1`$col
##                  coord
## Dip        -1.27558702
## Talf       -0.21174105
## Base.Slope -0.01544855
## Crest       0.69280927
## Head.Slope  0.82776845
## Nose.Slope  0.82776845
## Interfluve  1.00465072
## Side.Slope  1.06918713
## Tread       1.14712534
## Riser       1.52483780
## 
## 
## $`Dim 2`
## $`Dim 2`$row
##                 coord
## ELPASO     -1.2457080
## DRUMMER    -1.0993364
## PELLA      -0.9128187
## SAWMILL    -0.5040097
## MUNDELEIN  -0.4407945
## MILFORD    -0.3696086
## MATHERTON  -0.3613718
## ELBURN      0.2301733
## SABLE       0.4198667
## WAYNETOWN   0.4993712
## BARRINGTON  0.8035946
## THACKERY    0.8706193
## HARPSTER    1.0483716
## PEOTONE     1.0616510
## 
## $`Dim 2`$col
##                 coord
## Base.Slope -1.1655870
## Talf       -0.6477906
## Nose.Slope -0.4660393
## Head.Slope -0.4660393
## Side.Slope  0.2906359
## Crest       0.2968405
## Interfluve  0.6069753
## Tread       0.6634666
## Dip         0.8465266
## Riser       1.1227851
knitr::kable(g, digits = 2)
series Interfluve Crest Head.Slope Nose.Slope Side.Slope Base.Slope Tread Riser Dip Talf
DRUMMER 0.00 0.00 0.00 0.00 0.00 0.53 0.02 0.00 0.03 0.43
ELBURN 0.38 0.01 0.00 0.00 0.04 0.06 0.19 0.00 0.05 0.27
HARPSTER 0.03 0.00 0.00 0.00 0.00 0.01 0.01 0.00 0.94 0.01
MATHERTON 0.11 0.00 0.16 0.11 0.16 0.27 0.13 0.00 0.00 0.06
MILFORD 0.01 0.00 0.00 0.00 0.00 0.00 0.01 0.00 0.22 0.76
MUNDELEIN 0.03 0.00 0.00 0.00 0.00 0.06 0.23 0.00 0.00 0.69
SABLE 0.00 0.00 0.00 0.00 0.00 0.04 0.03 0.00 0.64 0.29
WAYNETOWN 0.11 0.00 0.00 0.00 0.33 0.00 0.44 0.00 0.00 0.11
PELLA 0.03 0.00 0.00 0.00 0.00 0.37 0.00 0.00 0.06 0.54
SAWMILL 0.00 0.00 0.00 0.00 0.00 0.03 0.00 0.00 0.18 0.78
ELPASO 0.00 0.00 0.00 0.00 0.00 0.61 0.00 0.00 0.00 0.39
THACKERY 0.00 0.00 0.00 0.00 0.00 0.00 0.97 0.03 0.00 0.00
BARRINGTON 0.71 0.00 0.00 0.00 0.00 0.00 0.29 0.00 0.00 0.00
PEOTONE 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.98 0.02

This document is based on aqp version 2.3, soilDB version 2.8.14, and sharpshootR version 2.4.1.