A soil color / transition probability (TP) matrix experiment. Transition probabilities are computed in horizon depth order, starting from the top of each profile. The TP matrix can be used to investigate likely sequences and end-points, or, to simulate sequences using Markov chains.

Use pedon data associated with the Holland soil series.

library(aqp)
library(soilDB)
library(sharpshootR)
library(igraph)
library(reshape2)
library(markovchain)
library(RColorBrewer)
library(scales)

# get lab / morphologic data
x <- fetchKSSL(series = 'holland', returnMorphologicData = TRUE, simplifyColors = TRUE)

# extract pedons into SoilProfileCollection
s <- x$SPC

# remove horizons that are missing moist colors
s <- subsetHz(s, !is.na(m_hue) & !is.na(m_value) & !is.na(m_chroma))

# remove profiles with missing horizons due to above steps
s <- HzDepthLogicSubset(s)

# re-assemble Munsell color notation for moist color
s$color <- sprintf("%s %s/%s", s$m_hue, s$m_value, s$m_chroma)

Moist color changes with depth.

par(mar = c(0, 0, 2, 0))
plotSPC(s, color = 'moist_soil_color', print.id = FALSE, name = NA, depth.axis = FALSE, width = 0.4)
mtext('KSSL data correllated to Holland series', at=0.5, adj = 0)

KSSL data correllated to Holland series

Compute depth-wise transition probability matrix for moist colors. Visualize as a graph organized by communities.

# # remove profile #10
# idx <- (1:length(s))[-10]
# s <- s[idx, ]

# generate TP matrix from horizon moist colors
tp <- hzTransitionProbabilities(s, name = "color", loopTerminalStates = FALSE)

# greate graph object
par(mar = c(1, 1, 1, 1))
g <- plotSoilRelationGraph(tp, graph.mode = "directed", vertex.scaling.factor=2, edge.arrow.size = 0.5, edge.scaling.factor = 2.5, vertex.label.cex = 0.75, vertex.label.family = "sans")

10YR 2/2 10YR 3/2 10YR 3/3 10YR 3/4 10YR 4/2 10YR 4/3 10YR 4/4 10YR 5/4 10YR 5/6 2.5Y 4/3 2.5Y 6/2 2.5YR 3/4 2.5YR 3/6 5Y 4/2 5YR 3/3 5YR 3/4 5YR 4/3 5YR 4/4 5YR 4/6 5YR 4/8 5YR 5/5 5YR 5/6 5YR 5/7 5YR 5/8 7.5YR 3/2 7.5YR 3/3 7.5YR 3/4 7.5YR 4/3 7.5YR 4/4 7.5YR 5/4 7.5YR 5/6 7.5YR 5/8 7.5YR 6/6 7.5YR 6/8

Sketch profiles using same colors as community colors in network graph.

# get clustering vector and colors names from graph
cl <- data.frame(color = V(g)$name, cluster = V(g)$cluster, stringsAsFactors = FALSE)

# join with SPC horizons on `color` column
horizons(s) <- cl

# hack re-recreate colors used by plotSoilRelationGraph
# good reminder to return more from that function...
cols <- colorRampPalette(brewer.pal(n = 9, name = "Set1"))(max(cl$cluster))
s$cluster_color <- alpha(cols[s$cluster], 0.65)

# profile sketches, colors match communities in graph above
par(mar = c(0, 0, 1, 1))
plotSPC(s, color = 'cluster_color', print.id = FALSE, name.style = 'center-center', width = 0.35)
mtext('KSSL data correllated to Holland series', at = 0.5, adj = 0)

A AB BAt Bt1 Bt2 A1 A2 BA B1 B2 A AB Bt1 Bt2 A1 A2 B Bt11 Bt12 Bt2 C A11 A12 A2 B11 B12 B2 C A1 A2 A3 Bt1 Bt21 Bt22 Bt23 C A11 A12 B1t B2t C1 C2 A11 A12 B1 B21t B22t A11 A12 B21t B22t B23t B3t A11 A12 B1 B21t B22t B3 C1 C2 A11 A12 B2t C1 A1 B1 B2t A11 A12 B21t B22t B3 A11 A12 B1t B2t B3t C1 C2 350 cm 300 cm 250 cm 200 cm 150 cm 100 cm 50 cm 0 cm KSSL data correllated to Holland series

Visualize as graph with vertices colored according to soil color.

# join Munsell color notation to graph nodes
d <- data.frame(color = V(g)$name, stringsAsFactors = FALSE)

# get the first color
d <- plyr::join(d, horizons(s), by = 'color', type = 'left', match = 'first')
V(g)$color <- d$moist_soil_color

# prepare labels by converting to HSV
hsv.cols <- t(rgb2hsv(col2rgb(V(g)$color)))
hsv.cols[, 1] <- 0
hsv.cols[, 2] <- 0
hsv.cols[, 3] <- ifelse(hsv.cols[, 3] > 0.5, 0, 1)
V(g)$label.color <- hsv(hsv.cols[, 1], hsv.cols[, 2], hsv.cols[, 3])

# remove loops from graph, retain duplicate paths
g <- simplify(g, remove.loops = TRUE, remove.multiple = FALSE)

# final plot
par(mar = c(0,0,1,0), bg = grey(0.85))
set.seed(1010101)
plot(g, edge.arrow.size = 0.5, vertex.label.cex = 0.55, vertex.label.family = "sans", vertex.label.font=2, edge.color='black')

10YR 2/2 10YR 3/2 10YR 3/3 10YR 3/4 10YR 4/2 10YR 4/3 10YR 4/4 10YR 5/4 10YR 5/6 2.5Y 4/3 2.5Y 6/2 2.5YR 3/4 2.5YR 3/6 5Y 4/2 5YR 3/3 5YR 3/4 5YR 4/3 5YR 4/4 5YR 4/6 5YR 4/8 5YR 5/5 5YR 5/6 5YR 5/7 5YR 5/8 7.5YR 3/2 7.5YR 3/3 7.5YR 3/4 7.5YR 4/3 7.5YR 4/4 7.5YR 5/4 7.5YR 5/6 7.5YR 5/8 7.5YR 6/6 7.5YR 6/8

Simulate moist color sequences using Markov chains derived from transition probability matrix. Weight the TP matrix (and MC?) by working from 1cm slices. Need to think about this some more.

# re-make TP matrix, this time including terminal loops
s.slices <- dice(s, 0:150 ~ .)
tp.loops <- hzTransitionProbabilities(s.slices, name = "color", loopTerminalStates = TRUE)

# init new markovchain from TP matrix
mc <- new("markovchain", states = dimnames(tp.loops)[[1]], transitionMatrix = tp.loops)

# investigate the most common surface horizon colors
sort(table(s[, 1]$color), decreasing = TRUE)
## 
## 7.5YR 3/2  10YR 3/3   5YR 3/3   5YR 3/4  10YR 2/2  10YR 3/2 7.5YR 3/3 
##         5         2         2         2         1         1         1
# simulate 30 sequences, starting with the most common A horizon moist color
munsell.sequence <- replicate(30, rmarkovchain(n = 150, object = mc, include.t0 = TRUE, t0 = "7.5YR 3/2"))

# convert to plot-able colors
col.sequence <- apply(munsell.sequence, 2, parseMunsell)

# visualize
par(mar = c(1, 0, 3, 0))
plot(1, 1, type = 'n', axes = FALSE, xlab = '', ylab = '', ylim = c(160, 1), xlim = c(1, 30))

# vectorized functions are the best
rect(xleft = col(col.sequence) - 0.5, ybottom = row(col.sequence) -0.5, xright = col(col.sequence) + 0.5, ytop = row(col.sequence) + 0.5, col = col.sequence, border = NA, lty = 0)

Add most likely sequence.

ml <- mostLikelyHzSequence(mc, t0 = "7.5YR 3/2")

par(mar=c(1,0,3,0))
plot(1, 1, type = 'n', axes = FALSE, xlab = '', ylab = '', ylim = c(160, 1), xlim = c(1, 32))

# vectorized functions are the best
rect(xleft = col(col.sequence) - 0.5, ybottom = row(col.sequence) -0.5, xright = col(col.sequence) + 0.5, ytop = row(col.sequence) + 0.5, col = col.sequence, border = NA, lty = 0)

# stretch most likely sequence... this isn't quite right
rect(xleft = 31.5, ybottom = 10 * seq_along(ml) - 0.5, xright = 32.5, ytop = 20 * seq_along(ml) + 0.5, col = parseMunsell(ml))

Combine horizons with the same color.

z <- data.frame(
  id = 'S',
  top = 0, 
  bottom = 151,
  color = NA
)

depths(z) <- id ~ top + bottom
z <- duplicate(z, times = 30)

plotSPC(z)

S-01 S-02 S-03 S-04 S-05 S-06 S-07 S-08 S-09 S-10 S-11 S-12 S-13 S-14 S-15 S-16 S-17 S-18 S-19 S-20 S-21 S-22 S-23 S-24 S-25 S-26 S-27 S-28 S-29 S-30 150 cm 125 cm 100 cm 75 cm 50 cm 25 cm 0 cm

z <- dice(z, 0:150 ~ .)

z$color <- do.call('c', lapply(1:ncol(col.sequence), function(i) {col.sequence[, i]}))

zz <- collapseHz(z, by = 'color')

par(mar = c(0, 0, 0, 2.5))
plotSPC(zz, color = 'color', width = 0.35)

S-01 S-02 S-03 S-04 S-05 S-06 S-07 S-08 S-09 S-10 S-11 S-12 S-13 S-14 S-15 S-16 S-17 S-18 S-19 S-20 S-21 S-22 S-23 S-24 S-25 S-26 S-27 S-28 S-29 S-30 150 cm 125 cm 100 cm 75 cm 50 cm 25 cm 0 cm

Do it again, this time for a soil series with a lot of data: Clarksville.

# get lab / morphologic data
x <- fetchKSSL(series = 'clarksville', returnMorphologicData = TRUE, simplifyColors = TRUE)

# extract SoilProfileCollection
s <- x$SPC

# remove horizons that are missing moist colors
s <- subsetHz(s, !is.na(m_hue) & !is.na(m_value) & !is.na(m_chroma))

# remove profiles with missing horizons due to above steps
s <- HzDepthLogicSubset(s)

# keep only profiles with > 2 horizons
idx <- which(profileApply(s, nrow) > 2)
s <- s[idx, ]

# re-assemble Munsell color notation for moist color
s$color <- sprintf("%s %s/%s", s$m_hue, s$m_value, s$m_chroma)

Clarksville moist soil colors.

par(mar=c(0,0,2,0))
plotSPC(s, color = 'moist_soil_color', print.id = FALSE, name = '', depth.axis = FALSE, width = 0.4, divide.hz = FALSE, lty = 0)
mtext('KSSL data correllated to Clarksville series', at = 0.5, adj = 0)

KSSL data correllated to Clarksville series

previewColors(s$moist_soil_color)
mtext('KSSL data correllated to Clarksville series: moist colors.', at = 0.5, adj = 0)

KSSL data correllated to Clarksville series: moist colors.

# init TP matrix
tp.loops <- hzTransitionProbabilities(s, name = "color", loopTerminalStates = TRUE)

# init new markovchain from TP matrix
mc <- new("markovchain", states=dimnames(tp.loops)[[1]], transitionMatrix = tp.loops)

# simulate 30 sequences, starting with the most common A horizon moist color
munsell.sequence <- replicate(30, rmarkovchain(n = 10, object = mc, include.t0 = TRUE, t0 = "10YR 4/2"))

# convert to plot-able colors
col.sequence <- apply(munsell.sequence, 2, parseMunsell)

Simulated sequence of moist soil colors, starting from 10YR 4/2.

par(mar = c(1, 0, 3, 0))
plot(1, 1, type = 'n', axes = FALSE, xlab = '', ylab = '', ylim = c(11, 1), xlim = c(1, 30))

rect(xleft = col(col.sequence) - 0.5, ybottom = row(col.sequence) -0.5, xright = col(col.sequence) + 0.5, ytop = row(col.sequence) + 0.5, col = col.sequence)


This document is based on aqp version 2.1.0 and soilDB version 2.8.5.