A soil color / transition probability (TP) matrix experiment. Transition probabilities are computed in horizon depth order, starting from the top of each profile. The TP matrix can be used to investigate likely sequences and end-points, or, to simulate sequences using Markov chains.

Use pedon data associated with the Holland soil series.

library(aqp)
library(soilDB)
library(sharpshootR)
library(igraph)
library(reshape2)
library(markovchain)
library(RColorBrewer)
library(scales)

# get lab / morphologic data
x <- fetchKSSL(series = 'holland', returnMorphologicData = TRUE, simplifyColors = TRUE)

# extract pedons into SoilProfileCollection
s <- x$SPC

# remove horizons that are missing moist colors
s <- subsetHz(s, !is.na(m_hue) & !is.na(m_value) & !is.na(m_chroma))

# remove profiles with missing horizons due to above steps
s <- HzDepthLogicSubset(s)

# re-assemble Munsell color notation for moist color
s$color <- sprintf("%s %s/%s", s$m_hue, s$m_value, s$m_chroma)

Moist color changes with depth.

par(mar=c(0,0,2,0))
plotSPC(s, color='moist_soil_color', print.id=FALSE, name='', plot.depth.axis=FALSE, width=0.4)
mtext('KSSL data correllated to Holland series', at=0.5, adj = 0)

Compute depth-wise transition probability matrix for moist colors. Visualize as a graph organized by communities.

# # remove profile #8
# idx <- (1:length(s))[-8]
# s <- s[idx, ]

# generate TP matrix from horizon moist colors
tp <- hzTransitionProbabilities(s, name="color", loopTerminalStates = FALSE)

# greate graph object
par(mar = c(1, 1, 1, 1))
g <- plotSoilRelationGraph(tp, graph.mode = "directed", vertex.scaling.factor=2, edge.arrow.size = 0.5, edge.scaling.factor = 2.5, vertex.label.cex = 0.75, vertex.label.family = "sans")

Sketch profiles using same colors as community colors in network graph.

# get clustering vector and colors names from graph
cl <- data.frame(color=V(g)$name, cluster=V(g)$cluster, stringsAsFactors = FALSE)

# join with SPC
# note that we have to extract / join / splice
h <- horizons(s)
h <- merge(x = h, y = cl, by = 'color', sort = FALSE)
replaceHorizons(s) <- h

# hack re-recreate colors used by plotSoilRelationGraph
# good reminder to return more from that function...
cols <- colorRampPalette(brewer.pal(n = 9, name = "Set1"))(max(cl$cluster))
s$cluster_color <- alpha(cols[s$cluster], 0.65)

# profile sketches, colors match communities in graph above
par(mar=c(0,0,1,1))
plotSPC(s, color='cluster_color', print.id=FALSE, name='hzn_desgn', name.style = 'center-center')
mtext('KSSL data correllated to Holland series', at=0.5, adj = 0)

Visualize as graph with vertices colored according to soil color.

# join Munsell color notation to graph nodes
d <- data.frame(color=V(g)$name, stringsAsFactors = FALSE)
d <- plyr::join(d, horizons(s), by = 'color', type='left', match='first')
V(g)$color <- d$moist_soil_color

# prepare labels by converting to HSV
hsv.cols <- t(rgb2hsv(col2rgb(V(g)$color)))
hsv.cols[, 1] <- 0
hsv.cols[, 2] <- 0
hsv.cols[, 3] <- ifelse(hsv.cols[, 3] > 0.5, 0, 1)
V(g)$label.color <- hsv(hsv.cols[, 1], hsv.cols[, 2], hsv.cols[, 3])

# remove loops from graph, retain duplicate paths
g <- simplify(g, remove.loops = TRUE, remove.multiple=FALSE)

# final plot
par(mar=c(0,0,1,0), bg=grey(0.85))
set.seed(1010101)
plot(g, edge.arrow.size = 0.5, vertex.label.cex = 0.55, vertex.label.family = "sans", vertex.label.font=2, edge.color='black')

Simulate moist color sequences using Markov chains derived from transition probability matrix. Weight the TP matrix (and MC?) by working from 1cm slices. Does this make sense? Probably not.

# re-make TP matrix, this time including terminal loops
s.slices <- slice(s, 0:150 ~ .)
tp.loops <- hzTransitionProbabilities(s.slices, name="color", loopTerminalStates = TRUE)

# init new markovchain from TP matrix
mc <- new("markovchain", states=dimnames(tp.loops)[[1]], transitionMatrix = tp.loops)

# investigate the most common surface horizon colors
sort(table(h$color[grep('^A', h$hzn_desgn)]), decreasing = TRUE)
## 
## 7.5YR 3/2   5YR 3/4  10YR 3/3  10YR 4/4   5YR 3/3  10YR 2/2  10YR 3/2  10YR 3/4 2.5YR 3/4   5YR 4/3 
##         8         6         2         2         2         1         1         1         1         1 
## 7.5YR 3/3 7.5YR 3/4 7.5YR 4/3 7.5YR 4/4 
##         1         1         1         1
# simulate 30 sequences, starting with the most common A horizon moist color
munsell.sequence <- replicate(30, rmarkovchain(n = 150, object = mc, include.t0 = TRUE, t0 = "7.5YR 3/2"))

# convert to plot-able colors
col.sequence <- apply(munsell.sequence, 2, parseMunsell)

# visualize
par(mar=c(1,0,3,0))
plot(1, 1, type='n', axes=FALSE, xlab='', ylab='', ylim=c(160, 1), xlim=c(1, 30))

# vectorized functions are the best
rect(xleft = col(col.sequence) - 0.5, ybottom = row(col.sequence) -0.5, xright = col(col.sequence) + 0.5, ytop = row(col.sequence) + 0.5, col = col.sequence, border = NA, lty = 0)

Add most likely sequence.

ml <- mostLikelyHzSequence(mc, t0 = "7.5YR 3/2")

par(mar=c(1,0,3,0))
plot(1, 1, type='n', axes=FALSE, xlab='', ylab='', ylim=c(160, 1), xlim=c(1, 32))

# vectorized functions are the best
rect(xleft = col(col.sequence) - 0.5, ybottom = row(col.sequence) -0.5, xright = col(col.sequence) + 0.5, ytop = row(col.sequence) + 0.5, col = col.sequence, border = NA, lty = 0)

# stretch most likely sequence... this isn't quite right
rect(xleft=31.5, ybottom = 10 * seq_along(ml) - 0.5, xright = 32.5, ytop = 20 * seq_along(ml) + 0.5, col = parseMunsell(ml))

Do it again, this time for a soil series with a lot of data: Clarksville.

# get lab / morphologic data
x <- fetchKSSL(series='clarksville', returnMorphologicData = TRUE, simplifyColors = TRUE)

# extract SoilProfileCollection
s <- x$SPC

# remove horizons that are missing moist colors
s <- subsetHz(s, !is.na(m_hue) & !is.na(m_value) & !is.na(m_chroma))

# remove profiles with missing horizons due to above steps
s <- HzDepthLogicSubset(s)

# keep only profiles with > 2 horizons
idx <- which(profileApply(s, nrow) > 2)
s <- s[idx, ]

# re-assemble Munsell color notation for moist color
s$color <- sprintf("%s %s/%s", s$m_hue, s$m_value, s$m_chroma)

Clarksville moist soil colors.

par(mar=c(0,0,2,0))
plotSPC(s, color='moist_soil_color', print.id=FALSE, name='', plot.depth.axis=FALSE, width=0.4, divide.hz=FALSE, lty = 0)
mtext('KSSL data correllated to Clarksville series', at=0.5, adj = 0)

previewColors(s$moist_soil_color)
mtext('KSSL data correllated to Clarksville series: moist colors.', at=0.5, adj = 0)

# init TP matrix
tp.loops <- hzTransitionProbabilities(s, name="color", loopTerminalStates = TRUE)

# init new markovchain from TP matrix
mc <- new("markovchain", states=dimnames(tp.loops)[[1]], transitionMatrix = tp.loops)

# simulate 30 sequences, starting with the most common A horizon moist color
munsell.sequence <- replicate(30, rmarkovchain(n = 10, object = mc, include.t0 = TRUE, t0 = "10YR 4/2"))

# convert to plot-able colors
col.sequence <- apply(munsell.sequence, 2, parseMunsell)

Simulated sequence of moist soil colors, starting from 10YR 4/2.

par(mar=c(1,0,3,0))
plot(1, 1, type='n', axes=FALSE, xlab='', ylab='', ylim=c(11, 1), xlim=c(1, 30))

rect(xleft = col(col.sequence) - 0.5, ybottom = row(col.sequence) -0.5, xright = col(col.sequence) + 0.5, ytop = row(col.sequence) + 0.5, col = col.sequence)


This document is based on aqp version 2.0 and soilDB version 2.7.7.