Introduction

This document demonstrates how to use the soilDB package to download KSSL data from SoilWeb. These data are from the September 2016 snapshot, and will be updated as future snapshots are released. Comparisons are made via graphical summaries of key soil properties with depth, using data structures and functions from the aqp package.

Installation

With a recent version of R (>= 2.15), it is possible to get all of the packages that this tutorial depends on via:

# stable packages from CRAN
install.packages('maps', dep=TRUE)
install.packages('plyr', dep=TRUE)
install.packages('reshape2', dep=TRUE)
install.packages('soilDB', dep=TRUE)

# latest versions from GitHub
devtools::install_github("ncss-tech/aqp", dependencies=FALSE, upgrade_dependencies=FALSE)
devtools::install_github("ncss-tech/soilDB", dependencies=FALSE, upgrade_dependencies=FALSE)

Quick Example: getting lab characterization (KSSL) and basic morphology (NASIS)

KSSL data are from the September 2016 snapshot, NASIS data are from the April 2016 snapshot. Details pending.

library(soilDB)
library(plyr)
library(reshape2)


# get lab and morphologic data
s <- fetchKSSL(series='auburn', returnMorphologicData = TRUE)

# the result is a list, check it out
str(s, 2)
## List of 2
##  $ SPC  :Formal class 'SoilProfileCollection' [package "aqp"] with 7 slots
##  $ morph:List of 4
##   ..$ phcolor    :'data.frame':  147 obs. of  7 variables:
##   ..$ phfrags    :'data.frame':  43 obs. of  10 variables:
##   ..$ phpores    :'data.frame':  107 obs. of  5 variables:
##   ..$ phstructure:'data.frame':  61 obs. of  7 variables:
# check out the "raw" morphologic data:
lapply(s$morph, head)
## $phcolor
##    phiid labsampnum colorpct colorhue colorvalue colorchroma colormoistst
## 1 820051   05N02182       NA     10YR          4           4          Dry
## 2 820051   05N02182       NA     10YR          3           3        Moist
## 3 820052   05N02183       NA    7.5YR          3           3        Moist
## 4 820052   05N02183       NA    7.5YR          4           6          Dry
## 5 820053   05N02184       NA      5YR          3           3        Moist
## 6 820053   05N02184       NA      5YR          4           4          Dry
## 
## $phfrags
##     phiid labsampnum fragvol                   fragkind fragsize_l fragsize_r fragsize_h fragshp
## 1  820051   05N02182      14 Metamorphic rock fragments          2          5         76      NA
## 2  820052   05N02183      10 Metamorphic rock fragments          2         20         76      NA
## 3  820053   05N02184      18 Metamorphic rock fragments          2          5         76      NA
## 4 1842112   10N04396       2       Greenstone fragments          2          3          5      NA
## 5 1842114   10N04397       4       Greenstone fragments          2         10         20      NA
## 6 1842114   10N04397       2       Greenstone fragments          2         10         20      NA
##    fraground          fraghard
## 1 Subrounded         Indurated
## 2 Subrounded         Indurated
## 3 Subangular         Indurated
## 4       <NA>   Weakly cemented
## 5       <NA> Strongly cemented
## 6       <NA>   Weakly cemented
## 
## $phpores
##     phiid labsampnum poreqty  poresize           poreshp
## 1  820051   05N02182     5.0      Fine      Interstitial
## 2  820051   05N02182     7.0    Medium           Tubular
## 3  820052   05N02183     5.0 Very fine Dendritic tubular
## 4  820052   05N02183     6.0    Medium           Tubular
## 5  820053   05N02184     5.0 Very fine Dendritic tubular
## 6 1842112   10N04396     0.8    Medium           Tubular
## 
## $phstructure
##     phiid labsampnum structgrade structsize        structtype structid structpartsto
## 1  820051   05N02182    Moderate       Fine          Granular       NA            NA
## 2  820052   05N02183    Moderate     Medium Subangular blocky       NA            NA
## 3  820053   05N02184    Moderate     Medium Subangular blocky       NA            NA
## 4 1842112   10N04396    Moderate       Fine          Granular       NA            NA
## 5 1842114   10N04397    Moderate     Medium Subangular blocky       NA            NA
## 6 1842116   10N04398    Moderate     Medium Subangular blocky       NA            NA
# extract pedons into SoilProfileCollection
pedons <- s$SPC

# simplify color data
# note that both horizon ID and color "percentage" must be specified
s.colors <- simplifyColorData(s$morph$phcolor, id.var = 'labsampnum', wt='colorpct')

# merge color data into SPC
h <- horizons(pedons)
h <- join(h, s.colors, by='labsampnum', type='left', match='first')
horizons(pedons) <- h

# check
par(mar=c(0,0,0,0))
plot(pedons, color='moist_soil_color', print.id=FALSE, name='hzn_desgn')

# simplify fragment data
s.frags <- simplfyFragmentData(s$morph$phfrags, id.var='labsampnum')

# merge fragment data into SPC
h <- horizons(pedons)
h <- join(h, s.frags, by='labsampnum', type='left', match='first')
horizons(pedons) <- h


# check
par(mar=c(0,0,3,0))
plot(pedons, color='total_frags_pct', print.id=FALSE, name='hzn_desgn')

Fetch Data Associated with a Set of IDs

Fetching KSSL data for a set of IDs requires some additional "helper" functions. The following example fetches data according to a vector of "pedlabsampnum" IDs, extracts the horizon color data, filters missing data, and combines the results into a single table.

First, paste the following code into your script or console to initialize our helper functions.

# get data associated with a single ID
# no records -> NULL returned
getPedons <- function(x) {
  suppressMessages(fetchKSSL(pedlabsampnum=x, returnMorphologicData = TRUE))
}

# extract a morphologic table from each record set
# NULL data are filtered out
# converted to data.frame
extractMorphTable <- function(x, table='phcolor') {
  m <- lapply(x, function(i) i[['morph']][[table]])
  # index pedons with data
  idx <- which(sapply(m, length) > 0) 
  if(length(idx) < 1)
    return(NULL)
  # filter non-NULL and convert to DF
  m <- ldply(m[idx])
  return(m)
}

Next, use the getPedons() helper function to fetch lab and morphologic data according to a vector of pedlabsampnum IDs. Note that the results are a list of lists. Select morphologic data are extracted and combined with the extractMorphTable() function.

# pedons indexed by "pedlabsampnum" ID
pls <- c("04N0610", "04N0611", "04N0612", "04N0613")
# iterate over IDs and get data
res <- lapply(pls, getPedons)

# check: OK
str(res, 2)
## List of 4
##  $ :List of 2
##   ..$ SPC  :Formal class 'SoilProfileCollection' [package "aqp"] with 7 slots
##   ..$ morph:List of 4
##  $ :List of 2
##   ..$ SPC  :Formal class 'SoilProfileCollection' [package "aqp"] with 7 slots
##   ..$ morph:List of 4
##  $ :List of 2
##   ..$ SPC  :Formal class 'SoilProfileCollection' [package "aqp"] with 7 slots
##   ..$ morph:List of 4
##  $ :List of 2
##   ..$ SPC  :Formal class 'SoilProfileCollection' [package "aqp"] with 7 slots
##   ..$ morph:List of 4
# extract phcolor data from list of records
# result is a data.frame with all non-NULL rows
phcolor <- extractMorphTable(res, table='phcolor')

# check: OK
head(phcolor)
##     phiid labsampnum colorpct colorhue colorvalue colorchroma colormoistst
## 1 1463128   04N03382      100     10YR          5           4        Moist
## 2 1463138   04N03383      100      5YR          4           6        Moist
## 3 1463139   04N03384      100      5YR          4           6        Moist
## 4 1463140   04N03385       33    2.5YR          4           8        Moist
## 5 1463140   04N03385       33    7.5YR          5           8        Moist
## 6 1463140   04N03385       34      10R          4           8        Moist

Finally, extract the SoilProfileCollection objects from res and combine into a single object. We can now join our combined color data with the combined pedon data.

# extract pedons from list
pedons <- lapply(res, function(i) i$SPC)
# combine into a single SPC object
pedons <- do.call('rbind', pedons)

# check: looks good
plot(pedons, color='estimated_ph_h2o', name='hzn_desgn')

# simplify combined color data
colors <- simplifyColorData(phcolor, id.var = 'labsampnum', wt='colorpct')

# join combined color data to SPC
h <- horizons(pedons)
h <- join(h, colors, by='labsampnum', type='left', match='first')
horizons(pedons) <- h

# check: looks good
plot(pedons, color='moist_soil_color', name='hzn_desgn')

Example Application

Data can be queried by "taxonname"" (typically a soil series), using a geographic bounding-box in WGS84 referenced coordinates, or by MLRA. Taxonname matching is case-insensitive and based on the most current taxonname in NASIS. See ?fetchKSSL() or this GitHub repository for details.

In this example, we are downloading KSSL data for the musick, chaix, and holland soils; commonly found on granitic rocks of the Sierra Nevada Mountain region of California.

# load libraries
library(soilDB)
library(plyr)
library(lattice)
library(maps)

# define plotting style
tps <- list(superpose.line=list(col=c('RoyalBlue', 'DarkRed', 'DarkGreen'), lwd=2))

# fetch KSSL data by fuzzy-matching of series name
musick <- fetchKSSL('musick')
holland <- fetchKSSL('holland')
chaix <- fetchKSSL('chaix')

# generate a basemap of northern California, with county boundaries
map('county', 'California', xlim=c(-123.25, -118.75), ylim=c(36.5, 41))
# add long/lat axes
map.axes()
# add locations of musick
points(y ~ x, data=site(musick), pch=21, bg='RoyalBlue')
# add locations of holland
points(y ~ x, data=site(holland), pch=21, bg='DarkRed')
# add locations of chaix
points(y ~ x, data=site(chaix), pch=21, bg='DarkGreen')
# add a simple legend
legend('topright', pch=21, pt.bg=c('RoyalBlue', 'DarkRed', 'DarkGreen'), legend=c('musick', 'holland', 'chaix'), bty='n')

Profile Plots

Generate "thematic" profile sketches by coloring horizons according to clay content.

par(mar=c(0,1,5,1))
plot(musick, name='hzn_desgn', color='clay')

plot(holland, name='hzn_desgn', color='clay')

plot(chaix, name='hzn_desgn', color='clay')

Combining Data

Since the results from fetchKSSL() function always returns data in the same format, it is possible to stack the results of several KSSL queries using rbind().

# check "correlated as" names from each object
table(musick$taxonname)
## 
## Musick MUSICK 
##      7      4
table(holland$taxonname)
## 
## Holland HOLLAND 
##      18       6
table(chaix$taxonname)
## 
## Chaix CHAIX 
##     7     4
# since there are multiple permuations of each, normalize soil series name by replacement
musick$taxonname <- 'musick'
holland$taxonname <- 'holland'
chaix$taxonname <- 'chaix'

# stack datasets into a new SoilProfileCollection
g <- rbind(chaix, musick, holland)

# compute weighted-mean particle diameter
g$wmpd <- with(horizons(g), ((vcs * 1.5) + (cs * 0.75) + (ms * 0.375) + (fs * 0.175) + (vfs *0.075) + (silt * 0.026) + (clay * 0.0015)) / (vcs + cs + ms + fs + vfs + silt + clay))

# tabulate number of pedons by normalized series name
table(g$taxonname)
## 
##   chaix holland  musick 
##      11      24      11
# plot the grouped object, with profiles arranged by taxonname
par(mar=c(0,1,5,1))
groupedProfilePlot(g, groups = 'taxonname', color='clay', group.name.offset = -10)

# estimate soil depth based on horizon designations
sdc <- getSoilDepthClass(g, name='hzn_desgn', top='hzn_top', bottom='hzn_bot')

# splice-into site data
site(g) <- sdc

# summarize soil depth by taxonname
tapply(g$depth, g$taxonname, summary)
## $chaix
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    64.0    81.0    97.0   105.5   108.0   185.0 
## 
## $holland
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   132.0   158.0   170.0   192.1   199.5   371.0 
## 
## $musick
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   152.0   177.5   183.0   209.1   250.0   295.0

Aggregation

The slab() function is used to aggregate selected variables within collections of soil profiles along depth-slices. In this case, we aggregate clay, pH (1:1 H2O, estimated from saturated paste pH when missing), and base saturation at pH 8.2 along 1-cm thick slices and within groups defined by the variable 'taxonname'. See ?slab() for details on how this function can be used.

g.slab <- slab(g, taxonname ~ clay + estimated_ph_h2o + bs82 + wmpd)

# inspect stacked data structure
str(g.slab)
## 'data.frame':    4452 obs. of  10 variables:
##  $ variable             : Factor w/ 4 levels "clay","estimated_ph_h2o",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ taxonname            : chr  "chaix" "chaix" "chaix" "chaix" ...
##  $ p.q5                 : num  3.63 3.63 3.63 3.71 3.71 ...
##  $ p.q25                : num  5.82 5.82 5.82 6.34 6.34 ...
##  $ p.q50                : num  7.95 7.95 7.95 8.14 8.14 ...
##  $ p.q75                : num  9.25 9.25 9.25 9.64 9.64 ...
##  $ p.q95                : num  10.2 10.2 10.2 11.7 11.7 ...
##  $ top                  : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ bottom               : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ contributing_fraction: num  0.818 0.818 0.818 1 1 ...

Refine Labels

It is convenient to know how many pedons there are within each collection--therefore, we append this value to the series name. Using the same approach, we can rename the soil properties with more useful descriptions and units of measure.

# re-name soils with series name + number of pedons-- order is critical !
new.levels <- c('musick', 'holland', 'chaix')
new.labels <- paste(new.levels, ' [', c(length(musick), length(holland), length(chaix)), ' pedons]', sep='')
g.slab$taxonname <- factor(g.slab$taxonname, levels=new.levels, labels=new.labels)

# new names should match the order in:
levels(g.slab$variable)
## [1] "clay"             "estimated_ph_h2o" "bs82"             "wmpd"
# re-name soil property labels-- order is critical !
levels(g.slab$variable) <- c('Clay (%)', 'pH 1:1 H2O', 'Base Saturation at pH 8.2 (%)', 'WMPD (mm)')

Graphically Compare

The slice-wise median and 25th/75th percentiles are reasonable estimations of central tendency and spread. "Contributing fraction" values (% of pedons with data at a given depth) are printed along the right-hand side of each panel. These values provide both an indication of how deep the soils are, and, how much confidence can be placed in the aggregate data at any given depth.

# plot grouped, aggregate data
xyplot(top ~ p.q50 | variable, groups=taxonname, data=g.slab, ylab='Depth',
             xlab='median bounded by 25th and 75th percentiles',
             lower=g.slab$p.q25, upper=g.slab$p.q75, ylim=c(155,-5),
             panel=panel.depth_function, alpha=0.25, sync.colors=TRUE,
             prepanel=prepanel.depth_function,
             cf=g.slab$contributing_fraction,
       par.strip.text=list(cex=0.8),
             strip=strip.custom(bg=grey(0.85)),
             layout=c(4,1), scales=list(x=list(alternating=1, relation='free'), y=list(alternating=3)),
             par.settings=tps,
             auto.key=list(columns=3, lines=TRUE, points=FALSE)
)