California Cooperative Snow Survey Summary

These are my notes on getting, processing, and plotting the CA cooperative snow survey data each winter.

Key points:

data are fetched from CDEC via sharpshootR::CDECsnowQuery() by course number
data are compared using a combination of SWE percentiles, SPI, and z-score transformation of SWE
downloading the full record for all courses take a little while
some fairly bizzare Lattice graphics witchcraft is employed to get the first figure
the data object CDEC.snow.courses from the sharpshootR package contains a snapshot of snow course metadata

There is plenty of room for improvement.

Custom Functions

# http://joewheatley.net/20th-century-droughts/
getSPI <- function(y){
  #empirical, symmetrical Standard Precipitation Index
  fit.cdf <- ecdf(y)
  fit.rcdf <- ecdf(-y)
  cdfs <- fit.cdf(y)
  rcdfs <- 1 -fit.rcdf(-y)
  #invert normal
  spi.t <- qnorm(cdfs)
  spi.tp <- na.omit(spi.t[ spi.t != Inf ]) #drop Inf
  #reversed
  rspi.t <- qnorm(rcdfs)
  rspi.tp <- na.omit(rspi.t[ rspi.t != -Inf ]) #drop Inf
  # enforce symmetry
  spi.sym <- (spi.t+rspi.t)/2
  spi.sym[which(spi.t == Inf)] <- rspi.t[which(spi.t==Inf)]
  spi.sym[which(rspi.t == -Inf)] <- spi.t[which(rspi.t==-Inf)]
  return(spi.sym)
}

# custom panel function, run once within each lattice panel
panel.SWE <- function(x, y, ...) {
  panel.grid(h=FALSE, v=-1, lty=2)
  # make box-whisker plot
  panel.bwplot(x, y, ...)
  # tabulate number of obs (years) of data per station
  course.levels <- levels(y)
  n.yrs <- tapply(x, y, function(i) length(na.omit(i)))
  # determine station position on y-axis
  y.pos <- match(names(n.yrs), course.levels)
  # add 'yrs'
  n.yrs.txt <- paste('(', n.yrs, ' yrs)', sep='')
  # annotate panel with number of years / box-whisker
  grid.text(label=n.yrs.txt, x=unit(0.99, 'npc'), y=unit(y.pos, 'native'), gp=gpar(cex=0.65, font=2), just='right')
}

# custom stats for box-whisker plot: 5th-25th-50th-75th-95th percentiles
custom.bwplot <- function(x, coef=NA, do.out=FALSE) {
  stats <- quantile(x, p=c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm=TRUE)
  n <- length(na.omit(x))
  out.low <- x[which(x < stats[1])]
  out.high <- x[which(x > stats[5])]
  return(list(stats=stats, n=n, conf=NA, out=c(out.low, out.high)))
}

# custom strip function, allowing more than 1 strip color
strip.color.fun <- function(..., bg) {
  strip.default(..., bg=trellis.par.get("strip.background")$col[which.packet()])
}

Data and Summary for the Stanislaus

library(sharpshootR)
library(latticeExtra)
library(tactile)
library(grid)
library(purrr)

# current year
yr <- 2025

# one of: 'February', 'March', 'April', 'May'
mo <- 'April'

# Stanislaus River watershed
course.data <- data.frame(
  course_number = c(129, 323, 131, 132, 134, 345, 344, 138, 139, 384, 140, 142, 143, 145, 152)
)

getHistoricData <- function(i) {
  .res <- CDECsnowQuery(course = i, start_yr = 1900, end_yr = yr)
  .res$course_number <- i
  return(.res)
}

# get historic data
d.long_term <- map_df(.x = course.data$course_number, .progress = TRUE, .f = getHistoricData)

# join-in metadata
data(CDEC.snow.courses)
d.long_term <- merge(d.long_term, CDEC.snow.courses, by = 'course_number', all.x = TRUE, sort = FALSE)

# reset levels of site names, sorting by elevation
names.elevations <- unique(d.long_term[, c('name', 'elev_feet')])
new.levels <- names.elevations$name[order(names.elevations$elev_feet)]
d.long_term$name <- factor(d.long_term$name, levels = new.levels)

# compute historical means
longTermMean <- function(i) {
  
  .res <- data.frame(
    course_number = i$course_number[1],
    month = i$month[1],
    avg.SWE = mean(i$SWE, na.rm = TRUE), 
    avg.density = mean(i$density, na.rm = TRUE),
    avg.Depth = mean(i$Depth, na.rm = TRUE),
    no.yrs = length(na.omit(i$SWE))
  )
  
  return(.res)
}

s <- split(d.long_term, list(d.long_term$course_number, d.long_term$month), drop = TRUE)
d.long_term.avg <- map_df(s, .progress = TRUE, .f = longTermMean)

# make a copy of the current yr / mo data
d.current <- subset(d.long_term, subset=month == mo & year == yr)

# join current data with long term averages
# keep only data that exists in both tables
d.merged <- merge(d.current, d.long_term.avg, by = c('course_number', 'month'), all.x = TRUE, sort = FALSE)

# compute pct of normal of depth, SWE, density
d.merged$pct_of_normal_Depth <- with(d.merged, (Depth / avg.Depth) * 100.0)
d.merged$pct_of_normal_SWE <- with(d.merged, (SWE / avg.SWE) * 100.0)
d.merged$pct_of_normal_density <- with(d.merged, (density / avg.density) * 100.0)


## using customized boxplot !!
## compare with Feb, March, April, May only

# determine which panel contains the current month's data
this.month.panel <- match(mo, levels(d.current$month))
# strip colors
strip.colors <- rep(grey(0.95), times=length(levels(d.current$month)))
# modifiy this month's strip color
strip.colors[this.month.panel] <- grey(0.75)

## compare this year's data with long-term variability
p1 <- xyplot(name ~ SWE | month, data=d.current, fill='RoyalBlue', pch=21, cex=0.75, ylab='')
p2 <- bwplot(name ~ SWE | month, data=d.long_term, panel=panel.SWE, stats=custom.bwplot, xlab='Snow Water Eqivalent (in)', layout=c(5, 1), strip=strip.color.fun, as.table=TRUE, scales=list(x=list(alternating=3)), subset=month %in% c('January','February','March','April','May'))


# combine figures
p3 <- p2 + p1

# add title, and legend
main.title <- paste(mo, yr, 'Snow Survey')
p3 <- update(p3, main=main.title, key=list(corner=c(0.97,-0.15), points=list(pch=21, fill='RoyalBlue', cex=1), text=list(paste('Current (', mo, ' ', yr, ') SWE', sep='')), between=1))


# make legend sub-plot
set.seed(101010)
x <- rnorm(100)
x.stats <- custom.bwplot(x)
p4 <- bwplot(x, stats=custom.bwplot, scales=list(draw=FALSE), xlab=list('Percentiles', cex=0.75), ylab='', ylim=c(0.5, 1.75))
p4 <- p4 + layer(panel.text(x=x.stats$stats, y=1.5, label=c('5%','25%','50%','75%','95%'), cex=0.75))

# setup plot style
tps <- list(box.dot=list(col='black', pch='|'), box.rectangle=list(col='black'), box.umbrella=list(col='black', lty=1), plot.symbol=list(col='black', cex=0.33), strip.background=list(col=strip.colors))

trellis.par.set(tps)
print(p3, more=TRUE, position=c(0,0.08,1,1))
print(p4, more=FALSE, position=c(0.125,0,0.425,0.175))

Analysis of the Entire Record, all Courses

# all snow courses
data(CDEC.snow.courses)

# get historic data
d.long_term <- map_df(.x = CDEC.snow.courses$course_number, .progress = TRUE, .f = getHistoricData)

# save local cache
# save(d.long_term, file='long_term_data-cache.rda')

# join-in the snow metadata
d.long_term <- merge(
  d.long_term, 
  CDEC.snow.courses, 
  by = 'course_number',
  all.x = TRUE,
  sort = FALSE
)

# reset levels of site names, sorting by elevation
names.elevations <- unique(d.long_term[, c('name', 'elev_feet')])
new.levels <- names.elevations$name[order(names.elevations$elev_feet)]
d.long_term$name <- factor(d.long_term$name, levels = new.levels)

# requires >1 row of data
scaleFun <- function(i) {
  if(nrow(i) > 1) {
    scaled.SWE <- scale(i$SWE)
    emp.pctile <- ecdf(i$SWE)(i$SWE)
    spi <- getSPI(i$SWE)
    
    d <- data.frame(
      course_number = i$name, 
      Date = i$Date, 
      year = i$year, 
      month = i$month,
      elev_feet = i$elev_feet,
      scaled.SWE = scaled.SWE, 
      emp.pctile = emp.pctile, 
      SWE = i$SWE, 
      spi = spi
    )
    
    return(d)
  }
}


## scale by course / month, across all years
s <- split(d.long_term, list(d.long_term$name, d.long_term$month))
scaled.data <- map_df(s, .progress = TRUE, .f = scaleFun)

# re-order by course, Date
scaled.data <- scaled.data[order(scaled.data$course_number, scaled.data$Date), ]

# subset out the typical months
scaled.data <- subset(scaled.data, month %in% c('February', 'March', 'April', 'May') & year > 1930)
scaled.data$month <- factor(scaled.data$month)

# not all that interesting
elevEffect <- function(i) {
  
  if(nrow(i) > 1) {
    
    .res <- data.frame(
      year = i$year[1],
      month = i$month[1]
    )
    
    if(length(which(complete.cases(i))) > 12) {
      m <- lm(emp.pctile ~ elev_feet, data = i)
      .res$elev.intercept <- coef(m)[1]
      .res$elev.slope <- coef(m)[2]
    } else {
      .res$elev.intercept <- NA
      .res$elev.slope <- NA
    }
  }
  
  return(.res)
  
}

s <- split(scaled.data, list(scaled.data$year, scaled.data$month))
elev.effects <- map_df(s, .progress = TRUE, .f = elevEffect)

bwplot(
  elev.intercept ~ month,
  data = elev.effects,
  par.settings = tactile.theme()
)

bwplot(
  elev.slope ~ month,
  data = elev.effects,
  par.settings = tactile.theme()
)


# # SWE ~ elevation relationship over all stations
# cr <- colorRampPalette(hcl.colors(n = 100, palette = 'spectral', rev = TRUE))
# 
# hexbinplot(scaled.SWE ~ elev_feet | month, data = scaled.data, colorkey = FALSE, scales = list(alternating = 1), colramp = cr, par.settings = tactile.theme())

# plot all data, each line is a course
xyplot(SWE ~ year | month, data=scaled.data, groups=course_number, col=rgb(0.1, 0.1, 0.1, alpha=0.25), type=c('l','g'), span=0.125, as.table=TRUE, scales=list(alternating=3, x=list(tick.number=10, rot=90)))

# plot all data: each dot is a survey
xyplot(SWE ~ year | month, data=scaled.data, type=c('p','g', 'smooth'), span=0.125, as.table=TRUE, scales=list(alternating=3, x=list(tick.number=10, rot=90)), cex=0.25, pch=16, lwd=2)

xyplot(scaled.SWE ~ year | month, data=scaled.data, type=c('p','g', 'smooth'), span=0.125, as.table=TRUE, scales=list(alternating=3, x=list(tick.number=10, rot=90)), cex=0.25, pch=16, lwd=2)

xyplot(emp.pctile ~ year | month, data=scaled.data, type=c('p','g', 'smooth'), span=0.125, as.table=TRUE, scales=list(alternating=3, x=list(tick.number=10, rot=90)), cex=0.25, pch=16, lwd=2)

xyplot(emp.pctile ~ year | month, data=scaled.data, type=c('p','g', 'smooth'), span=0.25, as.table=TRUE, scales=list(alternating=3, x=list(tick.number=10, rot=90)), cex=0.25, pch=16, lwd=2, subset=year > 1970)

tps <- tactile.theme(box.dot=list(col='black', pch='|'), box.rectangle=list(col='black'), box.umbrella=list(col='black', lty=1), plot.symbol=list(col='black', cex=0.33), strip.background=list(col=grey(0.85)))

bwplot(emp.pctile ~ factor(year) | month, data=scaled.data, as.table=TRUE, par.settings=tps, scales=list(alternating=3, x=list(tick.number=10, rot=90)), subset=year > 1970 & month == mo, stats=custom.bwplot, panel=function(...) {panel.grid(-1, -1); panel.bwplot(...)})

bwplot(spi ~ factor(year) | month, data=scaled.data, as.table=TRUE, par.settings=tps, scales=list(alternating=3, x=list(tick.number=10, rot=90)), subset=year > 1970 & month == mo, stats=custom.bwplot, panel=function(...) {panel.grid(-1, -1); panel.bwplot(...)})

por <- sprintf('Period of Record: 1939-%s', yr)

bwplot(emp.pctile ~ factor(year), data=scaled.data, main=sprintf('%s Snow Survey', mo), sub = por, ylim=c(-0.05, 1.1), ylab='Empirical Percentiles of SWE', par.settings=tps, scales=list(alternating=3, y=list(tick.number=10)), subset=year > 1997 & month == mo, panel=function(x=x, y=y, ...) {
  panel.grid(-1, -1)
  panel.abline(h=0.5, lty=2, col='orange')
  panel.bwplot(x=x, y=y, ...)
  n.stations <- tapply(y, x, length)
  panel.text(x=1:length(n.stations), y=1.03, labels=n.stations, cex=0.65, font=2)
  panel.text(x=10, y=1.075, labels='Snow Courses Reporting', cex=0.85, font=3)
})

.names <- CDEC.snow.courses$name[CDEC.snow.courses$course_number %in% course.data$course_number]

levelplot(
  emp.pctile ~ year * course_number | month, 
  data = scaled.data, 
  subset = year > 1960 & month == mo, 
  ylab = '', xlab = '', 
  par.settings = tactile.theme(), 
  scales = list(x = list(tick.number = 20), y = list(cex = 0.5))
)

levelplot(
  emp.pctile ~ year * course_number | month, 
  data = scaled.data, 
  subset = year > 1960 & month == mo & course_number %in% .names, 
  ylab = '', xlab = '', 
  par.settings = tactile.theme(regions = list(col = hcl.colors(100, pal = 'spectral')) ), 
  scales = list(x = list(tick.number = 20), y = list(cex = 0.85)),
  main = 'Percentiles of SWE by Course/Month'
)

This document is based on sharpshootR version 2.3.3.

California Cooperative Snow Survey Summary

D.E. and J.M. Beaudette

2025-03-28

Custom Functions

Data and Summary for the Stanislaus

Analysis of the Entire Record, all Courses