These are my notes on getting, processing, and plotting the CA cooperative snow survey data each winter.
Key points:
sharpshootR::CDECsnowQuery()
by course numberCDEC.snow.courses
from the
sharpshootR
package contains a snapshot of snow course
metadataThere is plenty of room for improvement.
# http://joewheatley.net/20th-century-droughts/
getSPI <- function(y){
#empirical, symmetrized Standard Precipitation Index
fit.cdf <- ecdf(y)
fit.rcdf <- ecdf(-y)
cdfs <- fit.cdf(y)
rcdfs <- 1 -fit.rcdf(-y)
#invert normal
spi.t <- qnorm(cdfs)
spi.tp <- na.omit(spi.t[ spi.t != Inf ]) #drop Inf
#reversed
rspi.t <- qnorm(rcdfs)
rspi.tp <- na.omit(rspi.t[ rspi.t != -Inf ]) #drop Inf
#symmetrise
spi.sym <- (spi.t+rspi.t)/2
spi.sym[which(spi.t == Inf)] <- rspi.t[which(spi.t==Inf)]
spi.sym[which(rspi.t == -Inf)] <- spi.t[which(rspi.t==-Inf)]
return(spi.sym)
}
# custom panel function, run once within each lattice panel
panel.SWE <- function(x, y, ...) {
panel.grid(h=FALSE, v=-1, lty=2)
# make box-whisker plot
panel.bwplot(x, y, ...)
# tabulate number of obs (years) of data per station
course.levels <- levels(y)
n.yrs <- tapply(x, y, function(i) length(na.omit(i)))
# determine station position on y-axis
y.pos <- match(names(n.yrs), course.levels)
# add 'yrs'
n.yrs.txt <- paste('(', n.yrs, ' yrs)', sep='')
# annotate panel with number of years / box-whisker
grid.text(label=n.yrs.txt, x=unit(0.99, 'npc'), y=unit(y.pos, 'native'), gp=gpar(cex=0.65, font=2), just='right')
}
# custom stats for box-whisker plot: 5th-25th-50th-75th-95th percentiles
custom.bwplot <- function(x, coef=NA, do.out=FALSE) {
stats <- quantile(x, p=c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm=TRUE)
n <- length(na.omit(x))
out.low <- x[which(x < stats[1])]
out.high <- x[which(x > stats[5])]
return(list(stats=stats, n=n, conf=NA, out=c(out.low, out.high)))
}
# custom strip function, allowing more than 1 strip color
strip.color.fun <- function(..., bg) {
strip.default(..., bg=trellis.par.get("strip.background")$col[which.packet()])
}
library(sharpshootR)
library(latticeExtra)
library(tactile)
library(grid)
library(purrr)
# current data are:
yr <- 2024
mo <- 'May'
# Stanislaus River watershed
course.data <- data.frame(course_number=c(129, 323, 131, 132, 134, 345, 344, 138, 139, 384, 140, 142, 143, 145, 152))
getHistoricData <- function(i) {
.res <- CDECsnowQuery(course = i, start_yr = 1900, end_yr = yr)
.res$course_number <- i
return(.res)
}
# get historic data
d.long_term <- map(.x = course.data$course_number, .progress = TRUE, .f = getHistoricData)
d.long_term <- do.call('rbind', d.long_term)
# join-in metadata
data(CDEC.snow.courses)
d.long_term <- merge(d.long_term, CDEC.snow.courses, by = 'course_number', all.x = TRUE, sort = FALSE)
# reset levels of site names, sorting by elevation
names.elevations <- unique(d.long_term[, c('name', 'elev_feet')])
new.levels <- names.elevations$name[order(names.elevations$elev_feet)]
d.long_term$name <- factor(d.long_term$name, levels = new.levels)
# c('February', 'March', 'April', 'May')
## TODO: remove all plyr code
## compute long-term average, by course / month, using the Adjusted
d.long_term.avg <- plyr::ddply(d.long_term, c('course_number', 'month'), plyr::summarise,
avg.SWE = mean(SWE, na.rm=TRUE),
avg.density = mean(density, na.rm=TRUE),
avg.Depth = mean(Depth, na.rm=TRUE),
no.yrs = length(na.omit(SWE))
)
# make a copy of the current yr / mo data
d.current <- subset(d.long_term, subset=month == mo & year == yr)
# join current data with long term averages
# keep only data that exists in both tables
d.merged <- plyr::join(d.current, d.long_term.avg, by=c('course_number', 'month'), type='left')
# compute pct of normal of depth, SWE, density
d.merged$pct_of_normal_Depth <- with(d.merged, (Depth / avg.Depth) * 100.0)
d.merged$pct_of_normal_SWE <- with(d.merged, (SWE / avg.SWE) * 100.0)
d.merged$pct_of_normal_density <- with(d.merged, (density / avg.density) * 100.0)
## using customized boxplot !!
## compare with Feb, March, April, May only
# determine which panel contains the current month's data
this.month.panel <- match(mo, levels(d.current$month))
# strip colors
strip.colors <- rep(grey(0.95), times=length(levels(d.current$month)))
# modifiy this month's strip color
strip.colors[this.month.panel] <- grey(0.75)
## compare this year's data with long-term variability
p1 <- xyplot(name ~ SWE | month, data=d.current, fill='RoyalBlue', pch=21, cex=0.75, ylab='')
p2 <- bwplot(name ~ SWE | month, data=d.long_term, panel=panel.SWE, stats=custom.bwplot, xlab='Snow Water Eqivalent (in)', layout=c(5, 1), strip=strip.color.fun, as.table=TRUE, scales=list(x=list(alternating=3)), subset=month %in% c('January','February','March','April','May'))
# combine figures
p3 <- p2 + p1
# add title, and legend
main.title <- paste(mo, yr, 'Snow Survey')
p3 <- update(p3, main=main.title, key=list(corner=c(0.97,-0.15), points=list(pch=21, fill='RoyalBlue', cex=1), text=list(paste('Current (', mo, ' ', yr, ') SWE', sep='')), between=1))
# make legend sub-plot
set.seed(101010)
x <- rnorm(100)
x.stats <- custom.bwplot(x)
p4 <- bwplot(x, stats=custom.bwplot, scales=list(draw=FALSE), xlab=list('Percentiles', cex=0.75), ylab='', ylim=c(0.5, 1.75))
p4 <- p4 + layer(panel.text(x=x.stats$stats, y=1.5, label=c('5%','25%','50%','75%','95%'), cex=0.75))
# setup plot style
tps <- list(box.dot=list(col='black', pch='|'), box.rectangle=list(col='black'), box.umbrella=list(col='black', lty=1), plot.symbol=list(col='black', cex=0.33), strip.background=list(col=strip.colors))
trellis.par.set(tps)
print(p3, more=TRUE, position=c(0,0.08,1,1))
print(p4, more=FALSE, position=c(0.125,0,0.425,0.175))
# all snow courses
data(CDEC.snow.courses)
# get historic data
d.long_term <- plyr::ddply(CDEC.snow.courses, 'course_number', .progress='text', .fun=function(i) {CDECsnowQuery(course=i$course_number, start_yr = 1900, end_yr = yr)})
# save local cache
# save(d.long_term, file='long_term_data-cache.rda')
# join-in the snow metadata
d.long_term <- plyr::join(d.long_term, CDEC.snow.courses, by='course_number', type='left')
# reset levels of site names, sorting by elevation
names.elevations <- unique(d.long_term[, c('name', 'elev_feet')])
new.levels <- names.elevations$name[order(names.elevations$elev_feet)]
d.long_term$name <- factor(d.long_term$name, levels=new.levels)
## scale by course / month, across all years
scaled.data <- plyr::ddply(d.long_term, c('name', 'month'), function(i) {
scaled.SWE <- scale(i$SWE)
emp.pctile <- ecdf(i$SWE)(i$SWE)
spi <- getSPI(i$SWE)
d <- data.frame(course_number=i$name, Date=i$Date, year=i$year, month=i$month, scaled.SWE=scaled.SWE, emp.pctile=emp.pctile, SWE=i$SWE, spi=spi)
return(d)
})
# re-order by course, Date
scaled.data <- scaled.data[order(scaled.data$name, scaled.data$Date), ]
# subset out the typical months
scaled.data <- subset(scaled.data, month %in% c('February', 'March', 'April', 'May') & year > 1930)
scaled.data$month <- factor(scaled.data$month)
# plot all data, each line is a course
xyplot(SWE ~ year | month, data=scaled.data, groups=name, col=rgb(0.1, 0.1, 0.1, alpha=0.25), type=c('l','g'), span=0.125, as.table=TRUE, scales=list(alternating=3, x=list(tick.number=10, rot=90)))
# plot all data: each dot is a survey
xyplot(SWE ~ year | month, data=scaled.data, type=c('p','g', 'smooth'), span=0.125, as.table=TRUE, scales=list(alternating=3, x=list(tick.number=10, rot=90)), cex=0.25, pch=16, lwd=2)
xyplot(scaled.SWE ~ year | month, data=scaled.data, type=c('p','g', 'smooth'), span=0.125, as.table=TRUE, scales=list(alternating=3, x=list(tick.number=10, rot=90)), cex=0.25, pch=16, lwd=2)
xyplot(emp.pctile ~ year | month, data=scaled.data, type=c('p','g', 'smooth'), span=0.125, as.table=TRUE, scales=list(alternating=3, x=list(tick.number=10, rot=90)), cex=0.25, pch=16, lwd=2)
xyplot(emp.pctile ~ year | month, data=scaled.data, type=c('p','g', 'smooth'), span=0.25, as.table=TRUE, scales=list(alternating=3, x=list(tick.number=10, rot=90)), cex=0.25, pch=16, lwd=2, subset=year > 1970)
tps <- tactile.theme(box.dot=list(col='black', pch='|'), box.rectangle=list(col='black'), box.umbrella=list(col='black', lty=1), plot.symbol=list(col='black', cex=0.33), strip.background=list(col=grey(0.85)))
bwplot(emp.pctile ~ factor(year) | month, data=scaled.data, as.table=TRUE, par.settings=tps, scales=list(alternating=3, x=list(tick.number=10, rot=90)), subset=year > 1970 & month == mo, stats=custom.bwplot, panel=function(...) {panel.grid(-1, -1); panel.bwplot(...)})
bwplot(spi ~ factor(year) | month, data=scaled.data, as.table=TRUE, par.settings=tps, scales=list(alternating=3, x=list(tick.number=10, rot=90)), subset=year > 1970 & month == mo, stats=custom.bwplot, panel=function(...) {panel.grid(-1, -1); panel.bwplot(...)})
por <- sprintf('Period of Record: 1939-%s', yr)
bwplot(emp.pctile ~ factor(year), data=scaled.data, main=sprintf('%s Snow Survey', mo), sub = por, ylim=c(-0.05, 1.1), ylab='Empirical Percentiles of SWE', par.settings=tps, scales=list(alternating=3, y=list(tick.number=10)), subset=year > 1997 & month == mo, panel=function(x=x, y=y, ...) {
panel.grid(-1, -1)
panel.abline(h=0.5, lty=2, col='orange')
panel.bwplot(x=x, y=y, ...)
n.stations <- tapply(y, x, length)
panel.text(x=1:length(n.stations), y=1.03, labels=n.stations, cex=0.65, font=2)
panel.text(x=10, y=1.075, labels='Snow Courses Reporting', cex=0.85, font=3)
})
This document is based on sharpshootR
version 2.3.