Purpose: fit hierarchical stream temp model to all EcoDrought data. Estimate random betas independently by site and month, then inspect spatiotemporal variation in effects of air temperature, flow, and their interaction of stream temperature.
This model uses air temperature and flow data standardized by site.
Notes:
Consider bringing in additional data from NWIS to fill in middle-upper end of catchment size spectrum (Snake: Blackrock, Cache, Granite…bigger rivers? Hoback, Gros Ventre, Greys…where to draw the line?)
consider putting a prior on flow variables in the JAGS model to deal with missing values (rather than setting NAs to 0, as in line 57)
7.1 Load data
Code
dat <-read_csv("data/EcoDrought_FlowTempData_formatted.csv") %>%mutate(month =month(date)) #%>%#filter(basin == "West Brook")# # redo basin and site codes# dat <- dat %>% arrange(basin, site_name, year, yday)# sitecodes <- tibble(site_name = unique(dat$site_name), site_code = 1:length(unique(dat$site_name)))# monthcodes <- tibble(month = unique(dat$month), month_code = 1:length(unique(dat$month)))# basincodes <- tibble(basin = unique(dat$basin), basin_code = 1:length(unique(dat$basin)))# # dat <- dat %>%# select(-c(site_code, basin_code)) %>%# left_join(sitecodes) %>%# left_join(monthcodes) %>%# left_join(basincodes) %>%# mutate(rowNum = 1:nrow(.))dat
Most Site-Year-Months are relatively complete (>29 days of data). But there are many that are incomplete, which likely affects convergence/accurate parameter estimation. Remove these periods and re-do basin, site, and month codes.
Code
dat %>%mutate(site_year_month =paste(site_name, year, month, sep ="_")) %>%group_by(site_year_month) %>%summarize(nobs =n()) %>%ggplot() +geom_histogram(aes(x = nobs)) +theme_bw() +xlab("Number of days per Site-Year-Month")
Code
# identify site-month-years to keep: those with >= 25 days of datakeeps <- dat %>%mutate(site_year_month =paste(site_name, year, month, sep ="_")) %>%group_by(site_year_month) %>%summarize(nobs =n()) %>%ungroup() %>%filter(nobs >=20)# filter data to keeps (~complete months)dat2 <- dat %>%mutate(site_year_month =paste(site_name, year, month, sep ="_")) %>%filter(site_year_month %in% keeps$site_year_month) %>%mutate(rowNum =1:nrow(.))# are the number of unique sites, basins, and months the same between unfiltered and filtered datasets?length(unique(dat$site_name))
[1] 65
Code
length(unique(dat2$site_name))
[1] 65
Code
length(unique(dat$basin))
[1] 7
Code
length(unique(dat2$basin))
[1] 7
Code
length(unique(dat$month))
[1] 12
Code
length(unique(dat2$month))
[1] 12
Code
# if YES, rename to datdat <- dat2# if NO, need to re-do site, basin, and month codes# ...# ...# ...sitecodes <- dat %>%group_by(site_code) %>%summarize(site_name =unique(site_name), basin =unique(basin),basin_code =unique(basin_code)) %>%ungroup()
Specify “hierarchical” model following Letcher et al. (2016). MODIFIED:
add prior on covariates to deal with missing values
calculate Bayesian R^2 and RMSE
remove the year trends (same as for Hocking et al 2018)
estimate covariate effects independently for each site and month (no pooling). This may need to be changed down the line…to account for inherent grouping of data within basins, sites, and months.
Code
cat("model {###----------------- LIKELIHOOD -----------------### # Days without an observation on the previous day (first observation in a series) # No autoregressive term for (i in 1:nFirstObsRows){ temp[firstObsRows[i]] ~ dnorm(stream.mu[firstObsRows[i]], pow(sigma, -2)) stream.mu[firstObsRows[i]] <- trend[firstObsRows[i]] trend[firstObsRows[i]] <- B1[site[firstObsRows[i]], month[firstObsRows[i]]] * X.site[firstObsRows[i],1] + B2[site[firstObsRows[i]], month[firstObsRows[i]]] * X.site[firstObsRows[i],2] + B3[site[firstObsRows[i]], month[firstObsRows[i]]] * X.site[firstObsRows[i],3] + B4[site[firstObsRows[i]], month[firstObsRows[i]]] * X.site[firstObsRows[i],4] + B5[site[firstObsRows[i]], month[firstObsRows[i]]] * X.site[firstObsRows[i],5] + B6[site[firstObsRows[i]], month[firstObsRows[i]]] * X.site[firstObsRows[i],6] #inprod(B.site[site[firstObsRows[i]], month[firstObsRows[i]], ], X.site[firstObsRows[i], ]) } # Days with an observation on the previous dat (all days following the first day) # Includes autoregressive term (ar1) for (i in 1:nEvalRows){ temp[evalRows[i]] ~ dnorm(stream.mu[evalRows[i]], pow(sigma, -2)) stream.mu[evalRows[i]] <- trend[evalRows[i]] + ar1[site[evalRows[i]]] * (temp[evalRows[i]-1] - trend[ evalRows[i]-1 ]) trend[evalRows[i]] <- B1[site[evalRows[i]], month[evalRows[i]]] * X.site[evalRows[i],1] + B2[site[evalRows[i]], month[evalRows[i]]] * X.site[evalRows[i],2] + B3[site[evalRows[i]], month[evalRows[i]]] * X.site[evalRows[i],3] + B4[site[evalRows[i]], month[evalRows[i]]] * X.site[evalRows[i],4] + B5[site[evalRows[i]], month[evalRows[i]]] * X.site[evalRows[i],5] + B6[site[evalRows[i]], month[evalRows[i]]] * X.site[evalRows[i],6] #inprod(B.site[site[evalRows[i]], month[evalRows[i]], ], X.site[evalRows[i], ]) } # Prior on covariates to handle missing (flow) data for (i in 1:n){ for (k in 1:Krandom) { X.site[i,k] ~ dnorm(0, pow(1, -2)) } } # Save log-likelihoods for LOO-CV for (i in 1:n) { loglik[i] <- logdensity.norm(temp[i], stream.mu[i], pow(sigma, -2)) } ###----------------- PRIORS ---------------------### # ar1, hierarchical by site for (i in 1:nsites){ ar1[i] ~ dnorm(ar1Mean, pow(ar1SD, -2)) T(-1,1) } ar1Mean ~ dunif(-1, 1) ar1SD ~ dunif(0, 2) # model variance sigma ~ dunif(0, 100) # random effect coefficients (by site) # for (k in 1:Krandom) { # sigma.B.site[k] ~ dunif(0, 100) # for (i in 1:nsites) { # for (m in unqmonths) { # B.site[i,m,k] ~ dnorm(0, pow(sigma.B.site[k], -2)) # } # } # } sigma.B1 ~ dunif(0, 100) sigma.B2 ~ dunif(0, 100) sigma.B3 ~ dunif(0, 100) sigma.B4 ~ dunif(0, 100) sigma.B5 ~ dunif(0, 100) sigma.B6 ~ dunif(0, 100) for (i in 1:nsites) { for (m in 1:nmonths) { B1[i,m] ~ dnorm(0, pow(sigma.B1, -2)) B2[i,m] ~ dnorm(0, pow(sigma.B2, -2)) B3[i,m] ~ dnorm(0, pow(sigma.B3, -2)) B4[i,m] ~ dnorm(0, pow(sigma.B4, -2)) B5[i,m] ~ dnorm(0, pow(sigma.B5, -2)) B6[i,m] ~ dnorm(0, pow(sigma.B6, -2)) } } # # random effect coefficients (by site with covariate effects, hierarchical by basin) # for (k in 1:Krandom) { # # # site level regressions # for (i in 1:nsites) { # B.site[i,k] ~ dnorm(mu.B.site[i,k], pow(sigma.B.site[k], -2)) # mu.B.site[i,k] <- alpha.0[basin[i],k] + alpha.1[basin[i],k] * elev[i] # } # # # basin level variation in elevation effects # for (j in 1:nbasins) { # alpha.0[j,k] ~ dnorm(mu.alpha.0[k], pow(sigma.alpha.0[k], -2)) # alpha.1[j,k] ~ dnorm(mu.alpha.1[k], pow(sigma.alpha.1[k], -2)) # } # # # hyperpriors: global means and standard deviations # mu.alpha.0[k] ~ dnorm(0, pow(10, -2)) # mu.alpha.1[k] ~ dnorm(0, pow(10, -2)) # # sigma.B.site[k] ~ dunif(0, 100) # # sigma.alpha.0[k] ~ dunif(0, 100) # sigma.alpha.1[k] ~ dunif(0, 100) # } ###----------------- DERIVED VALUES -------------### for (i in 1:n) { residuals[i] <- temp[i] - stream.mu[i] } # variance of model predictions (fixed + random effects) var_fit <- (sd(stream.mu))^2 # residual variance var_res <- (sd(residuals))^2 # calculate Bayesian R^2 R2 <- var_fit / (var_fit + var_res) # Root mean squared error rmse <- sqrt(mean(residuals[]^2)) }", file ="JAGS models/DailyTempModelJAGS_Letcher_hierarchical_modified_site-month.txt")
7.4 Organize objects
Get first observation indices and check that nFirstRowObs equals the number of unique site-years: must be TRUE!
Code
# row indices for first observation in each site-yearfirstObsRows <-unlist(dat %>%group_by(siteYear) %>%summarize(index = rowNum[min(which(!is.na(tempc_mean)))]) %>%ungroup() %>%select(index))nFirstObsRows <-length(firstObsRows)# does the number of first observations match the number of site years?nFirstObsRows ==length(unique(dat$siteYear))
rm(fit_ed)write_csv(rownames_to_column(as.data.frame(param.summary), var ="par"), "Model objects/LetcherTempModel_EcoDrought_hierarchical_SiteMonth_paramsummary.csv")
7.5.0.2 Check convergence
Any problematic R-hat values (>1.05)?
Clearly some convergence issues for a few parameters. Probably driven by sites/months with poor data availability, or omission of year effects/offsets. Good enough for now…but should diagnose these at some point. Pooling may help with this.
ggplot() +geom_histogram(aes(x =c(ppdat_obs_mean - ppdat_exp_mean)), color ="black", fill ="grey") +xlab("Residuals") +theme_bw() +theme(panel.grid =element_blank()) +geom_vline(xintercept =0, color ="red") +xlim(-4,4)
Residuals by time (day of year): are we missing something important? Like some time dependent variable (e.g., water source to flow ~~ the transition from snowmelt to groundwater).
Code
tibble(site_name = dat$site_name, year = dat$year, yday = dat$yday, resid =c(ppdat_obs_mean - ppdat_exp_mean)) %>%ggplot(aes(x = yday, y = resid)) +geom_point(alpha =0.1) +geom_hline(yintercept =c(0), color ="red") +geom_smooth(se =FALSE, linewidth =1.5) +theme_bw() +#theme(panel.grid = element_blank()) +xlab("Day of year") +ylab("Residual") +#facet_wrap(~site_name) + coord_cartesian(ylim =c(-3,3)) #+ facet_wrap(~year)
It’s not clear how informative monthly intercepts are as the air temp and flow data are standardized by site, not by site AND month. Regardless, some interesting patterns.
There are some really interesting temporal patterns in the effect of temperature, with temperature sensitivity generally at it’s maximum during high flow months (snowmelt/runoff), and reduced sensitivity during low flow months (baseflow). This appears to be conserved across basins.
There are some really interesting temporal patterns here, with the effect of flow varying considerably over time within sites. In some basins/sites, the sign of the flow effect changes seasonally, often negative in the spring and positive in the summer. In Staunton and West Brook, flow effects are also always positive: in Staunton, the strongest flow effects are during the spring and autumn, whereas in West Brook, the strongest effects are during summer.
There really aren’t any clear patterns here, and I wonder how useful this interaction is given that we’re allowing the effects of air temp and flow to vary by month.
I’m starting to wonder how reasonable it is to include these lagged effect of air temperature in this model. Lagged variables do have strong effects, and Ben included them in his 2016 and 2018 papers…but those model were built for prediction
Again, some potentially interesting patterns, but given the strong correlation with air temp and that we are trying to make inference from the model…probably best just to drop
result %>%filter(basin =="Donner Blitzen") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Donner Blitzen", x ="Month", y ="Intercept") +#geom_vline(xintercept = 0, linetype = "dashed") +theme_bw()
Code
result %>%filter(basin =="Flathead") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Flathead", x ="Month", y ="Intercept") +#geom_vline(xintercept = 0, linetype = "dashed") +theme_bw()
Code
result %>%filter(basin =="Paine Run") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Paine Run", x ="Month", y ="Intercept") +#geom_vline(xintercept = 0, linetype = "dashed") +theme_bw()
Code
result %>%filter(basin =="Snake River") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Snake River", x ="Month", y ="Intercept") +#geom_vline(xintercept = 0, linetype = "dashed") +theme_bw()
Code
result %>%filter(basin =="Staunton River") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Staunton River", x ="Month", y ="Intercept") +#geom_vline(xintercept = 0, linetype = "dashed") +theme_bw()
Code
result %>%filter(basin =="West Brook") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="West Brook", x ="Month", y ="Intercept") +#geom_vline(xintercept = 0, linetype = "dashed") +theme_bw()
Code
result %>%filter(basin =="Yellowstone") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Yellowstone", x ="Month", y ="Intercept") +#geom_vline(xintercept = 0, linetype = "dashed") +theme_bw()
7.7.2 Temp Effects
Code
# extract relevant parameter namesallpars2 <-row.names(param.summary)temppars <- allpars2[grepl("B2", allpars2)]temppars <- temppars[-length(temppars)]# get numeric components (site code and month)m <-str_match(temppars, "\\[(\\d+),(\\d+)\\]")# create tibble with means and 95% credible intervalsresult <-tibble(site_code =as.integer(m[,2]),month =as.integer(m[,3]),est_mean = param.summary[temppars,1],est_low = param.summary[temppars,3],est_high = param.summary[temppars,7] ) %>%mutate(cont0 =contains_zero(est_low, est_high)) %>%left_join(sitecodes) %>%left_join(sitemonths) %>%filter(!is.na(site_month_code)) %>%mutate(site_name =factor(site_name, levels =unique(dat$site_name)))
result %>%filter(basin =="Donner Blitzen") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name, shape = cont0)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Donner Blitzen", x ="Month", y ="Flow Effect") +geom_hline(yintercept =0, linetype ="dashed") +theme_bw()
Code
result %>%filter(basin =="Flathead") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name, shape = cont0)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Flathead", x ="Month", y ="Flow Effect") +geom_hline(yintercept =0, linetype ="dashed") +theme_bw()
Code
result %>%filter(basin =="Paine Run") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name, shape = cont0)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Paine Run", x ="Month", y ="Flow Effect") +geom_hline(yintercept =0, linetype ="dashed") +theme_bw()
Code
result %>%filter(basin =="Snake River") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name, shape = cont0)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Snake River", x ="Month", y ="Flow Effect") +geom_hline(yintercept =0, linetype ="dashed") +theme_bw()
Code
result %>%filter(basin =="Staunton River") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name, shape = cont0)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Staunton River", x ="Month", y ="Flow Effect") +geom_hline(yintercept =0, linetype ="dashed") +theme_bw()
Code
result %>%filter(basin =="West Brook") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name, shape = cont0)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="West Brook", x ="Month", y ="Flow Effect") +geom_hline(yintercept =0, linetype ="dashed") +theme_bw()
Code
result %>%filter(basin =="Yellowstone") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name, shape = cont0)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Yellowstone", x ="Month", y ="Flow Effect") +geom_hline(yintercept =0, linetype ="dashed") +theme_bw()
7.7.4 Lag 1 Temp Effects
Code
# extract relevant parameter namesallpars2 <-row.names(param.summary)temppars <- allpars2[grepl("B5", allpars2)]temppars <- temppars[-length(temppars)]# get numeric components (site code and month)m <-str_match(temppars, "\\[(\\d+),(\\d+)\\]")# create tibble with means and 95% credible intervalsresult <-tibble(site_code =as.integer(m[,2]),month =as.integer(m[,3]),est_mean = param.summary[temppars,1],est_low = param.summary[temppars,3],est_high = param.summary[temppars,7] ) %>%mutate(cont0 =contains_zero(est_low, est_high)) %>%left_join(sitecodes) %>%left_join(sitemonths) %>%filter(!is.na(site_month_code)) %>%mutate(site_name =factor(site_name, levels =unique(dat$site_name)))
result %>%filter(basin =="Donner Blitzen") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name, shape = cont0)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Donner Blitzen", x ="Month", y ="Lag 1 Temperature Effect") +#geom_vline(xintercept = 0, linetype = "dashed") +theme_bw()
Code
result %>%filter(basin =="Flathead") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name, shape = cont0)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Flathead", x ="Month", y ="Lag 1 Temperature Effect") +#geom_vline(xintercept = 0, linetype = "dashed") +theme_bw()
Code
result %>%filter(basin =="Paine Run") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name, shape = cont0)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Paine Run", x ="Month", y ="Lag 1 Temperature Effect") +#geom_vline(xintercept = 0, linetype = "dashed") +theme_bw()
Code
result %>%filter(basin =="Snake River") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name, shape = cont0)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Snake River", x ="Month", y ="Lag 1 Temperature Effect") +#geom_vline(xintercept = 0, linetype = "dashed") +theme_bw()
Code
result %>%filter(basin =="Staunton River") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name, shape = cont0)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Staunton River", x ="Month", y ="Lag 1 Temperature Effect") +#geom_vline(xintercept = 0, linetype = "dashed") +theme_bw()
Code
result %>%filter(basin =="West Brook") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name, shape = cont0)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="West Brook", x ="Month", y ="Lag 1 Temperature Effect") +#geom_vline(xintercept = 0, linetype = "dashed") +theme_bw()
Code
result %>%filter(basin =="Yellowstone") %>%ggplot() +geom_smooth(aes(x = month, y = est_mean), color ="black") +geom_point(aes(x = month, y = est_mean, color = site_name, shape = cont0)) +geom_line(aes(x = month, y = est_mean, color = site_name)) +labs(title ="Yellowstone", x ="Month", y ="Lag 1 Temperature Effect") +#geom_vline(xintercept = 0, linetype = "dashed") +theme_bw()