4 PASTA by Groundwater

4.1 Data

4.1.1 Sites

Get site-level data from Kevin’s temperature model

Code

# name changes for WyADAPT
dat_names <- read_csv("Stream temperature/StreamSiteNames_WYADAPT.csv")

# site-level covariates and temp model rmse
dat_sites <- read_csv("Stream temperature/model_obs_pred_data.csv") %>% 
  mutate(model_resid = model_pred - meanT,
         year = as.factor(year(date))) %>%
  group_by(site) %>%
  summarize(gw_influence = mean(gw_influence),
            h2oAreaKm2_log = mean(h2oAreaKm2_log),
            forest_buff_perc = mean(forest_buff_perc),
            h2o_slope_mean = mean(h2o_slope_mean),
            h2o_elev_mean = mean(h2o_elev_mean),
            sampsize = n(),
            rmse = sqrt(mean((model_resid)^2)),
            mae = mean(abs(model_resid))) %>%
  ungroup() %>% 
  rename("CurrentName" = "site") %>% 
  left_join(dat_names)

dat_sites

# A tibble: 78 × 11
   CurrentName       gw_influence h2oAreaKm2_log forest_buff_perc h2o_slope_mean
   <chr>                    <dbl>          <dbl>            <dbl>          <dbl>
 1 bailey_1776              0.069          5.15              43.6          19.7 
 2 blackrock_1              0.145          3.14              33.7          12.0 
 3 blackrock_1A             0.08           4.87              42.9          12.5 
 4 blackrock_3              0.118          4.55              79.2          12.8 
 5 blackrock_blspli…        0.19           4.40              42.5          12.6 
 6 blacktail_1A             0.621          1.12              39.3           2.09
 7 blacktail_4B             0.65           0.704             43.7           2.17
 8 bluecrane_Ford           0.624          1.94              58.9           3.57
 9 box_NA                   0.042          3.39              74.7          12.4 
10 buffalofork_2110         0.097          6.42              45.6          17.5 
# ℹ 68 more rows
# ℹ 6 more variables: h2o_elev_mean <dbl>, sampsize <int>, rmse <dbl>,
#   mae <dbl>, UpdatedName <chr>, AdditionalDetails <chr>

Get location data

Code

sites_sp <- st_read("Stream Temperature/WySEaSON_SnakeGreys_StreamTemperature_locations.shp") %>%
  filter(name %in% unique(dat_sites$UpdatedName))

Reading layer `WySEaSON_SnakeGreys_StreamTemperature_locations' from data source `C:\Users\jbaldock\OneDrive - DOI\Documents\USGS\PASTA-groundwater\Stream temperature\WySEaSON_SnakeGreys_StreamTemperature_locations.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 118 features and 4 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -110.8945 ymin: 42.5621 xmax: -109.9837 ymax: 43.9417
Geodetic CRS:  WGS 84

Code

mapview(sites_sp)

View histogram of site-specific groundwater influence. Note: these are only available for the sites Kevin included in the daily stream temperature model. This excludes sites in the Grey’s river drainage, loggers deployed in groundwater spring seeps, and redundant sites (streams within 2+ loggers in close proximity).

Code

dat_sites %>% ggplot() + geom_histogram(aes(gw_influence))

Histogram of site-specific catchment size

Code

dat_sites %>% ggplot() + geom_histogram(aes(h2oAreaKm2_log))

4.1.2 PASTA

Load diel PASTA metrics from Tim Lambert

Code

dat_pasta <- read_csv("WY-GW_daily-metrics.csv") %>% 
  rename("UpdatedName" = "site") %>% 
  left_join(dat_sites) %>%
  mutate(year = year(date),
         month = month(date),
         doy = yday(date))

dat_pasta

# A tibble: 81,037 × 21
   UpdatedName  date       amplitude_ratio phase_lag mean_offset mean_ratio
   <chr>        <date>               <dbl>     <dbl>       <dbl>      <dbl>
 1 Bailey Creek 2019-05-12           0.510    -10.9       -3.52       0.665
 2 Bailey Creek 2019-05-13           0.743    -10.9       -3.66       0.678
 3 Bailey Creek 2019-05-14           0.491     -9.27      -1.66       0.829
 4 Bailey Creek 2019-05-15           0.353     -9.36      -0.700      0.921
 5 Bailey Creek 2019-05-16           0.279     -8.86      -0.448      0.949
 6 Bailey Creek 2019-05-17           0.703     -8.53       3.86       1.92 
 7 Bailey Creek 2019-05-18           0.445     -9.86       3.38       1.83 
 8 Bailey Creek 2019-05-19           0.290     -8.61       2.55       1.53 
 9 Bailey Creek 2019-05-20           0.689     -8.73       3.41       1.84 
10 Bailey Creek 2019-05-21           0.304     -8.20       2.27       1.47 
# ℹ 81,027 more rows
# ℹ 15 more variables: R2_water <dbl>, R2_air <dbl>, CurrentName <chr>,
#   gw_influence <dbl>, h2oAreaKm2_log <dbl>, forest_buff_perc <dbl>,
#   h2o_slope_mean <dbl>, h2o_elev_mean <dbl>, sampsize <int>, rmse <dbl>,
#   mae <dbl>, AdditionalDetails <chr>, year <dbl>, month <dbl>, doy <dbl>

Code

# 
# view(dat_pasta %>%
#   filter(is.na(gw_influence)) %>% group_by(UpdatedName) %>% summarize(n = n()))
# 
# view(dat_pasta %>%
#   filter(!is.na(gw_influence)) %>% group_by(UpdatedName) %>% summarize(n = n()))

View histogram of R-squared for water and air temperature sine wave fits

Code

egg::ggarrange(dat_pasta %>% ggplot() + geom_histogram(aes(R2_water)),
               dat_pasta %>% ggplot() + geom_histogram(aes(R2_air)))

Load flagged phase_lag from Tim L.

Code

phaselag_flags <- read_csv("WY-GW_flag_phase-lag.csv") %>% 
  rename("UpdatedName" = "site")
phaselag_flags

# A tibble: 1,862 × 10
   UpdatedName   year month avg_phase_air avg_phase_water avg_phase_lag
   <chr>        <dbl> <dbl>         <dbl>           <dbl>         <dbl>
 1 Bailey Creek  2019     5          14.5          -0.381         -9.08
 2 Bailey Creek  2019     6          14.6          -0.260         -9.14
 3 Bailey Creek  2019     7          14.1          -0.420         -9.47
 4 Bailey Creek  2019     8          14.0          -0.691         -9.27
 5 Bailey Creek  2020     3          14.2          -1.32          -8.44
 6 Bailey Creek  2020     4          13.6          -0.636         -8.45
 7 Bailey Creek  2020     5          14.6          -0.419         -9.02
 8 Bailey Creek  2020     6          14.3          -0.895         -8.85
 9 Bailey Creek  2020     7          14.3          -0.885         -8.82
10 Bailey Creek  2020     8          14.1          -1.13          -8.82
# ℹ 1,852 more rows
# ℹ 4 more variables: phase_lag_flag1 <lgl>, phase_lag_flag2 <lgl>,
#   phase_lag_flag <dbl>, month_date <date>

Plot time series of monthly flags

Code

phaselag_flags %>%
  ggplot(aes(x = month_date, y = UpdatedName)) + 
  geom_point(aes(color = as.factor(phase_lag_flag))) + 
  scale_color_manual(values = c("0" = "grey", "1" = "orange", "2" = "red")) +
  theme_bw() + theme(legend.position = "top")

Get tibble of “good” sites/months…still >1000 site-months with “good” phase lag estimates. Yay!

Code

noflag <- phaselag_flags %>% filter(phase_lag_flag == 0) %>% select(UpdatedName, year, month, phase_lag_flag)
noflag

# A tibble: 1,074 × 4
   UpdatedName        year month phase_lag_flag
   <chr>             <dbl> <dbl>          <dbl>
 1 Blackrock Creek 1  2021     8              0
 2 Blackrock Creek 1  2021     9              0
 3 Blackrock Creek 1  2021    10              0
 4 Blackrock Creek 1  2022     4              0
 5 Blackrock Creek 1  2022     5              0
 6 Blackrock Creek 1  2022     7              0
 7 Blackrock Creek 1  2022     8              0
 8 Blackrock Creek 1  2022     9              0
 9 Blackrock Creek 1  2022    10              0
10 Blackrock Creek 1  2023     4              0
# ℹ 1,064 more rows

Trim raw PASTA output by phase_lag flags

Code

dat_pasta_trimmed <- dat_pasta %>% left_join(noflag) %>% filter(phase_lag_flag == 0)

4.1.2.1 Inspect PASTA

Set focal stream: Leidy Creek

Code

myname <- "Leidy Creek"

Inspect PASTA time series

Code

dat_pasta_trimmed %>% 
  filter(UpdatedName == myname) %>%
  ggplot(aes(x = doy, y = amplitude_ratio)) + 
  geom_point() + 
  facet_wrap(~year) + ylim(0,1) + 
  geom_smooth(se = FALSE) +
  theme_bw()

Code

dat_pasta_trimmed %>% 
  filter(UpdatedName == myname) %>%
  ggplot(aes(x = doy, y = phase_lag)) + 
  geom_point(color = "red") + 
  facet_wrap(~year) + 
  geom_smooth(se = FALSE, color = "black") +
  theme_bw()

What is the relationship between amplitude ratio and phase lag?

Code

dat_pasta_trimmed %>% 
  filter(UpdatedName == myname) %>%
  ggplot(aes(x = amplitude_ratio, y = phase_lag)) + 
  geom_point() + 
  facet_wrap(~year) +
  xlim(0,1) + ylim(-5,10) + 
  geom_smooth(se = FALSE) +
  theme_bw()

Does this hold for summer months (June - September)? No: negative association between Ar and Pl is driven by ~extreme values during winter and early spring.

Code

dat_pasta_trimmed %>% 
  filter(UpdatedName == myname, month %in% c(6:9)) %>%
  ggplot(aes(x = amplitude_ratio, y = phase_lag)) + 
  geom_point() + 
  facet_wrap(~year) +
  xlim(0,1) + ylim(-5,10) + 
  geom_smooth(se = FALSE) +
  theme_bw()

4.1.3 Streamflow

Get streamflow data from Pacific Creek, a ~central reference gage with long-term streamflow records. We don’t use any of the Snake River gages (e.g., Snake above Palisades Reservoir) because these are all affected by flow regulation at Jackson Lake Dam, and therefore do not provide a good measure of within and among-year variation in climate…particularly during summer months when flows from JLD are a function of downstream irrigation demands. (a metric of within and among year variation in water availability/wetness)

Code

# get mean daily flow in cfs from NWIS

site.info <- readNWISsite("13011500")[,c(1:2,7,8,30:31)] # get site info (Pacific Creek)
flow <- readNWISdata(sites = "13011500", parameterCd = c("00060"), service = "dv", startDate = "1960-01-01", endDate = "2025-09-01")
flow <- flow %>% select(3,4)
names(flow) <- c("date", "flow_cfs")

# fill missing dates using FASSTR
flow <- fill_missing_dates(data = flow, dates = date)

# calculate yield
flow <- flow %>% mutate(flow_cms = flow_cfs*0.02831683199881)
flow <- add_daily_yield(flow, values = flow_cms, basin_area = site.info$drain_area_va*2.58999)

# summarize by year and month
flow_monthly <- flow %>%
  mutate(month = month(date), year = year(date)) %>%
  group_by(year, month) %>%
  summarize(Yield_mm = sum(Yield_mm)) %>%
  ungroup()

View full time series of flow

Code

dygraph(flow %>% select(date, Yield_mm)) %>% dyRangeSelector()

View time series of monthly yield

Code

flow_monthly %>% 
  filter(year %in% c(2019:2024)) %>%
  ggplot(aes(x = month, y = Yield_mm)) + 
  geom_point() + geom_line() + 
  facet_wrap(~year) +
  theme_bw() + theme(panel.grid = element_blank())

4.1.4 Climate

Pull site-specific climate data from Daymet

Code

climlist <- vector("list", length = dim(sites_sp)[1])
for (i in 1:dim(sites_sp)[1]) {
  clim <- download_daymet(site = sites_sp$name[i], lat = sites_sp$latitude[i], lon = sites_sp$longitude[i], start = min(year(dat_pasta_trimmed$date))-1, end = max(year(dat_pasta_trimmed$date)), internal = T)
  climlist[[i]] <- tibble(clim$data) %>% 
    mutate(air_temp_mean = (tmax..deg.c. + tmin..deg.c.)/2, 
           date = as.Date(paste(year, yday, sep = "-"), "%Y-%j"),
           UpdatedName = sites_sp$name[i]) %>%
    select(12,2,11,10,4,6,3,5) %>% rename(precip_mmday = 5, swe_kgm2 = 6, daylength_sec = 7, shortrad_wm2 = 8)
  print(i)
}
climdf <- do.call(rbind, climlist)
climdf <- add_date_variables(climdf, dates = date, water_year_start = 10) %>% rename(month = Month)
write_csv(climdf, "Air temperature/Daymet_daily.csv")

Code

dat_climate <- read_csv("Air temperature/Daymet_daily.csv") %>%
  group_by(UpdatedName, WaterYear) %>% 
  summarize(swe_mean = mean(swe_kgm2),
            swe_sum = sum(swe_kgm2),
            swe_apr1 = swe_kgm2[yday == 91]) %>%
  ungroup() %>%
  left_join(read_csv("Air temperature/Daymet_daily.csv") %>%
              filter(yday >= 152 & yday <= 273) %>%
              group_by(UpdatedName, WaterYear, month) %>% 
              summarize(precip_mean = mean(precip_mmday),
                        precip_sum = sum(precip_mmday)) %>%
              ungroup()
    ) %>% 
  select(UpdatedName, WaterYear, month, swe_mean, swe_sum, swe_apr1, precip_mean, precip_sum) %>%
  rename(year = WaterYear)
ggpairs(dat_climate %>% select(4:8) )

4.2 Model PASTA

4.2.1 Summarize data

Summarize PASTA by month. Note that we filter to June-September and >= 2020, i.e., months/years with substantial data. Also, again, only considering the sites in Kevin’s temp model (removes high density temperature observations on some streams, loggers in springs, etc.)

Code

dat_pasta_monthly <- dat_pasta %>% 
  filter(R2_water >= 0.8, R2_air >= 0.8, !is.na(gw_influence)) %>%
  filter(month %in% c(6:9), year >= 2020) %>% # filter to just the years/months with substantial data
  group_by(UpdatedName, year, month) %>%
  summarize(ndays = n(),
            amplitude_ratio = mean(amplitude_ratio),
            phase_lag = mean(phase_lag),
            mean_offset = mean(mean_offset),
            mean_ratio = mean(mean_ratio),
            gw_influence = mean(gw_influence),
            h2oAreaKm2_log = mean(h2oAreaKm2_log)) %>%
  ungroup() %>%
  filter(ndays >= 25) %>% 
  left_join(flow_monthly) %>%
  mutate(gw_influence_log = log(gw_influence),
         Yield_mm_log = log(Yield_mm)) %>%
  left_join(dat_climate) %>%
  mutate(swe_apr1_log = log(swe_apr1+0.01),
         precip_sum_log = log(precip_sum+0.01)) %>%
  filter(!UpdatedName %in% c("Lake Creek")) # drop sites that we don't expect to conform to typical paired air-water temp dynamics

View distributions of summer mean PASTA parameters (some sites with multiple years). Note the problematic phase_lags (negative and >5)

Code

egg::ggarrange(dat_pasta_monthly %>% ggplot() + geom_histogram(aes(amplitude_ratio)),
               dat_pasta_monthly %>% ggplot() + geom_histogram(aes(phase_lag)),
               dat_pasta_monthly %>% ggplot() + geom_histogram(aes(mean_offset)),
               dat_pasta_monthly %>% ggplot() + geom_histogram(aes(mean_ratio)),
               nrow = 2, ncol = 2)

View pairs plot between groundwater influence, total monthly yield, watershed area, and month

Code

ggpairs(dat_pasta_monthly %>% select(gw_influence_log, Yield_mm_log, h2oAreaKm2_log, swe_apr1_log, precip_sum_log, month))

Weak correlation between watershed area and groundwater influence: groundwater-dominated streams tend to be smaller.
Weak correlation between groundwater influence and April 1 SWE: sites with strong groundwater influence tend to be at lower elevations, which receive less snowfall.
Strong correlation between month and total Yield: volumetric streamflow declines from June to September. Month ~is a proxy for water availability.

4.2.2 Amplitude ratio

4.2.2.1 Model by month/year

Facet by year and month

Code

dat_pasta_monthly %>% 
  ggplot(aes(x = gw_influence_log, y = amplitude_ratio)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(rows = vars(year), cols = vars(month)) + 
  theme_bw() + theme(panel.grid = element_blank())

Fit random slope and intercept model, by month and year

Code

mymod <- lmerTest::lmer(amplitude_ratio ~ gw_influence_log + (gw_influence_log | month + year + UpdatedName), data = dat_pasta_monthly)
summary(mymod)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: amplitude_ratio ~ gw_influence_log + (gw_influence_log | month +  
    year + UpdatedName)
   Data: dat_pasta_monthly

REML criterion at convergence: -1196.9

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.2946 -0.5851 -0.0375  0.5922  3.3545 

Random effects:
 Groups      Name             Variance  Std.Dev. Corr 
 UpdatedName (Intercept)      2.638e-02 0.162407      
             gw_influence_log 3.174e-03 0.056341 0.96 
 year        (Intercept)      1.719e-03 0.041461      
             gw_influence_log 6.072e-05 0.007792 -0.42
 month       (Intercept)      1.446e-03 0.038023      
             gw_influence_log 1.178e-04 0.010852 0.59 
 Residual                     2.933e-03 0.054155      
Number of obs: 467, groups:  UpdatedName, 61; year, 5; month, 4

Fixed effects:
                 Estimate Std. Error       df t value Pr(>|t|)    
(Intercept)       0.28977    0.04123 21.00838   7.027 6.15e-07 ***
gw_influence_log -0.04848    0.01470 31.93680  -3.297   0.0024 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr)
gw_nflnc_lg 0.715

View coefficients

Code

coef(mymod)$year

     (Intercept) gw_influence_log
2020   0.3173845      -0.04880867
2021   0.3079358      -0.05748761
2022   0.3027950      -0.04561930
2023   0.2189023      -0.04063523
2024   0.3018081      -0.04984237

Code

coef(mymod)$month

  (Intercept) gw_influence_log
6   0.3269529      -0.03571110
7   0.3074592      -0.05264541
8   0.2797297      -0.05627746
9   0.2449189      -0.04928058

Relationship between groundwater and amplitude ratio is generally negative, with the strongest negative effects typically seen in August and during the driest year (2021)

4.2.2.2 Model by covariates

Standardize covariates

Code

dat_pasta_monthly <- dat_pasta_monthly %>%
  mutate(z_gw_influence_log = c(scale(gw_influence_log)),
         z_yield_mm_log = c(scale(Yield_mm_log)),
         z_h2oareakm2_log = c(scale(h2oAreaKm2_log)),
         z_swe_apr1_log = c(scale(swe_apr1_log)),
         z_precip_sum = c(scale(precip_sum)))

Visualize relationships between amplitude ratio and (log) groundwater influence, (log) total monthly yield, and (log) watershed area.

Code

egg::ggarrange(
  dat_pasta_monthly %>% 
    ggplot(aes(x = z_gw_influence_log, y = amplitude_ratio)) +
    geom_point() +
    geom_smooth(method = "lm") + 
    theme_bw() + theme(panel.grid = element_blank()), 
  dat_pasta_monthly %>% 
    ggplot(aes(x = z_yield_mm_log, y = amplitude_ratio)) +
    geom_point() +
    geom_smooth(method = "lm") + 
    theme_bw() + theme(panel.grid = element_blank()),
  dat_pasta_monthly %>% 
    ggplot(aes(x = z_h2oareakm2_log, y = amplitude_ratio)) +
    geom_point() +
    geom_smooth(method = "lm") + 
    theme_bw() + theme(panel.grid = element_blank()),
  dat_pasta_monthly %>% 
    ggplot(aes(x = z_swe_apr1_log, y = amplitude_ratio)) +
    geom_point() +
    geom_smooth(method = "lm") + 
    theme_bw() + theme(panel.grid = element_blank()),
  dat_pasta_monthly %>% 
    ggplot(aes(x = z_precip_sum, y = amplitude_ratio)) +
    geom_point() +
    geom_smooth(method = "lm") + 
    theme_bw() + theme(panel.grid = element_blank()),
  nrow = 1)

Simple model

Simple linear model with additive effects

Code

summary(lm(amplitude_ratio ~ z_gw_influence_log + z_yield_mm_log + z_h2oareakm2_log + z_swe_apr1_log + z_precip_sum, dat_pasta_monthly))

Additive effects with random intercept for year

Code

mymod <- lmerTest::lmer(amplitude_ratio ~ z_gw_influence_log + z_yield_mm_log + z_h2oareakm2_log + z_swe_apr1_log + z_precip_sum + (1 | UpdatedName), data = dat_pasta_monthly)
summary(mymod)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
amplitude_ratio ~ z_gw_influence_log + z_yield_mm_log + z_h2oareakm2_log +  
    z_swe_apr1_log + z_precip_sum + (1 | UpdatedName)
   Data: dat_pasta_monthly

REML criterion at convergence: -968.4

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.8886 -0.5780  0.1259  0.6551  2.2134 

Random effects:
 Groups      Name        Variance Std.Dev.
 UpdatedName (Intercept) 0.006892 0.08302 
 Residual                0.005002 0.07073 
Number of obs: 467, groups:  UpdatedName, 61

Fixed effects:
                     Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)         3.910e-01  1.152e-02  5.361e+01  33.952  < 2e-16 ***
z_gw_influence_log -3.199e-02  1.310e-02  5.879e+01  -2.443   0.0176 *  
z_yield_mm_log     -9.008e-05  3.397e-03  4.098e+02  -0.027   0.9789    
z_h2oareakm2_log    2.016e-02  1.153e-02  5.791e+01   1.749   0.0856 .  
z_swe_apr1_log      1.033e-02  6.967e-03  3.972e+02   1.482   0.1391    
z_precip_sum       -1.690e-02  3.650e-03  4.356e+02  -4.630 4.84e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) z_gw__ z_yl__ z_h22_ z_s_1_
z_gw_nflnc_ -0.085                            
z_yld_mm_lg  0.002  0.033                     
z_h2rkm2_lg  0.028  0.550 -0.006              
z_sw_pr1_lg -0.032  0.273  0.114  0.048       
z_precip_sm  0.006  0.115  0.141  0.033  0.279

By GW and flow

No random effects

Simple linear model, groundwater + flow volume

Code

mymod <- lm(amplitude_ratio ~ z_gw_influence_log*z_yield_mm_log, dat_pasta_monthly)
summary(mymod)


Call:
lm(formula = amplitude_ratio ~ z_gw_influence_log * z_yield_mm_log, 
    data = dat_pasta_monthly)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.25194 -0.07106 -0.00085  0.06932  0.34580 

Coefficients:
                                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)                        0.397731   0.004931  80.656  < 2e-16 ***
z_gw_influence_log                -0.034816   0.004947  -7.038 7.06e-12 ***
z_yield_mm_log                     0.001184   0.004961   0.239  0.81148    
z_gw_influence_log:z_yield_mm_log  0.014217   0.005394   2.636  0.00868 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1066 on 463 degrees of freedom
Multiple R-squared:  0.1132,    Adjusted R-squared:  0.1075 
F-statistic: 19.71 on 3 and 463 DF,  p-value: 4.874e-12

Code

sqrt(mean(mymod$residuals^2))

[1] 0.1060988

Plot interactive effects

Code

x1 <- seq(min(dat_pasta_monthly$z_gw_influence_log), max(dat_pasta_monthly$z_gw_influence_log), length.out = 100)
x2 <- seq(min(dat_pasta_monthly$z_yield_mm_log), max(dat_pasta_monthly$z_yield_mm_log), length.out = 100)

prediction_grid <- expand.grid(z_gw_influence_log = x1, z_yield_mm_log = x2)
prediction_grid$y_predicted <- predict(mymod, newdata = prediction_grid)

ggplot() +
  geom_tile(prediction_grid, mapping = aes(x = z_gw_influence_log, y = z_yield_mm_log, fill = y_predicted)) +
  scale_fill_viridis_c(option = "plasma") + # Use a color scale, e.g., viridis
  labs(
    x = "(log) groundwater influence",
    y = "(log) total yield",
    fill = "monthly mean\namplitude ratio"
  ) +
  geom_point(dat_pasta_monthly, mapping = aes(x = z_gw_influence_log, y = z_yield_mm_log), size = 0.2) +
  theme_bw()

By GW and winter snow

No random effects

Simple linear model, groundwater + winter snow

Code

mymod <- lm(amplitude_ratio ~ z_gw_influence_log*z_swe_apr1_log, dat_pasta_monthly)
summary(mymod)


Call:
lm(formula = amplitude_ratio ~ z_gw_influence_log * z_swe_apr1_log, 
    data = dat_pasta_monthly)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.26297 -0.07132  0.00445  0.06408  0.31878 

Coefficients:
                                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)                        0.390432   0.005206  74.991  < 2e-16 ***
z_gw_influence_log                -0.035159   0.005189  -6.776 3.78e-11 ***
z_swe_apr1_log                    -0.007435   0.005744  -1.294    0.196    
z_gw_influence_log:z_swe_apr1_log -0.021390   0.005400  -3.961 8.64e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1056 on 463 degrees of freedom
Multiple R-squared:  0.1298,    Adjusted R-squared:  0.1242 
F-statistic: 23.02 on 3 and 463 DF,  p-value: 6.625e-14

Code

sqrt(mean(mymod$residuals^2))

[1] 0.1051034

Plot interactive effects

Code

x1 <- seq(min(dat_pasta_monthly$z_gw_influence_log), max(dat_pasta_monthly$z_gw_influence_log), length.out = 100)
x2 <- seq(min(dat_pasta_monthly$z_swe_apr1_log), max(dat_pasta_monthly$z_swe_apr1_log), length.out = 100)

prediction_grid <- expand.grid(z_gw_influence_log = x1, z_swe_apr1_log = x2)
prediction_grid$y_predicted <- predict(mymod, newdata = prediction_grid)

ggplot() +
  geom_tile(prediction_grid, mapping = aes(x = z_gw_influence_log, y = z_swe_apr1_log, fill = y_predicted)) +
  scale_fill_viridis_c(option = "plasma") + # Use a color scale, e.g., viridis
  labs(
    x = "(log) groundwater influence",
    y = "(log) April 1 SWE",
    fill = "monthly mean\namplitude ratio"
  ) +
  geom_point(dat_pasta_monthly, mapping = aes(x = z_gw_influence_log, y = z_swe_apr1_log), size = 0.2) + 
  theme_bw()

By GW and summer precip

No random effects

Simple linear model, groundwater + summer precip

Code

mymod <- lm(amplitude_ratio ~ z_gw_influence_log*z_precip_sum, dat_pasta_monthly)
summary(mymod)


Call:
lm(formula = amplitude_ratio ~ z_gw_influence_log * z_precip_sum, 
    data = dat_pasta_monthly)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.23078 -0.07156 -0.00294  0.07016  0.34117 

Coefficients:
                                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)                      0.397874   0.004903  81.143  < 2e-16 ***
z_gw_influence_log              -0.037955   0.004916  -7.720 7.22e-14 ***
z_precip_sum                    -0.020692   0.004929  -4.198 3.23e-05 ***
z_gw_influence_log:z_precip_sum  0.002614   0.004801   0.544    0.586    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1053 on 463 degrees of freedom
Multiple R-squared:  0.1344,    Adjusted R-squared:  0.1288 
F-statistic: 23.97 on 3 and 463 DF,  p-value: 1.954e-14

Code

sqrt(mean(mymod$residuals^2))

[1] 0.1048228

Plot interactive effects

Code

x1 <- seq(min(dat_pasta_monthly$z_gw_influence_log), max(dat_pasta_monthly$z_gw_influence_log), length.out = 100)
x2 <- seq(min(dat_pasta_monthly$z_precip_sum), max(dat_pasta_monthly$z_precip_sum), length.out = 100)

prediction_grid <- expand.grid(z_gw_influence_log = x1, z_precip_sum = x2)
prediction_grid$y_predicted <- predict(mymod, newdata = prediction_grid)

ggplot() +
  geom_tile(prediction_grid, mapping = aes(x = z_gw_influence_log, y = z_precip_sum, fill = y_predicted)) +
  scale_fill_viridis_c(option = "plasma") + # Use a color scale, e.g., viridis
  labs(
    x = "(log) groundwater influence",
    y = "Cumulative summer precip. (mm)",
    fill = "monthly mean\namplitude ratio"
  ) +
  geom_point(dat_pasta_monthly, mapping = aes(x = z_gw_influence_log, y = z_precip_sum), size = 0.2) +
  theme_bw()

By GW and area

No random effects

Simple linear model, groundwater + flow volume

Code

mymod <- lm(amplitude_ratio ~ z_gw_influence_log*z_h2oareakm2_log, dat_pasta_monthly)
summary(mymod)


Call:
lm(formula = amplitude_ratio ~ z_gw_influence_log * z_h2oareakm2_log, 
    data = dat_pasta_monthly)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.24401 -0.07490  0.00628  0.06636  0.33481 

Coefficients:
                                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)                          0.405219   0.005420  74.770  < 2e-16 ***
z_gw_influence_log                  -0.019467   0.006006  -3.241  0.00128 ** 
z_h2oareakm2_log                     0.007249   0.006432   1.127  0.26038    
z_gw_influence_log:z_h2oareakm2_log  0.016865   0.005296   3.184  0.00155 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.105 on 463 degrees of freedom
Multiple R-squared:  0.1392,    Adjusted R-squared:  0.1336 
F-statistic: 24.95 on 3 and 463 DF,  p-value: 5.59e-15

Code

sqrt(mean(mymod$residuals^2))

[1] 0.1045361

Plot interactive effects

Code

x1 <- seq(min(dat_pasta_monthly$z_gw_influence_log), max(dat_pasta_monthly$z_gw_influence_log), length.out = 100)
x2 <- seq(min(dat_pasta_monthly$z_h2oareakm2_log), max(dat_pasta_monthly$z_h2oareakm2_log), length.out = 100)

prediction_grid <- expand.grid(z_gw_influence_log = x1, z_h2oareakm2_log = x2)
prediction_grid$y_predicted <- predict(mymod, newdata = prediction_grid)

ggplot() +
  geom_tile(prediction_grid, mapping = aes(x = z_gw_influence_log, y = z_h2oareakm2_log, fill = y_predicted)) +
  scale_fill_viridis_c(option = "plasma") + # Use a color scale, e.g., viridis
  labs(
    x = "(log) groundwater influence",
    y = "(log) watershed area",
    fill = "monthly mean\namplitude ratio"
  ) +
  geom_point(dat_pasta_monthly, mapping = aes(x = z_gw_influence_log, y = z_h2oareakm2_log), size = 0.2) +
  theme_bw()

Full interaction

No random effects.

Fit the model: interactions between groundwater x watershed area and groundwater x flow volume. (with a 3-way interaction, no significant effects)

Code

mymod <- lm(amplitude_ratio ~ z_gw_influence_log*z_h2oareakm2_log + z_gw_influence_log*z_yield_mm_log + z_gw_influence_log*z_swe_apr1_log + z_gw_influence_log*z_precip_sum, dat_pasta_monthly)
summary(mymod)


Call:
lm(formula = amplitude_ratio ~ z_gw_influence_log * z_h2oareakm2_log + 
    z_gw_influence_log * z_yield_mm_log + z_gw_influence_log * 
    z_swe_apr1_log + z_gw_influence_log * z_precip_sum, data = dat_pasta_monthly)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.249348 -0.073112  0.002141  0.063618  0.275447 

Coefficients:
                                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)                          0.4004632  0.0055304  72.412  < 2e-16 ***
z_gw_influence_log                  -0.0160727  0.0065261  -2.463   0.0142 *  
z_h2oareakm2_log                     0.0006563  0.0063546   0.103   0.9178    
z_yield_mm_log                      -0.0007010  0.0047433  -0.148   0.8826    
z_swe_apr1_log                       0.0001664  0.0058926   0.028   0.9775    
z_precip_sum                        -0.0202221  0.0047801  -4.231 2.82e-05 ***
z_gw_influence_log:z_h2oareakm2_log  0.0229710  0.0055983   4.103 4.82e-05 ***
z_gw_influence_log:z_yield_mm_log    0.0120094  0.0051807   2.318   0.0209 *  
z_gw_influence_log:z_swe_apr1_log   -0.0234781  0.0052165  -4.501 8.60e-06 ***
z_gw_influence_log:z_precip_sum      0.0015018  0.0047034   0.319   0.7496    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1005 on 457 degrees of freedom
Multiple R-squared:  0.2217,    Adjusted R-squared:  0.2064 
F-statistic: 14.47 on 9 and 457 DF,  p-value: < 2.2e-16

Code

x1 <- seq(min(dat_pasta_monthly$z_gw_influence_log), max(dat_pasta_monthly$z_gw_influence_log), length.out = 100)
x2 <- seq(min(dat_pasta_monthly$z_h2oareakm2_log), max(dat_pasta_monthly$z_h2oareakm2_log), length.out = 100)

prediction_grid <- expand.grid(z_gw_influence_log = x1, z_h2oareakm2_log = x2)
prediction_grid$z_yield_mm_log <- 0
prediction_grid$z_swe_apr1_log <- 0
prediction_grid$z_precip_sum <- 0
prediction_grid$y_predicted <- predict(mymod, newdata = prediction_grid)

ggplot(prediction_grid, aes(x = z_gw_influence_log, y = z_h2oareakm2_log, fill = y_predicted)) +
  geom_tile() +
  scale_fill_viridis_c(option = "plasma") + # Use a color scale, e.g., viridis
  labs(
    x = "(log) groundwater influence",
    y = "(log) watershed area",
    fill = "monthly mean\namplitude ratio"
  ) +
  theme_bw()

Code

x1 <- seq(min(dat_pasta_monthly$z_gw_influence_log), max(dat_pasta_monthly$z_gw_influence_log), length.out = 100)
x2 <- seq(min(dat_pasta_monthly$z_yield_mm_log), max(dat_pasta_monthly$z_yield_mm_log), length.out = 100)

prediction_grid <- expand.grid(z_gw_influence_log = x1, z_yield_mm_log = x2)
prediction_grid$z_h2oareakm2_log <- 0
prediction_grid$z_swe_apr1_log <- 0
prediction_grid$z_precip_sum <- 0
prediction_grid$y_predicted <- predict(mymod, newdata = prediction_grid)

ggplot(prediction_grid, aes(x = z_gw_influence_log, y = z_yield_mm_log, fill = y_predicted)) +
  geom_tile() +
  scale_fill_viridis_c(option = "plasma") + # Use a color scale, e.g., viridis
  labs(
    x = "(log) groundwater influence",
    y = "(log) total yield",
    fill = "monthly mean\namplitude ratio"
  ) +
  theme_bw()

Code

x1 <- seq(min(dat_pasta_monthly$z_gw_influence_log), max(dat_pasta_monthly$z_gw_influence_log), length.out = 100)
x2 <- seq(min(dat_pasta_monthly$z_swe_apr1_log), max(dat_pasta_monthly$z_swe_apr1_log), length.out = 100)

prediction_grid <- expand.grid(z_gw_influence_log = x1, z_swe_apr1_log = x2)
prediction_grid$z_h2oareakm2_log <- 0
prediction_grid$z_yield_mm_log <- 0
prediction_grid$z_precip_sum <- 0
prediction_grid$y_predicted <- predict(mymod, newdata = prediction_grid)

ggplot(prediction_grid, aes(x = z_gw_influence_log, y = z_swe_apr1_log, fill = y_predicted)) +
  geom_tile() +
  scale_fill_viridis_c(option = "plasma") + # Use a color scale, e.g., viridis
  labs(
    x = "(log) groundwater influence",
    y = "(log) April 1 SWE",
    fill = "monthly mean\namplitude ratio"
  ) +
  theme_bw()

Code

x1 <- seq(min(dat_pasta_monthly$z_gw_influence_log), max(dat_pasta_monthly$z_gw_influence_log), length.out = 100)
x2 <- seq(min(dat_pasta_monthly$z_precip_sum), max(dat_pasta_monthly$z_precip_sum), length.out = 100)

prediction_grid <- expand.grid(z_gw_influence_log = x1, z_precip_sum = x2)
prediction_grid$z_h2oareakm2_log <- 0
prediction_grid$z_swe_apr1_log <- 0
prediction_grid$z_yield_mm_log <- 0
prediction_grid$y_predicted <- predict(mymod, newdata = prediction_grid)

ggplot(prediction_grid, aes(x = z_gw_influence_log, y = z_precip_sum, fill = y_predicted)) +
  geom_tile() +
  scale_fill_viridis_c(option = "plasma") + # Use a color scale, e.g., viridis
  labs(
    x = "(log) groundwater influence",
    y = "Cumulative summer precip. (mm)",
    fill = "monthly mean\namplitude ratio"
  ) +
  theme_bw()

4.2.3 Phase lag

Trim to only “good” phase lag estimates. Roughly half the number of observations.

Code

dat_pasta_monthly_trimmed <- dat_pasta_monthly %>% left_join(noflag) %>% filter(phase_lag_flag == 0)

ggpairs(dat_pasta_monthly_trimmed %>% select(gw_influence_log, Yield_mm_log, h2oAreaKm2_log, month))

Plot relationship between key parameters: no relationship…is this weird? Not necessarily given that the negative correlations between amplitude ratio and phase lag are driven by extreme values during ~winter

Code

egg::ggarrange(dat_pasta_monthly_trimmed %>% ggplot(aes(x = amplitude_ratio, y = phase_lag)) + geom_point(),
               dat_pasta_monthly_trimmed %>% ggplot(aes(x = amplitude_ratio, y = mean_offset)) + geom_point(),
               dat_pasta_monthly_trimmed %>% ggplot(aes(x = phase_lag, y = mean_offset)) + geom_point(),
               nrow = 2, ncol = 2)

4.2.3.1 Model by month/year

Facet by year and month

Code

dat_pasta_monthly_trimmed %>% 
  ggplot(aes(x = gw_influence_log, y = phase_lag)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(rows = vars(year), cols = vars(month)) + 
  theme_bw() + theme(panel.grid = element_blank())

Fit random slope and intercept model, by month and year

Code

mymod <- lmerTest::lmer(phase_lag ~ gw_influence_log + (gw_influence_log | month + year + UpdatedName), data = dat_pasta_monthly_trimmed)
summary(mymod)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: phase_lag ~ gw_influence_log + (gw_influence_log | month + year +  
    UpdatedName)
   Data: dat_pasta_monthly_trimmed

REML criterion at convergence: 532.5

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.04846 -0.64994  0.01007  0.63464  2.57930 

Random effects:
 Groups      Name             Variance Std.Dev. Corr
 UpdatedName (Intercept)      2.11419  1.4540       
             gw_influence_log 0.49138  0.7010   0.98
 year        (Intercept)      0.75140  0.8668       
             gw_influence_log 0.10283  0.3207   1.00
 month       (Intercept)      0.17372  0.4168       
             gw_influence_log 0.03197  0.1788   0.99
 Residual                     0.28456  0.5334       
Number of obs: 270, groups:  UpdatedName, 43; year, 5; month, 4

Fixed effects:
                 Estimate Std. Error      df t value Pr(>|t|)   
(Intercept)        2.3377     0.5360 10.4636   4.361  0.00128 **
gw_influence_log  -0.0155     0.2233 13.3050  -0.069  0.94571   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr)
gw_nflnc_lg 0.977

View coefficients

Code

coef(mymod)$year

     (Intercept) gw_influence_log
2020    2.349617      -0.01281523
2021    2.152723      -0.07949259
2022    1.645211      -0.26793148
2023    3.758694       0.50903767
2024    1.782265      -0.22628575

Code

coef(mymod)$month

  (Intercept) gw_influence_log
6    2.107019      -0.11769070
7    2.058299      -0.14173562
8    2.312237      -0.01013196
9    2.873255       0.20756838

Relationship between groundwater and amplitude ratio is generally negative, with the strongest negative effects typically seen in August and during the driest year (2021)

4.2.3.2 Model by covariates

Visualize relationships between amplitude ratio and (log) groundwater influence, (log) total monthly yield, and (log) watershed area.

Code

egg::ggarrange(
  dat_pasta_monthly_trimmed %>% 
    ggplot(aes(x = gw_influence_log, y = phase_lag)) +
    geom_point() +
    geom_smooth(method = "lm") + 
    theme_bw() + theme(panel.grid = element_blank()), 
  dat_pasta_monthly_trimmed %>% 
    ggplot(aes(x = Yield_mm_log, y = phase_lag)) +
    geom_point() +
    geom_smooth(method = "lm") + 
    theme_bw() + theme(panel.grid = element_blank()),
  dat_pasta_monthly_trimmed %>% 
    ggplot(aes(x = h2oAreaKm2_log, y = phase_lag)) +
    geom_point() +
    geom_smooth(method = "lm") + 
    theme_bw() + theme(panel.grid = element_blank()),
  nrow = 1)

Simple model

Simple linear model with additive effects

Code

summary(lm(phase_lag ~ gw_influence_log + Yield_mm_log + h2oAreaKm2_log, dat_pasta_monthly_trimmed))

Additive effects with random intercept for year

Code

mymod <- lmerTest::lmer(phase_lag ~ gw_influence_log + Yield_mm_log + h2oAreaKm2_log + (1 | year), data = dat_pasta_monthly_trimmed)
summary(mymod)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: phase_lag ~ gw_influence_log + Yield_mm_log + h2oAreaKm2_log +  
    (1 | year)
   Data: dat_pasta_monthly_trimmed

REML criterion at convergence: 666.9

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.39990 -0.66500  0.07179  0.80290  2.06614 

Random effects:
 Groups   Name        Variance Std.Dev.
 year     (Intercept) 0.08123  0.2850  
 Residual             0.64099  0.8006  
Number of obs: 270, groups:  year, 5

Fixed effects:
                   Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)        1.240533   0.256285  35.045804   4.840 2.59e-05 ***
gw_influence_log  -0.112867   0.069115 262.538039  -1.633    0.104    
Yield_mm_log       0.007654   0.048844 258.948239   0.157    0.876    
h2oAreaKm2_log     0.165959   0.037094 263.068163   4.474 1.14e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) gw_nf_ Yld_m_
gw_nflnc_lg  0.197              
Yield_mm_lg -0.635  0.017       
h2oArKm2_lg -0.347  0.505  0.016

By GW and flow

No random effects

Simple linear model, groundwater * flow volume

Code

mymod <- lm(phase_lag ~ gw_influence_log*Yield_mm_log, dat_pasta_monthly_trimmed)
summary(mymod)


Call:
lm(formula = phase_lag ~ gw_influence_log * Yield_mm_log, data = dat_pasta_monthly_trimmed)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.42018 -0.64187 -0.01196  0.64463  1.59655 

Coefficients:
                              Estimate Std. Error t value Pr(>|t|)    
(Intercept)                    2.25748    0.44205   5.107 6.25e-07 ***
gw_influence_log               0.11582    0.20739   0.558   0.5770    
Yield_mm_log                  -0.20482    0.13142  -1.559   0.1203    
gw_influence_log:Yield_mm_log -0.11621    0.06136  -1.894   0.0593 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8465 on 266 degrees of freedom
Multiple R-squared:  0.07309,   Adjusted R-squared:  0.06264 
F-statistic: 6.992 on 3 and 266 DF,  p-value: 0.0001522

Plot interactive effects

Code

x1 <- seq(min(dat_pasta_monthly$gw_influence_log), max(dat_pasta_monthly$gw_influence_log), length.out = 100)
x2 <- seq(min(dat_pasta_monthly$Yield_mm_log), max(dat_pasta_monthly$Yield_mm_log), length.out = 100)

prediction_grid <- expand.grid(gw_influence_log = x1, Yield_mm_log = x2)
prediction_grid$y_predicted <- predict(mymod, newdata = prediction_grid)

ggplot(prediction_grid, aes(x = gw_influence_log, y = Yield_mm_log, fill = y_predicted)) +
  geom_tile() +
  scale_fill_viridis_c(option = "plasma") + # Use a color scale, e.g., viridis
  labs(
    x = "(log) groundwater influence",
    y = "(log) total yield",
    fill = "monthly mean\nphase lag"
  ) +
  theme_bw()

By GW and area

No random effects

Simple linear model, groundwater * watershed area

Code

mymod <- lm(phase_lag ~ gw_influence_log*h2oAreaKm2_log, dat_pasta_monthly_trimmed)
summary(mymod)


Call:
lm(formula = phase_lag ~ gw_influence_log * h2oAreaKm2_log, data = dat_pasta_monthly_trimmed)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.99342 -0.51645  0.05903  0.57285  2.31855 

Coefficients:
                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                      0.40200    0.23719   1.695   0.0913 .  
gw_influence_log                -0.82740    0.17008  -4.865 1.96e-06 ***
h2oAreaKm2_log                   0.43042    0.07140   6.029 5.50e-09 ***
gw_influence_log:h2oAreaKm2_log  0.19615    0.04304   4.557 7.91e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7979 on 266 degrees of freedom
Multiple R-squared:  0.1765,    Adjusted R-squared:  0.1672 
F-statistic:    19 on 3 and 266 DF,  p-value: 3.398e-11

Plot interactive effects

Code

x1 <- seq(min(dat_pasta_monthly$gw_influence_log), max(dat_pasta_monthly$gw_influence_log), length.out = 100)
x2 <- seq(min(dat_pasta_monthly$h2oAreaKm2_log), max(dat_pasta_monthly$h2oAreaKm2_log), length.out = 100)

prediction_grid <- expand.grid(gw_influence_log = x1, h2oAreaKm2_log = x2)
prediction_grid$y_predicted <- predict(mymod, newdata = prediction_grid)

ggplot(prediction_grid, aes(x = gw_influence_log, y = h2oAreaKm2_log, fill = y_predicted)) +
  geom_tile() +
  scale_fill_viridis_c(option = "plasma") + # Use a color scale, e.g., viridis
  labs(
    x = "(log) groundwater influence",
    y = "(log) watershed area",
    fill = "monthly mean\nphase lag"
  ) +
  theme_bw()

Full interaction

No random effects.

Fit the model: interactions between groundwater x watershed area and groundwater x flow volume. (with a 3-way interaction, no significant effects)

Code

mymod <- lm(phase_lag ~ gw_influence_log*h2oAreaKm2_log + gw_influence_log*Yield_mm_log, dat_pasta_monthly_trimmed)
summary(mymod)


Call:
lm(formula = phase_lag ~ gw_influence_log * h2oAreaKm2_log + 
    gw_influence_log * Yield_mm_log, data = dat_pasta_monthly_trimmed)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.21916 -0.51605  0.02327  0.53864  2.33568 

Coefficients:
                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                      0.90997    0.47594   1.912   0.0570 .  
gw_influence_log                -0.50960    0.25958  -1.963   0.0507 .  
h2oAreaKm2_log                   0.42286    0.07144   5.919 1.00e-08 ***
Yield_mm_log                    -0.15093    0.12394  -1.218   0.2244    
gw_influence_log:h2oAreaKm2_log  0.19084    0.04306   4.432 1.37e-05 ***
gw_influence_log:Yield_mm_log   -0.09245    0.05787  -1.597   0.1114    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7962 on 264 degrees of freedom
Multiple R-squared:  0.1861,    Adjusted R-squared:  0.1707 
F-statistic: 12.08 on 5 and 264 DF,  p-value: 1.518e-10