Autocorrelation for unevenly spaced time series

If anyone comes across this and is still looking for a solution (I don’t believe it is available in {brms} yet, you can implement the continuous time AR1 model (exactly as @ucfagls defines it above) in the {mvgam} package. A short example is shown below

library(mvgam)
library(ggplot2); theme_set(theme_classic())

# A short example to illustrate CAR(1) models
# Function to simulate CAR1 data with seasonality
sim_corcar1 = function(n = 120,
                       phi = 0.5,
                       sigma = 1,
                       sigma_obs = 0.75){
  # Sample irregularly spaced time intervals
  time_dis <- c(0, runif(n - 1, -0.1, 1))
  time_dis[time_dis < 0] <- 0; time_dis <- time_dis * 5
  
  # Set up the latent dynamic process
  x <- vector(length = n); x[1] <- -0.3
  for(i in 2:n){
    # zero-distances will cause problems in sampling, so mvgam uses a
    # minimum threshold; this simulation function emulates that process
    if(time_dis[i] == 0){
      x[i] <- rnorm(1, mean = (phi ^ 1e-12) * x[i - 1], sd = sigma)
    } else {
      x[i] <- rnorm(1, mean = (phi ^ time_dis[i]) * x[i - 1], sd = sigma)
    }
  }
  
  # Add 12-month seasonality
  cov1 <- sin(2 * pi * (1 : n) / 12); cov2 <- cos(2 * pi * (1 : n) / 12)
  beta1 <- runif(1, 0.3, 0.7); beta2 <- runif(1, 0.2, 0.5)
  seasonality <- beta1 * cov1 + beta2 * cov2
  
  # Take Gaussian observations with error and return
  data.frame(y = rnorm(n, mean = x + seasonality, sd = sigma_obs),
             season = rep(1:12, 20)[1:n],
             time = cumsum(time_dis))
}

# Sample two time series
dat <- rbind(dplyr::bind_cols(sim_corcar1(phi = 0.65,
                                          sigma_obs = 0.55),
                              data.frame(series = 'series1')),
             dplyr::bind_cols(sim_corcar1(phi = 0.8,
                                          sigma_obs = 0.35),
                              data.frame(series = 'series2'))) %>%
  dplyr::mutate(series = as.factor(series))

# mvgam with CAR(1) trends and series-level seasonal smooths; the
# State-Space representation (using trend_formula) will be more efficient
mod <- mvgam(formula = 
               
               # Observation formula; just the intercept
               y ~ 1,
             
             # Process formula, containing the seasonal smooths
             trend_formula = ~ s(season, bs = 'cc',
                                 k = 5, by = trend),
             
             # CAR(1) dynamics
             trend_model = CAR(),
             
             # Showcasing how to modify priors
             priors = c(prior(beta(2, 2),
                              class = ar1,
                              lb = 0, 
                              ub = 1),
                        prior(exponential(2),
                            class = sigma)),
             data = dat,
             family = gaussian(),
             burnin = 750,
             samples = 750)
#> Using cmdstanr as the backend
#> All 4 chains finished successfully.
#> Mean chain execution time: 4.8 seconds.
#> Total execution time: 6.1 seconds.
#> Warning: 35 of 3000 (1.0%) transitions ended with a divergence.
#> See https://mc-stan.org/misc/warnings for details.
#> Warning: 4 of 4 chains had an E-BFMI less than 0.2.
#> See https://mc-stan.org/misc/warnings for details.

# Model summary
summary(mod)
#> GAM observation formula:
#> y ~ 1
#> 
#> GAM process formula:
#> ~s(season, bs = "cc", k = 5, by = trend)
#> 
#> Family:
#> gaussian
#> 
#> Link function:
#> identity
#> 
#> Trend model:
#> CAR()
#> 
#> N process models:
#> 2 
#> 
#> N series:
#> 2 
#> 
#> N timepoints:
#> 120 
#> 
#> Status:
#> Fitted using Stan 
#> 4 chains, each with iter = 1500; warmup = 750; thin = 1 
#> Total post-warmup draws = 3000
#> 
#> 
#> Observation error parameter estimates:
#>               2.5%  50% 97.5% Rhat n_eff
#> sigma_obs[1] 0.260 0.73  0.99 1.03    390
#> sigma_obs[2] 0.063 0.16  0.42 1.05    307
#> 
#> GAM observation model coefficient (beta) estimates:
#>              2.5%    50% 97.5% Rhat n_eff
#> (Intercept) -0.21 0.0016  0.26 1.03   160
#> 
#> Process model AR parameter estimates:
#>        2.5%  50% 97.5% Rhat n_eff
#> ar1[1] 0.48 0.74  0.89 1.02   312
#> ar1[2] 0.59 0.70  0.80 1.00  1600
#> 
#> Process error parameter estimates:
#>          2.5%  50% 97.5% Rhat n_eff
#> sigma[1] 0.45 0.74   1.1 1.02    501
#> sigma[2] 0.89 1.00   1.2 1.01   580
#> 
#> GAM process model coefficient (beta) estimates:
#>                                 2.5%   50%  97.5% Rhat n_eff
#> s(season):trendtrend1.1_trend -0.300  0.11  0.500 1.01  1112
#> s(season):trendtrend1.2_trend -1.100 -0.62 -0.170 1.00  1104
#> s(season):trendtrend1.3_trend -0.850 -0.42 -0.040 1.01   831
#> s(season):trendtrend2.1_trend -0.044  0.38  0.910 1.00  1282
#> s(season):trendtrend2.2_trend -1.100 -0.53 -0.033 1.01   414
#> s(season):trendtrend2.3_trend -0.950 -0.47 -0.040 1.00  1690
#> 
#> Approximate significance of GAM process smooths:
#>                         edf Ref.df    F p-value  
#> s(season):seriestrend1 2.08      3 5.58   0.014 *
#> s(season):seriestrend2 2.61      3 5.37   0.025 *
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Stan MCMC diagnostics:
#> n_eff / iter looks reasonable for all parameters
#>  *Diagnose further to investigate why the chains have not mixed
#> 35 of 3000 iterations ended with a divergence (1.1667%)
#>  *Try running with larger adapt_delta to remove the divergences
#> 
#> Samples were drawn using NUTS(diag_e) at Mon May 20 3:47:16 PM 2024.
#> For each parameter, n_eff is a crude measure of effective sample size,
#> and Rhat is the potential scale reduction factor on split MCMC chains
#> (at convergence, Rhat = 1)

# AR1 params (truths were 0.65 and 0.8)
mcmc_plot(mod, variable = 'ar1', 
          regex = TRUE, 
          type = 'hist')
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Observation and process variance params
mcmc_plot(mod, variable = 'sigma', 
          regex = TRUE, 
          type = 'hist')
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Seasonal smooths using {marginaleffects}
conditional_effects(mod, type = 'expected')

plot_predictions(mod, 
                 condition = c('season', 'series', 'series'),
                 points = 0.5, type = 'response')

# Posterior predictive checks
pp_check(mod, type = 'ribbon_grouped', group = 'series')
#> Using all posterior draws for ppc type 'ribbon_grouped' by default.

# Randomized Quantile residuals
plot(mod, type = 'residuals', series = 1)

plot(mod, type = 'residuals', series = 2)

Created on 2024-05-20 with reprex v2.1.0

4 Likes