I have a periodic timeseries model that works seemingly great when it only sees one timeseries, recovering simulated frequency, phase, amplitude and noise parameters great:
but when I expand this model to expect multiple timeseries that differ only in their noise (not even the magnitude of noise, so they’re effectively IID samples from the one-timeseries model), it get lots of divergences and the different chains explore very different areas of the parameter space.
Below is the model and R code for exploring this behaviour. Any ideas what might be going awry here? I feel like I must be missing something very simple but I’ve banged my head on this for a few days now and haven’t worked out what, so any help would be greatly appreciated!
functions{
// p(): an example periodic-but-asymmetric function
vector p(vector x){
return(sin(x+sin(x)/2)) ;
}
}
data{
// num_samples: number of samples on the x-axis
int num_samples ;
// num_signals: number of signals (columns in y)
int num_signals ;
// y: matrix of observation
matrix[num_samples,num_signals] y ;
// x: values on the x-axis associated with the observations in y
vector[num_samples] x ;
// sample_prior_only: helper to toggle on/off posterior sampling
int<lower=0,upper=1> sample_prior_only ;
}
transformed data{
// pre-compute part of the input to p()
vector[num_samples] x_times_pi_times_2 = x*pi()*2 ;
// compute some quantities for constaining hz
real hz_max = 1/((x[2]-x[1])*5) ; //assumes equal spacing
real hz_min = 1/(x[num_samples]-x[1]) ;
real hz_max_minus_min = hz_max - hz_min ;
}
parameters{
// noise: measurement noise
real<lower=0> noise ;
// hz_scaled01: helper variable for constaining hz
real<lower=0,upper=1> hz_helper ;
// phamp: phase-and-amplitude vector (actually a transform thereof)
vector[2] phamp ;
}
transformed parameters{
// get hz given hz_helper and constraints
real hz = hz_helper * hz_max_minus_min + hz_min ;
// extract phase from phamp
real phase = atan2(phamp[1],phamp[2]) ;
// extract amp from phamp (vector magnitude)
real amp = sqrt(dot_self(phamp)) ;
// compute the latent function given x, hz, amp & phase
vector[num_samples] f = amp * p(x_times_pi_times_2*hz - phase) ;
}
model{
// prior for hz_helper: inverted-u-shaped prior on support
hz_helper ~ beta(2,2) ;
// prior for noise: moderately-informed given simulated data
noise ~ std_normal() ;
// prior for pamp: implies a weakly-informed prior on amplitude
phamp ~ std_normal() ;
//if not simply seeking to sample from the prior,
//likelihood is the observed data plus noise
if(!sample_prior_only){
for(this_signal in 1:num_signals){
y[,this_signal] ~ normal(f,noise) ;
}
}
}
And R code for playing:
#preamble ----
#load packages used
library(cmdstanr)
library(tidyverse)
#load and compile the model
mod = cmdstan_model('hz_mat.stan')
# example function that's periodic but asymmetric
p = function(x,hz,phase){
x = x*(2*pi)*hz - phase
sin(x+sin(x)/2)
}
# Generate data ----
seed = 1 #vary this across simulations
num_signals = 1 #if 1, model works fine; if >2 shitshow.
set.seed(seed)
dat = (
tibble::tibble(
x = seq(0,10,by=1/10)
, f = p(x,hz=1,phase=0)
)
%>% tidyr::expand_grid(
signal = sprintf('signal%02d',1:num_signals)
)
%>% dplyr::group_by(signal)
%>% dplyr::mutate(
y = f + rnorm(n(),0,.1)
)
)
# quick viz:
(
dat
%>% ggplot()
+ geom_point(aes(x=x,y=y),alpha = .5)
+ geom_line(aes(x=x,y=f))
)
# reshape to wide
dat_wide =
(
dat
%>% tidyr::pivot_wider(
names_from = 'signal'
, values_from = 'y'
)
#model assumes that the data is ordered by x
%>% dplyr::arrange(x)
)
# sample & check diagnostics ----
fit = mod$sample(
data = list(
num_samples = nrow(dat_wide)
, num_signals = ncol(dat_wide)-2
, y = (
dat_wide
%>% dplyr::select(starts_with('signal'))
%>% as.matrix()
)
, x = dat_wide$x #assumed to be ordered
, sample_prior_only = F
)
, chains = parallel::detectCores()/2-1
, parallel_chains = parallel::detectCores()/2-1
, refresh = 1000
, seed = seed
, iter_warmup = 1e3
, iter_sampling = 1e3
)
diagnostics =
(
fit$summary()
%>% dplyr::select(variable,rhat,contains('ess'))
%>% dplyr::filter(substr(variable,1,1)!='f')
%>% dplyr::mutate(
diagnostics = fit$cmdstan_diagnose()$stdout #annoyingly not quiet-able
, treedepth_exceeded = str_detect(diagnostics,'transitions hit the maximum')
, ebmfi_low = str_detect(diagnostics,' is below the nominal threshold')
)
%>% dplyr::select(-diagnostics)
)
print(diagnostics)
# Visualize the posteriors on parameters ---
bayesplot::mcmc_hist(fit$draws(variables=c('phase','hz','amp','noise','snr')))
bayesplot::mcmc_hist_by_chain(fit$draws(variables=c('phase')))
bayesplot::mcmc_hist_by_chain(fit$draws(variables=c('hz')))
bayesplot::mcmc_hist_by_chain(fit$draws(variables=c('amp')))
bayesplot::mcmc_hist_by_chain(fit$draws(variables=c('noise')))
# extract draws and visualize the posterior on the latent function ----
draws =
(
fit$draws(
variables = 'f'
)
%>% posterior::as_draws_df()
%>% tibble::as_tibble()
%>% dplyr::select(-.iteration)
%>% tidyr::pivot_longer(
cols = starts_with('f')
, names_to = 'x'
)
%>% dplyr::mutate(
x = str_replace(x,fixed('f['),'')
, x = str_replace(x,fixed(']'),'')
, x = dat_wide$x[as.numeric(x)]
, .chain = factor(.chain)
)
)
(
draws
%>% dplyr::group_by(
x
, .chain
)
%>% dplyr::summarise(
med = mean(value)
, lo50 = quantile(value,.25)
, hi50 = quantile(value,.75)
, lo95 = quantile(value,.025)
, hi95 = quantile(value,.975)
, .groups = 'drop'
)
%>% ggplot()
+ facet_grid(.chain~.)
+ geom_line(
data = dat
, mapping = aes(
x = x
, y = f
)
# , size = 2
# , linetype=2
)
+ geom_point(
data = dat
, mapping = aes(
x = x
, y = y
)
, alpha = .5
)
+ geom_line(
data =
(
draws
%>% dplyr::nest_by(.chain,.draw)
%>% dplyr::group_by(.chain)
%>% dplyr::slice_sample(n=10)
%>% tidyr::unnest(cols=data)
)
, mapping = aes(
x = x
, y = value
, group = .draw
# , colour = .chain
)
, alpha = .2
)
+ geom_ribbon(
mapping = aes(
x = x
, ymin = lo50
, ymax = hi50
, group = .chain
, fill = .chain
)
, alpha = .5
)
+ geom_ribbon(
mapping = aes(
x = x
, ymin = lo95
, ymax = hi95
, group = .chain
, fill = .chain
)
, alpha = .5
)
)