Hi there,
I am trying to implement a multiple Change Point Detection using stan based on the website descriped the concept in stan:
https://nowave.it/pages/bayesian-changepoint-detection-with-r-and-stan.html
The model is well predict for one change point, however when I try to add more change point for example just two change point, then the prediction seems is not accurate. I am not sure if I am made mistake on constructing the Likelihood and the Generate Quantities block for using softmax command. I am trying to use the Dynamic Programming to increase the speed of the simulation as the change point increase. I did all in 1D vector format step by step and then try to append the vectors to one vector to construct the final likelihhod. here is the code:
beta0 <- 3
beta1 <- 9
beta2 <- 15
set.seed(42)
x <- sort(runif(100, 0, 10))
x1 <- head(x, 41)
x2 <- tail(x, 59)
y1 <- rnorm(41, mean=beta0 + beta1 * x1, sd=4.3)
y2 <- rnorm(59, mean=beta0 + beta2 * x2, sd=6.3)
y <- c(y1, y2)
plot(x, y)
stan_code2 <- '
data {
int<lower=1> N;
real x[N];
real y[N];
}
parameters {
real beta0;
real beta1;
real beta2;
real beta3;
real<lower=0> sigma;
}
transformed parameters {
vector[N] mu1;
vector[N] mu2;
vector[N] mu3;
vector[N] log_p;
vector[N] log_p2;
{
vector[N+1] log_p_e;
vector[N+1] log_p_l;
vector[N+1] log_p_m;
log_p_e[1] = 0;
log_p_l[1] = 0;
log_p_m[1] = 0;
for (i in 1:N) {
mu1[i] = beta0 + beta1 * x[i];
mu2[i] = beta0 + beta2 * x[i];
mu3[i] = beta0 + beta3 * x[i];
log_p_e[i + 1] = log_p_e[i] + normal_lpdf(y[i] | mu1[i], sigma);
log_p_l[i + 1] = log_p_l[i] + normal_lpdf(y[i] | mu2[i], sigma);
log_p_m[i + 1] = log_p_m[i] + normal_lpdf(y[i] | mu3[i], sigma);
}
log_p = rep_vector(-log(N) + log_p_l[N + 1], N) + head(log_p_e, N) - head(log_p_l, N);
log_p2 = rep_vector(-log(N) + log_p_m[N + 1], N) + head(log_p_l, N) - head(log_p_m, N);
}
}
model {
beta0 ~ normal(0, 100);
beta1 ~ normal(0, 100);
beta2 ~ normal(0, 100);
beta3 ~ normal(0, 100);
sigma ~ normal(0, 100);
target += log_sum_exp(append_row(log_p, log_p2));
}
generated quantities {
int<lower=1,upper=N> tau;
int<lower=1,upper=N> tau2;
// K simplex are a data type in Stan
simplex[N] sp;
simplex[N] sp2;
sp = softmax(log_p);
tau = categorical_rng(sp);
sp2 = softmax(log_p2);
tau2 = categorical_rng(sp2);
}
'
fit <- stan(
model_code = stan_code2,
data = list(x = x, y = y, N=length(x)),
chains = 4,
iter = 10000,
cores = 4,
refresh = 500
)
plot(fit, pars=c("beta0", "beta1", "beta2","beta3","sigma"))
plot(fit, pars=c("tau","tau2"))
I will be thankful if anyone give me the idea to solve the issue. I am guessing that the model must find the tau and tau2 both around 42 as its only one change point in the sample data. but it gaves me other values.