(In Edit 2 below, I figured out the problem)
Something like this is, I think, along the lines of what you are suggesting (using the same example data as I posted above).
data {
int<lower = 0> N;
int<lower = 0> N_obs;
int<lower = 0> N_mis;
int<lower = 1, upper = N> ii_obs[N_obs];
int<lower = 1, upper = N> ii_mis[N_mis];
real y[N];
vector[N_obs] x_obs;
}
parameters {
simplex[N_mis + 1] x_mis;
real<lower=0> sigma;
real alpha;
real beta;
}
transformed parameters {
vector[N] x;
vector[N_mis] x_mis_t;
x[ii_obs] = x_obs;
x_mis_t = x_obs[9] - ((cumulative_sum(x_mis)) * (x_obs[9] - x_obs[16]));
x[ii_mis] = x_mis_t;
}
model {
for (n in 1:N) {
target += normal_lpdf(y[n] | alpha + beta*x[n], sigma);
}
target += normal_lpdf(sigma | 0, 2.5) - 1 * normal_lccdf(0 | 0, 2.5);
target += normal_lpdf(alpha | 2, 2.5);
target += normal_lpdf(beta | 0, 5);
}
However, I’m having a little trouble with the coding. The above model doesn’t sample, because I declare the simplex to be N_mis+1. It will sample if you declare simplex to be N_mis, but that is incorrect, because I need the cutpoints in the simplex to equal the number of missing values, so it must be 1 longer. I think the problem with sampling the above may come in this line, x_mis_t = x_obs[9] - ((cumulative_sum(x_mis)) * (x_obs[9] - x_obs[16]));
. How do I get only the cumulative sum up to length N_mis (1 shorter than the length of the simplex) ? Thanks!
Edit: I tried changing the transformed parameters block to this:
transformed parameters {
vector[N] x;
vector[N_mis] x_mis_t;
vector[N_mis] x_mis_s;
x[ii_obs] = x_obs;
x_mis_s = x_mis[1:N_mis];
x_mis_t = x_obs[9] - ((cumulative_sum(x_mis_s)) * (x_obs[9] - x_obs[16]));
x[ii_mis] = x_mis_t;
}
Which samples and gives the correct value for x_mis_s
, but the x_mis_t
is still well off (0.8, 0.78, 0.76, 0.75, 0.73, 0.71), so cumulative_sum
doesn’t appear to work how I think it works…
Edit 2 - The code was correct except the index was wrong. Should be x_mis_t = x_obs[9] - ((cumulative_sum(x_mis_s)) * (x_obs[9] - x_obs[10]));
. Dang, I’m gonna have to be careful with all the indexing that will go into code with multiple simplexes, but I think this is what you were suggesting and seems to work well. Thanks for the idea! Full reproducible example, below.
library(rstan)
x <- seq(from=0.9, to=0.1, by=-0.01)
length(x)
mu <- 2 + 1.5*x
y <- rnorm(length(x), mu, 1)
x[10:15] <- NA
d <- list(y = y, x_obs = c(x[1:9],x[16:81]), N = length(y), N_obs = 75, N_mis = 6, ii_obs = c(1:9,16:81), ii_mis = c(10:15))
stan_code <- "
data {
int<lower = 0> N;
int<lower = 0> N_obs;
int<lower = 0> N_mis;
int<lower = 1, upper = N> ii_obs[N_obs];
int<lower = 1, upper = N> ii_mis[N_mis];
real y[N];
vector[N_obs] x_obs;
}
parameters {
simplex[N_mis + 1] x_mis;
real<lower=0> sigma;
real alpha;
real beta;
}
transformed parameters {
vector[N] x;
vector[N_mis] x_mis_t;
vector[N_mis] x_mis_s;
x[ii_obs] = x_obs;
x_mis_s = cumulative_sum(x_mis[1:N_mis]);
x_mis_t = x_obs[9] - ((x_mis_s) * (x_obs[9] - x_obs[10]));
x[ii_mis] = x_mis_t;
}
model {
for (n in 1:N) {
target += normal_lpdf(y[n] | alpha + beta*x[n], sigma);
}
target += normal_lpdf(sigma | 0, 2.5) - 1 * normal_lccdf(0 | 0, 2.5);
target += normal_lpdf(alpha | 2, 2.5);
target += normal_lpdf(beta | 0, 5);
}
"
#fit model
simple_mis_simplex_mod <- stan(model_code = stan_code, data = d,
chains = 4, iter = 2000, warmup = 1000,
thin = 1, cores = 4)