I have a similar model with just a slight twist. I assume x and y are observed with known uncertainties that are different for each observation. The latent y is linear in the latent x + an additive error with unknown standard deviation. Here’s the full Stan model
/**
* Simple regression with measurement error in x an y
*/
data {
int<lower=0> N;
vector[N] x;
vector<lower=0>[N] sd_x;
vector[N] y;
vector<lower=0>[N] sd_y;
}
// standardize data
transformed data {
real mean_x = mean(x);
real sd_xt = sd(x);
real mean_y = mean(y);
real sd_yt = sd(y);
vector[N] xhat = (x - mean_x)/sd_xt;
vector<lower=0>[N] sd_xhat = sd_x/sd_xt;
vector[N] yhat = (y - mean_y)/sd_yt;
vector<lower=0>[N] sd_yhat = sd_y/sd_yt;
}
parameters {
vector[N] x_lat;
vector[N] y_lat;
real beta0;
real beta1;
real<lower=0> sigma;
}
transformed parameters {
vector[N] mu_yhat = beta0 + beta1 * x_lat;
}
model {
beta0 ~ normal(0., 2.);
beta1 ~ normal(0., 5.);
sigma ~ normal(0., 2.);
xhat ~ normal(x_lat, sd_xhat);
y_lat ~ normal(mu_yhat, sigma);
yhat ~ normal(y_lat, sd_yhat);
}
generated quantities {
vector[N] xhat_new;
vector[N] yhat_new;
vector[N] y_lat_new;
vector[N] x_new;
vector[N] y_new;
vector[N] mu_x;
vector[N] mu_y;
real b0;
real b1;
b0 = mean_y + sd_yt*beta0 - beta1*sd_yt*mean_x/sd_xt;
b1 = beta1*sd_yt/sd_xt;
mu_x = x_lat*sd_xt + mean_x;
mu_y = mu_yhat*sd_yt + mean_y;
for (n in 1:N) {
xhat_new[n] = normal_rng(x_lat[n], sd_xhat[n]);
y_lat_new[n] = normal_rng(beta0 + beta1 * x_lat[n], sigma);
yhat_new[n] = normal_rng(y_lat_new[n], sd_yhat[n]);
x_new[n] = sd_xt * xhat_new[n] + mean_x;
y_new[n] = sd_yt * yhat_new[n] + mean_y;
}
}
Here’s some R code to create a reproducible example:
testme <- function(N=200, b0=10, b1=1, sigma=0.1, seed1=1234, seed2=23455) {
require(rstan)
require(ggplot2)
set.seed(seed1)
x_lat <- rnorm(N)
sd_x <- runif(N, max=0.2)
sd_y <- runif(N, max=0.2)
x_obs <- x_lat+rnorm(N, sd=sd_x)
y_lat <- b0 + b1*x_lat + rnorm(N, sd=sigma)
y_obs <- y_lat + rnorm(N, sd=sd_y)
stan_dat <- list(N=N, x=x_obs, sd_x=sd_x, y=y_obs, sd_y=sd_y)
df1 <- data.frame(x_lat=x_lat, x=x_obs, sd_x=sd_x, y_lat=y_lat, y=y_obs, sd_y=sd_y)
sfit <- stan(file="ls_me.stan", data=stan_dat, seed=seed2, chains=4)
print(sfit, pars=c("b0", "b1", "sigma"), digits=3)
g1 <- ggplot(df1)+geom_point(aes(x=x, y=y))+geom_errorbar(aes(x=x,ymin=y-sd_y,ymax=y+sd_y)) +
geom_errorbarh(aes(y=y, xmin=x-sd_x, xmax=x+sd_x))
post <- extract(sfit)
df2 <- data.frame(x_new=as.numeric(post$x_new), y_new=as.numeric(post$y_new))
g1 <- g1 + stat_ellipse(aes(x=x_new, y=y_new), data=df2, geom="path", type="norm", linetype=2, color="blue")
plot(g1)
list(sfit=sfit, stan_dat=stan_dat)
}
which should produce this graph: