Here is some toy data. I have a truncated regression that sets a lower bound on the outcome using y[i] ~ normal(mu, sigma/weights[i]) T[L[i],];
The code below shows the linear fit WITHOUT the truncation (no T[L[i],]); you can see the fitted lines are in the correct direction. Here is the linear fit WITH T[L[i],], which should set the lower bound on the outcome (y >= -x), and you can see the fit is not correct.
Why does adding T[L[i],] distort the fit? If you run
table(dat$y >= dat$BOUND) you will see that every outcome is above its bound, so the truncation condition is already satisfied by all the data.
here is the code:
## Simulate toy data whose outcome is bounded below: y >= -x by construction.
set.seed(456)
x <- rep(seq(-10, 10, 1), each = 5)
y <- numeric(length(x))
for (i in seq_along(x)) {
  # Width of the admissible interval above the bound -x[i]:
  # narrow (1 to 10) when x[i] is negative, wide (5 to 40) otherwise.
  # NOTE: plain if/else replaces the original scalar ifelse(), which
  # evaluated BOTH sample() branches and wasted one RNG draw per row.
  width <- if (x[i] < 0) {
    sample(seq(1, 10, 0.1), 1)
  } else {
    sample(seq(5, 40, 0.1), 1)
  }
  # Draw y[i] from the unit grid on [-x[i], -x[i] + width], so y[i] >= -x[i].
  y[i] <- sample(seq(-x[i], -x[i] + width, 1), 1)
}
weights <- sample(seq(1, 20, 1), length(x), replace = TRUE)
weights <- weights / sum(weights)  # normalize so the weights sum to 1
groups <- rep(letters[1:5], times = length(x) / 5)
dat <- data.frame(x = x, y = y, weights = weights, group = groups, BOUND = -x)
library(rstan)
head(dat)
## BUG FIX: since R 4.0 data.frame() no longer converts strings to factors,
## so dat$group is character and levels(dat$group) is NULL -- the original
## match(dat$group, levels(dat$group)) returned all NA group indices.
## Build the 1..5 integer index from the sorted unique labels instead;
## this works whether group is character or factor.
dat$GROUP_ID <- match(as.character(dat$group), sort(unique(as.character(dat$group))))
dat$BOUND <- -dat$x  # per-observation lower truncation bound
table(dat$y >= dat$BOUND)  # sanity check: should be all TRUE
quantile(dat$weights)
## Bundle everything the Stan model's data block expects into a named list.
standat <- list(
  N        = nrow(dat),      # number of observations
  y        = dat$y,          # outcome
  x        = dat$x,          # covariate
  GROUP_ID = dat$GROUP_ID,   # integer group index for each row
  nGROUPS  = 5,              # number of groups
  weights  = dat$weights,    # per-observation weights (normalized to sum to 1)
  L        = dat$BOUND       # per-observation lower truncation bound
)
table(dat$y >= dat$BOUND)  # sanity check: every outcome sits above its bound
## Stan model (kept as a string for stan(model_code = ...)): varying-intercept
## linear regression where per-observation weights are folded into the scale
## (sigma / weights[i]) and the outcome is lower-truncated via T[L[i],].
## NOTE(review): the weights were normalized to sum to 1 (see the earlier
## weights/sum(weights) step), so sigma/weights[i] inflates each observation's
## scale by roughly a factor of N; presumably the T[L[i],] normalization term
## then dominates the likelihood and biases the fit -- verify the intended
## weighting scheme before trusting this model.
stanmodelcode = '
data {
int<lower=1> N;
int nGROUPS;
real y[N];
real x[N];
int<lower=1, upper=nGROUPS> GROUP_ID[N];
real L[N];
real weights[N];
}
transformed data{
}
parameters {
real intercept;
vector[nGROUPS] intercept_x2;
real beta;
real<lower=0> sigma;
real<lower=0> sigma_2;
}
transformed parameters { // none needed
}
model {
real mu;
// priors
intercept~ normal(0,10);
intercept_x2 ~ normal(0,sigma_2);
beta ~ normal(0,10);
sigma ~ normal(0,10);
sigma_2 ~ normal(0,10);
// likelihood
for(i in 1:N){
mu = intercept + intercept_x2[ GROUP_ID[i] ] + beta*x[i];
y[i] ~ normal(mu, sigma/weights[i]) T[L[i],] ;
}
}
'
## Compile and sample the model (single chain), then extract the posterior
## draws as a named list of arrays.
fit22 <- stan(
  model_code = stanmodelcode,
  data       = standat,
  iter       = 10000,
  warmup     = 2000,
  chains     = 1
)
fit22
post <- rstan::extract(fit22)
## Posterior linear predictor for a single observation: returns one fitted
## value per posterior draw. `post` is the list from rstan::extract(),
## `x` the covariate value, `GROUP_ID` the integer group index.
get_post_pred <- function(post, x, GROUP_ID) {
  post$intercept + post$intercept_x2[, GROUP_ID] + post$beta * x
}
head(dat)
head(post)
## One column per row of dat; each column holds that row's fitted value
## for every posterior draw.
posterior_prediction_in_columns <- sapply(
  seq_len(nrow(dat)),
  function(i) get_post_pred(post, dat$x[i], dat$GROUP_ID[i])
)
head(posterior_prediction_in_columns)
## Posterior-mean fitted value per observation (colMeans == apply(., 2, mean)).
mean_column_prediction <- colMeans(posterior_prediction_in_columns)
dat$BayesPrediction <- mean_column_prediction
head(dat)
## show predictions on current data set
library(ggplot2)  # BUG FIX: ggplot()/geom_* were used below but ggplot2 was never attached
ggplot(data = dat, aes(x = x, y = y, color = group)) +
  geom_point(aes(size = weights)) +  # +facet_wrap(~CPSU)
  geom_line(data = dat, aes(x = x, y = BayesPrediction, color = group)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0)