- Windows 7 x64 (build 7601) Service Pack 1
rstan * 2.19.9 2019-11-04 [1] Github (stan-dev/rstan@4f809a0)
I’m not really sure if this is a bug or I’m missing something.
The function gqs
seems to not work, when there’s a transformed parameters block in its stanmodel
. Please see the example below.
So, this works:
library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
sc <- "
transformed data{
vector[10] y = [4.65, 6.02, 4.92, 4.77, 8.12, 6.93, 7.39, 7.6, 5.68, 2.14]';
}
parameters{
real mu;
real log_sigma;
}
model{
y ~ normal(mu, exp(log_sigma));
mu ~ normal(0, 10);
log_sigma ~ normal(0, 2.5);
}
"
sm <- stan_model(model_code = sc)
post <- sampling(sm)
sc_gq <- "
parameters{
real mu;
real log_sigma;
}
generated quantities{
vector[10] y_rep;
for (i in 1:10)
y_rep[i] = normal_rng(mu, exp(log_sigma));
}
"
sm_gq <- stan_model(model_code = sc_gq)
rep <- gqs(sm_gq, draws = as.matrix(post))
rep
Inference for Stan model: bdf3c0b7af993ed90b29cc81706a90ea.
1 chains, each with iter=4000; warmup=0; thin=1;
post-warmup draws per chain=4000, total post-warmup draws=4000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
y_rep[1] 5.74 0.03 2.08 1.51 4.43 5.78 7.11 9.82 3801 1
y_rep[2] 5.80 0.04 2.16 1.52 4.45 5.80 7.12 10.11 3539 1
y_rep[3] 5.78 0.04 2.17 1.47 4.46 5.81 7.09 10.00 3790 1
y_rep[4] 5.76 0.04 2.15 1.46 4.43 5.77 7.09 10.08 3524 1
y_rep[5] 5.82 0.04 2.08 1.79 4.51 5.81 7.10 9.90 3270 1
y_rep[6] 5.78 0.03 2.15 1.35 4.49 5.82 7.12 10.05 3876 1
y_rep[7] 5.75 0.04 2.14 1.47 4.38 5.71 7.12 10.02 3401 1
y_rep[8] 5.78 0.03 2.12 1.56 4.44 5.78 7.10 10.00 3995 1
y_rep[9] 5.85 0.03 2.14 1.61 4.54 5.86 7.17 10.10 3745 1
y_rep[10] 5.82 0.04 2.20 1.42 4.46 5.83 7.15 10.18 3717 1
Samples were drawn using at Tue Nov 05 15:29:47 2019.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
But this doesn’t:
sc2 <- "
transformed data{
vector[10] y = [4.65, 6.02, 4.92, 4.77, 8.12, 6.93, 7.39, 7.6, 5.68, 2.14]';
}
parameters{
real mu;
real log_sigma;
}
transformed parameters{
real<lower=0> sigma = exp(log_sigma);
}
model{
y ~ normal(mu, sigma);
mu ~ normal(0, 10);
log_sigma ~ normal(0, 2.5);
}
"
sm2 <- stan_model(model_code = sc2)
post2 <- sampling(sm2)
sc_gq2 <- "
parameters{
real mu;
real log_sigma;
}
transformed parameters{
real<lower=0> sigma = exp(log_sigma);
}
generated quantities{
vector[10] y_rep;
for (i in 1:10)
y_rep[i] = normal_rng(mu, sigma);
}
"
sm_gq2 <- stan_model(model_code = sc_gq2)
rep2 <- gqs(sm_gq2, draws = as.matrix(post2))
rep2
Wrong number of parameter values in draws from fitted model. Expecting 2 columns, found 3 columns.
Inference for Stan model: 9a372375694ef14366f13bf8c7378254.
1 chains, each with iter=4000; warmup=0; thin=1;
post-warmup draws per chain=4000, total post-warmup draws=4000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
y_rep[1] 0 NaN 0 0 0 0 0 0 NaN NaN
y_rep[2] 0 NaN 0 0 0 0 0 0 NaN NaN
y_rep[3] 0 NaN 0 0 0 0 0 0 NaN NaN
y_rep[4] 0 NaN 0 0 0 0 0 0 NaN NaN
y_rep[5] 0 NaN 0 0 0 0 0 0 NaN NaN
y_rep[6] 0 NaN 0 0 0 0 0 0 NaN NaN
y_rep[7] 0 NaN 0 0 0 0 0 0 NaN NaN
y_rep[8] 0 NaN 0 0 0 0 0 0 NaN NaN
y_rep[9] 0 NaN 0 0 0 0 0 0 NaN NaN
y_rep[10] 0 NaN 0 0 0 0 0 0 NaN NaN
Samples were drawn using at Tue Nov 05 15:31:10 2019.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
Removing the sigma
from draws
yields and error.
rep2 <- gqs(sm_gq2, draws = as.matrix(post2)[,-3])
Error in draws[, draws_colnames, drop = FALSE] : subscript out of bounds
Using the draws from post2
WITHOUT removing sigma
with the first sm_gq
works.
> gqs(sm_gq, draws = as.matrix(post2))
Inference for Stan model: bdf3c0b7af993ed90b29cc81706a90ea.
1 chains, each with iter=4000; warmup=0; thin=1;
post-warmup draws per chain=4000, total post-warmup draws=4000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
y_rep[1] 5.82 0.04 2.16 1.54 4.51 5.82 7.08 10.20 3487 1
y_rep[2] 5.83 0.03 2.14 1.54 4.51 5.85 7.13 9.99 3851 1
y_rep[3] 5.82 0.04 2.16 1.64 4.44 5.83 7.13 10.17 3743 1
y_rep[4] 5.75 0.04 2.19 1.36 4.48 5.75 7.10 10.09 3354 1
y_rep[5] 5.85 0.03 2.14 1.77 4.48 5.81 7.17 10.21 3879 1
y_rep[6] 5.82 0.03 2.14 1.52 4.52 5.83 7.15 10.07 3830 1
y_rep[7] 5.83 0.04 2.17 1.60 4.52 5.82 7.13 10.07 3509 1
y_rep[8] 5.86 0.03 2.08 1.64 4.52 5.85 7.17 10.09 3700 1
y_rep[9] 5.83 0.04 2.13 1.54 4.53 5.82 7.14 10.11 3362 1
y_rep[10] 5.77 0.03 2.11 1.72 4.47 5.76 7.08 10.01 3740 1
Samples were drawn using at Tue Nov 05 15:34:33 2019.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
Is this intentional? It doesn’t seem very intuitive from a users perspective.
Cheers!
Max