Dear all,
(sorry if I’m double-posting - I tried posting it on the google group, but I don’t think it went through…)
I have coded a very simple hierarchical regression in Stan with 2 predictors that ran reasonably fast.
Then I tried to make the code generic so that I could use it for an arbitrary number of predictors (I tried to code it the way the manual suggests in the section on Linear Regression). While this still gave me the same parameter estimates (so something must be right), it took much longer (~3 times slower). I’m not sure why and whether there is anything I can do to speed it up?
Original model:
data {
int datp; //number of data points
vector[datp] y; //reaction time
int nsub; //number of subjects
int ntrs[nsub]; //number of trials for each person
int<lower=1> nregs; //number of regressors in this design (including constant)
matrix[datp,nregs] x; //design matrix - format: column 1:intercept, column 2: regressor, rows:datapoints for all subjects concatenated
int subID[datp]; //which subject each datapoint belongs to
}
parameters {
// group level inference parameters
vector[2] beta_mu; // constant (intercept) and effect of condition (slope)
// structured noise: differences between individual subjects
real<lower=0> sigma_e; //error sd per measurement
vector<lower=0>[2] sigma; //individual differences between people
vector[nsub] betas0; //RT adjustment for each person (intercept)
vector[nsub] betas1; //RT adjustment for slope
}
model {
real mu;
betas0 ~ normal(0,sigma[1]);
betas1 ~ normal(0,sigma[2]);
// Run regression
for (i in 1:datp){
mu = (beta_mu[1] + betas0[subID[i]])*x[i,1] + (beta_mu[2] + betas1[subID[i]])*x[i,2];
y[i] ~ lognormal(mu,sigma_e);
}
}
Here is the new model:
data {
int datp; //number of data points
vector[datp] y; //reaction time
int nsub; //number of subjects
int ntrs[nsub]; //number of trials for each person
int<lower=1> nregs; //number of regressors in this design (including constant)
matrix[datp,nregs] x; //design matrix - format: column 1:intercept, column 2: regressor, rows:datapoints for all subjects concatenated
int subID[datp]; //which subject each datapoint belongs to
}
parameters{
vector[nregs] beta_mu; // group level paras
vector<lower=0>[nregs] sigma; // individual differences
real<lower=0> sigma_e; //error sd per measurement
vector[nsub] betasRaw[nregs];
}
// To do a non-centered distribution,this speeds up the sampling
transformed parameters{
vector[nsub] betas[nregs];
for (ir in 1:nregs){
betas[ir] = beta_mu[ir]+sigma[ir]*betasRaw[ir];}
}
// This block runs the actual model
model {
real mu;
//Individual subject
for (ir in 1:nregs){
betasRaw[ir] ~ normal(0,1);
}
// Run the regression
for (i in 1:datp){
mu=to_vector(betas[:,subID[i]])’*x[i]’;
y[i] ~ lognormal(mu,sigma_e);
}
}