Improve performance modelling subset of observations

Hi,

I’m new to Stan and am not sure if I’m specifying the model correctly. Each participant was shown a binary sequence, and they were asked to predict the next item every 5 trials. In the following code, I tried to use a sliding window to predict p1 (the probability of item 1) using simple beta updating (first for loop in the model block). Then I try to use a subset of the predicted p1 sequence to fit the actual rating data (y_obs) in the second for loop of the model block. I’m trying to treat this as a missing-data problem, following the Stan manual. However, the second loop significantly slows down sampling. I’m wondering if there’s another way to achieve the same objective while improving performance? Many thanks.

// beta fixed frequency
// Sliding-window beta-binomial model of sequence predictions. Ratings are
// observed only on a subset of trials, handled as a missing-data problem:
// observed and imputed ratings are spliced into one array y.
data {
    int<lower=0> N_obs; // number of trials with an observed rating
    int<lower=0> N_mis; // number of trials without a rating
    int<lower=1, upper=N_obs+N_mis> ii_obs[N_obs]; // indices of rated trials (Stan indices are 1-based, so lower=1)
    int<lower=1, upper=N_obs+N_mis> ii_mis[N_mis]; // indices of unrated trials
    vector[N_obs+N_mis] seq; // binary stimulus sequence (entries assumed 0/1 — TODO confirm)
    real y_obs[N_obs]; // observed ratings
    int<lower=1> window; // sliding-window width

}

transformed data {
    int<lower=0> T = N_mis + N_obs; // total number of trials
}

parameters {
    real<lower=0> alpha0; // beta prior shape (successes) for p1
    real<lower=0> beta0;  // beta prior shape (failures) for p1
    real<lower=0> sigma;  // rating noise scale
    real<lower=0, upper=1> p1[T]; // per-trial predicted probability of item 1
    real y_mis[N_mis]; // imputed ratings for unrated trials
    real mu_itc;   // rating intercept
    real mu_slope; // rating slope on p1
}

transformed parameters {
    // Full rating sequence: observed values spliced with imputed ones.
    real y[T];
    y[ii_obs] = y_obs;
    y[ii_mis] = y_mis;
}

model {
    int N1; // count of 1s in the window ending at the current trial

    alpha0 ~ normal(0, 1);
    beta0 ~ normal(0, 10);
    for (t in window:T) {
        // Count the 1s in seq[(t-window+1):t] directly; no need to copy
        // the slice into a local vector first.
        N1 = 0;
        for (i in (t - window + 1):t)
            if (seq[i] == 1)
                N1 += 1;
        target += beta_lpdf(p1[t] | alpha0, beta0);
        target += binomial_lpmf(N1 | window, p1[t]);
    }

    // Vectorized likelihood over the rated trials only. This replaces the
    // original per-index loop, which made one scalar normal_lpdf call per
    // rating and was the main performance bottleneck.
    {
        vector[N_obs] mu = mu_itc + mu_slope * to_vector(p1[ii_obs]);
        target += normal_lpdf(y[ii_obs] | mu, sigma);
    }

    // NOTE(review): y_mis never enters a likelihood here, so it has an
    // improper flat prior — confirm that is intended.
}

I got this to compile but I’m not sure if it will run without data. The main gist is: instead of computing N1 in the model block, which has to run on every iteration, you can build an array of ints corresponding to each window in transformed data. Generally, anything that is purely a function of the data should go in transformed data so it only runs once. Then you can use vectorized statements to work over whole vectors. I’m not sure I got the sizes right below, but I hope it gives an idea of what I’m describing above.

// beta fixed frequency
// Precompute the per-window counts once in transformed data (they are a pure
// function of the data), then use vectorized lpdf/lpmf calls in the model.
data {
    int<lower=0> N_obs; // number of trials with an observed rating
    int<lower=0> N_mis; // number of trials without a rating
    int<lower=1, upper=N_obs+N_mis> ii_obs[N_obs]; // indices of rated trials (1-based)
    int<lower=1, upper=N_obs+N_mis> ii_mis[N_mis]; // indices of unrated trials
    vector[N_obs+N_mis] seq; // binary stimulus sequence (entries assumed 0/1 — TODO confirm)
    real y_obs[N_obs]; // observed ratings
    int<lower=1> window; // sliding-window width

}

transformed data {
    int<lower=0> T = N_mis + N_obs;
    // Full windows end at t = window, window+1, ..., T, so there are
    // T - window + 1 of them (the original T - window was off by one).
    int N1_size = T - window + 1;
    int N1[N1_size] = rep_array(0, N1_size);
    for (t in window:T) {
        // Bug fix: write into a distinct slot per window. The original code
        // never advanced its N1_iter counter, so every window's count was
        // accumulated into N1[1].
        for (i in (t - window + 1):t) {
            if (seq[i] == 1) {
                N1[t - window + 1] += 1;
            }
        }
    }

}

parameters {
    real<lower=0> alpha0; // beta prior shape (successes) for p1
    real<lower=0> beta0;  // beta prior shape (failures) for p1
    real<lower=0> sigma;  // rating noise scale
    vector<lower=0, upper=1>[T] p1; // per-trial predicted probability of item 1
    real y_mis[N_mis]; // imputed ratings for unrated trials
    real mu_itc;   // rating intercept
    real mu_slope; // rating slope on p1
}

transformed parameters {
    // Full rating sequence: observed values spliced with imputed ones.
    real y[T];
    // p1_sub[k] pairs with N1[k]: both refer to the window ending at
    // trial t = window + k - 1.
    vector[N1_size] p1_sub = p1[window:T];
    y[ii_obs] = y_obs;
    y[ii_mis] = y_mis;
}

model {
    vector[N_obs] mu = mu_itc + mu_slope * p1[ii_obs];
    alpha0 ~ normal(0, 1);
    beta0 ~ normal(0, 10);
    // target += keeps the normalizing constants; use the ~ form instead if
    // you only need the posterior up to a constant (slightly faster):
    //   p1 ~ beta(alpha0, beta0);  N1 ~ binomial(window, p1_sub);  etc.
    target += beta_lpdf(p1 | alpha0, beta0);
    // Bug fix: the binomial must use p1_sub (length N1_size), not the full
    // p1 (length T) — the original sizes did not match.
    target += binomial_lpmf(N1 | window, p1_sub);
    target += normal_lpdf(y[ii_obs] | mu, sigma);

}

Hi Steve,

Many thanks for your reply. It’s definitely a bit faster now that the loop is in transformed data. I also handle the initial, shorter windows so that the N1_size calculation is no longer needed. Full code below.

When I use y~normal(…), I get the following warning:

Info:
Left-hand side of sampling statement (~) may contain a non-linear transform of a parameter or local variable.
If it does, you need to include a target += statement with the log absolute determinant of the Jacobian of the transform.
Left-hand-side of sampling statement:
    y[ii_obs] ~ normal(...)

Is there a page where you can point me to that explains the difference between these two methods? Thanks

// beta fixed frequency (updated)
// Window counts precomputed in transformed data; early trials (t < window)
// use a truncated window starting at trial 1.
data {
    int<lower=0> N_obs; // number of trials with an observed rating
    int<lower=0> N_mis; // number of trials without a rating
    int<lower=1, upper=N_obs+N_mis> ii_obs[N_obs]; // indices of rated trials (1-based)
    int<lower=1, upper=N_obs+N_mis> ii_mis[N_mis]; // indices of unrated trials
    vector[N_obs+N_mis] seq; // binary stimulus sequence (entries assumed 0/1 — TODO confirm)
    real y_obs[N_obs]; // observed ratings
    int<lower=1> window; // sliding-window width
}

transformed data {
    int<lower=0> T = N_mis + N_obs;
    int N1[T] = rep_array(0, T); // number of 1s in the window ending at trial t
    int n_trials[T];             // actual window length at trial t: min(t, window)

    for (t in 1:T) {
        int lo = max(1, t - window + 1); // truncate the earliest windows at trial 1
        n_trials[t] = t - lo + 1;
        for (i in lo:t)
            if (seq[i] == 1)
                N1[t] += 1;
    }
}

parameters {
    real<lower=0> alpha0; // beta prior shape (successes) for p1
    real<lower=0> beta0;  // beta prior shape (failures) for p1
    real<lower=0> sigma;  // rating noise scale
    vector<lower=0, upper=1>[T] p1; // per-trial predicted probability of item 1
    real y_mis[N_mis]; // imputed ratings for unrated trials
    real mu_itc;   // rating intercept
    real mu_slope; // rating slope on p1
}

transformed parameters {
    // Full rating sequence: observed values spliced with imputed ones.
    real y[T];
    y[ii_obs] = y_obs;
    y[ii_mis] = y_mis;
}

model {
    vector[N_obs] mu = mu_itc + mu_slope * p1[ii_obs];
    alpha0 ~ normal(0, 1);
    beta0 ~ normal(0, 10);
    p1 ~ beta(alpha0, beta0);
    // Bug fix: for t < window, N1[t] can count at most t ones, so the number
    // of binomial trials must vary per element (n_trials) instead of being a
    // constant `window` everywhere — otherwise the missing leading trials are
    // treated as observed zeros.
    N1 ~ binomial(n_trials, p1);
    // The parser's Jacobian warning on the next line is a false positive:
    // y is a plain element-wise copy of y_obs/y_mis (an identity transform),
    // so no Jacobian adjustment is needed.
    y[ii_obs] ~ normal(mu, sigma);
}