Hi all,
My team and I have a data set related to voting that contains four variables measured across several countries: accuracy (A
), coverage (C
), completeness (K
), and register entries (R
). We know that A, C, and K can take any value between 0 and 1 and that R can take any positive value.
Each variable contains missing cases. Our aim is to impute the missing values, but to do so subject to the following known constraints:
A = (K * (R - d)) / (R*C)
C = (((1 - A)*R-d) / (R*A/K)) + K
K = (A * R * C) / (R - d)
Where d
is an additional unmeasured variable defined as:
d = R - ((A * R * C) / K)
I have got to the point where my program models A, C, K, and R accounting for missingness. My code is as follows:
functions {
}
data {
// Model data
int<lower = 1> N; // Sample size
int<lower = 1> J; // Number of countries
int ctry[N]; // Vector of countries
// Accuracy data
vector[N] A; // Data
int<lower=0> A_nmi; // Number of missing cases
int<lower=1> A_pmi[A_nmi]; // Position of missing cases
// Completeness data
vector[N] K; // Data
int<lower=0> K_nmi; // Number of missing cases
int<lower=1> K_pmi[K_nmi]; // Position of missing cases
// Completeness data
vector[N] C; // Data
int<lower=0> C_nmi; // Number of missing cases
int<lower=1> C_pmi[C_nmi]; // Position of missing cases
// Register entry data
vector[N] R; // Data
int<lower=0> R_nmi; // Number of missing cases
int<lower=1> R_pmi[R_nmi]; // Position of missing cases
}
transformed data {
}
parameters {
// Accuracy parameters
vector[J] A_alpha;
real A_alpha_bar;
real<lower = 0> A_sigma;
real<lower = 0> A_phi;
// Completeness parameters
vector[J] K_alpha;
real K_alpha_bar;
real<lower = 0> K_sigma;
real<lower = 0> K_phi;
// Coverage parameters
vector[J] C_alpha;
real C_alpha_bar;
real<lower = 0> C_sigma;
real<lower = 0> C_phi;
// Register entry parameters
vector[J] R_alpha;
real R_alpha_bar;
real<lower = 0> R_sigma;
real<lower = 0> R_sigma_pop;
// Create missing data parameters
vector<lower = 0, upper = 1>[A_nmi] A_na;
vector<lower = 0, upper = 1>[K_nmi] K_na;
vector<lower = 0, upper = 1>[C_nmi] C_na;
vector<lower = 0>[R_nmi] R_na;
}
transformed parameters {
}
model {
// Create vectors that combine observed data and imputed parameters
vector[N] A_y = A;
vector[N] K_y = K;
vector[N] C_y = C;
vector[N] R_y = R;
// Mus
vector[N] mu_A;
vector[N] mu_K;
vector[N] mu_C;
vector[N] mu_R;
for(n in 1:N){
mu_A[n] = A_alpha[ctry[n]];
mu_A[n] = inv_logit(mu_A[n]);
}
for(n in 1:N){
mu_K[n] = K_alpha[ctry[n]];
mu_K[n] = inv_logit(mu_K[n]);
}
for(n in 1:N){
mu_C[n] = C_alpha[ctry[n]];
mu_C[n] = inv_logit(mu_C[n]);
}
for(n in 1:N){
mu_R[n] = R_alpha[ctry[n]];
}
// Combine missing parameters into observed data
A_y[A_pmi] = A_na;
K_y[K_pmi] = K_na;
C_y[C_pmi] = C_na;
R_y[R_pmi] = R_na;
// Accuracy priors
target += normal_lpdf(A_alpha | A_alpha_bar, A_sigma);
target += normal_lpdf(A_alpha_bar | 0, 1.5);
target += exponential_lpdf(A_sigma | 2);
target += gamma_lpdf(A_phi | 1, 0.01);
// Completeness priors
target += normal_lpdf(K_alpha | K_alpha_bar, K_sigma);
target += normal_lpdf(K_alpha_bar | 0, 1.5);
target += exponential_lpdf(K_sigma | 2);
target += gamma_lpdf(K_phi | 1, 0.01);
// Coverage priors
target += normal_lpdf(C_alpha | C_alpha_bar, C_sigma);
target += normal_lpdf(C_alpha_bar | 0, 1.5);
target += exponential_lpdf(C_sigma | 2);
target += gamma_lpdf(C_phi | 1, 0.01);
// Register entry priors
target += normal_lpdf(R_alpha | R_alpha_bar, R_sigma);
target += normal_lpdf(R_alpha_bar | 0, 1.5);
target += exponential_lpdf(R_sigma | 1);
target += exponential_lpdf(R_sigma_pop | 1);
// Likelihoods
target += beta_lpdf(A_y | mu_A * A_phi, (1 - mu_A) * A_phi);
target += beta_lpdf(K_y | mu_K * K_phi, (1 - mu_K) * K_phi);
target += beta_lpdf(C_y | mu_C * C_phi, (1 - mu_C) * C_phi);
target += lognormal_lpdf(R_y | mu_R, R_sigma_pop);
}
generated quantities {
}
What I don’t know is how to include the constraints on the imputed missing values. Previously, I had tried defining the missing data parameters (e.g. R_na
) in the model block, but couldn’t work out how to do so without throwing up initialisation errors or initialising in such a way that it seemed to affect my model output.
Any help greatly welcomed.