I have tried reparameterizing the model. I am not sure why, but it works much better now.
I did the following: I generated data that looks like this:
| gene   | cond A | cond A | cond A | cond B | cond B | cond B |
|--------|--------|--------|--------|--------|--------|--------|
| gene1  | 4      | 18     | 9      | 63     | 87     | 103    |
| gene2  | 79     | 163    | 144    | 74     | 64     | 47     |
| ...    | ...    | ...    | ...    | ...    | ...    | ...    |
| gene60 | 15     | 18     | 32     | 4      | 1      | 0      |
In the transformed data block I split the matrix in half and stored condition A (called input) and condition B (called ip) in two separate matrices of equal dimensions.
I then estimated a (row-wise) model for condition A in which the observations follow a negative binomial distribution with one common \mu and one common \phi. The rows of condition B are distributed either with mean \mu \cdot \delta_0 and dispersion \phi or with mean \mu \cdot \delta_1 and dispersion \phi, where 0 < \delta_0 < 1 < \delta_1.
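To be explicit about what the code below implements: each input observation follows y^{input}_{ij} \sim NB(\mu, \phi), while each ip observation follows the two-component mixture

p(y^{ip}_{ij}) = \pi \cdot NB(y^{ip}_{ij} \mid \mu \cdot \delta_1, \phi) + (1 - \pi) \cdot NB(y^{ip}_{ij} \mid \mu \cdot \delta_0, \phi),

with the mixture weight \pi shared across all rows.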
For this model, the estimation works pretty well.
I successively added more parameters, and in the end allowed every row to have its own \mu_i, \delta_{0,i}, \delta_{1,i}, \phi_i (they share common priors, however).
The estimation also seemed reasonable, with one exception: the \delta_{0,i} were all estimated to be around 0.55 - 0.75 and the \delta_{1,i} were all estimated to be between 6 and 8, more or less regardless of the posterior membership probability. I assume this has to do with the fact that I have to estimate both a \delta_{0,i} and a \delta_{1,i} for every row, even though in reality only one of the two is ever observed. However, I am not sure why they all default to the values they do (i.e. 6 - 8): the prior for \delta_1 has its maximum at 4 (even though it is rather flat), so I would expect the estimates to center there if there is little information. (Edit: I think this has to do with the fact that I truncate the distributions, so the actual prior mean is higher than 4 once everything below 1 is cut off.)
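A quick numerical check of that edit in R (simulating the normal(4, 10) prior restricted to values above the <lower = 1> bound):

## mean of a normal(4, 10) prior truncated at 1
x <- rnorm(1e6, mean = 4, sd = 10)
mean(x[x > 1])
## ~10.2

So the constrained prior for \delta_1 actually centers well above 10, not at 4; there is no prior pull towards 4 at all.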
Would you have an idea how to formulate the model so as to avoid this problem?
edit:
maybe the above is not actually a problem, and the better question is: can I just compute my posterior estimate for \delta_i as the mixture \pi_i \cdot \delta_{1,i} + (1 - \pi_i) \cdot \delta_{0,i}, where \pi_i is the posterior membership probability?
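In code, I mean something like this (a sketch against the full model fit defined at the bottom, using its pi_post, delta0 and delta1 names):

post <- rstan::extract(stanfit_onepi_full)
## pi_post, delta0, delta1 are draws-by-N matrices in the full model;
## weight each row's deltas by its membership probability, then average over draws
delta_hat <- colMeans(post$pi_post * post$delta1 +
                      (1 - post$pi_post) * post$delta0)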
Thank you very much!
full code:
generatemixedcounts <- function(n = 6, N = 50, phi = 2, mu = 10, deltaeffect = 4,
                                meth_prob = 0.25){
  ## baseline counts for all N genes and n samples
  m <- matrix(NA, nrow = N, ncol = n)
  m[] <- rnbinom(n = n * N, mu = mu, size = phi)
  ## first n/2 columns belong to condition A (input), last n/2 to condition B (ip)
  conditions <- rep(c(FALSE, TRUE), each = n/2)
  ## rows that carry the effect in condition B
  select <- sample(c(TRUE, FALSE), size = N, replace = TRUE,
                   prob = c(meth_prob, 1 - meth_prob))
  ## overwrite the ip counts of the selected rows with the shifted mean
  m[select, conditions] <- rnbinom(n = sum(select) * sum(conditions),
                                   mu = mu * deltaeffect, size = phi)
  return(list(m = m, select = select, conditions = conditions))
}
toy <- generatemixedcounts(N = 60, mu = 10, deltaeffect = 4, n = 6, phi = 2)
standata <- list(y = toy$m, N = nrow(toy$m), n = ncol(toy$m),
ip = toy$conditions)
truepi <- sum(toy$select)/60; truepi
write("
data {
  int<lower=0> N;                 // number of genes (rows)
  int<lower=0> n;                 // number of samples (columns)
  int<lower=0> y[N,n];            // count matrix
  int<lower=0, upper=1> ip[n];    // condition per column: 0 = input, 1 = ip
}
transformed data {
  // idea is to make one matrix for every condition
  int m = n/2; // number of samples per condition input/ip
  int k = 1;
  int l = 1;
  int<lower=0> y_input[N,m];
  int<lower=0> y_ip[N,m];
  for (j in 1:n){
    if (ip[j] == 0){
      for (i in 1:N){
        y_input[i,k] = y[i,j];
      }
      k += 1;
    } else {
      for (i in 1:N){
        y_ip[i,l] = y[i,j];
      }
      l += 1;
    }
  }
}
parameters {
  real<lower=0, upper=1> pi;
  real<lower=0> mu;
  real<lower=0> phi;
  real<lower=0, upper=1> delta0;
  real<lower=1> delta1;
}
model {
  vector[2] contributions;
  for (j in 1:m){
    for (i in 1:N) {
      // distribution of y_input
      target += neg_binomial_2_lpmf(y_input[i,j] | mu, phi);
      // mixture distribution of y_ip (membership marginalized out)
      contributions[1] = log(pi) + neg_binomial_2_lpmf(y_ip[i,j] | mu * delta1, phi);
      contributions[2] = log1m(pi) + neg_binomial_2_lpmf(y_ip[i,j] | mu * delta0, phi);
      target += log_sum_exp(contributions);
    }
  }
  delta1 ~ normal(4, 10);
  delta0 ~ normal(1, 1);
  mu ~ normal(10, 2);
  phi ~ normal(2, 0.8);
}
generated quantities {
  // posterior membership probabilities, computed on the log scale
  // to avoid underflow in exp()
  real pi_post[N];
  for (i in 1:N) {
    real lp1 = log(pi) + neg_binomial_2_lpmf(y_ip[i] | mu * delta1, phi);
    real lp0 = log1m(pi) + neg_binomial_2_lpmf(y_ip[i] | mu * delta0, phi);
    pi_post[i] = exp(lp1 - log_sum_exp(lp1, lp0));
  }
}
", file = "./mixturenegbin_delta_reduced.stan")
write("
data {
  int<lower=0> N;
  int<lower=0> n;
  int<lower=0> y[N,n];
  int<lower=0, upper=1> ip[n];
}
transformed data {
  // split y into one matrix per condition, as in the reduced model
  int m = n/2; // number of samples per condition input/ip
  int k = 1;
  int l = 1;
  int<lower=0> y_input[N,m];
  int<lower=0> y_ip[N,m];
  for (j in 1:n){
    if (ip[j] == 0){
      for (i in 1:N){
        y_input[i,k] = y[i,j];
      }
      k += 1;
    } else {
      for (i in 1:N){
        y_ip[i,l] = y[i,j];
      }
      l += 1;
    }
  }
}
parameters {
  real<lower=0, upper=1> pi;
  real<lower=0> mu[N];                // per-gene mean
  real<lower=0> phi[N];               // per-gene dispersion
  real<lower=0, upper=1> delta0[N];   // per-gene down-effect
  real<lower=1> delta1[N];            // per-gene up-effect
}
model {
  vector[2] contributions;
  for (j in 1:m){
    for (i in 1:N) {
      // distribution of y_input
      target += neg_binomial_2_lpmf(y_input[i,j] | mu[i], phi[i]);
      // mixture distribution of y_ip (membership marginalized out)
      contributions[1] = log(pi) + neg_binomial_2_lpmf(y_ip[i,j] | mu[i] * delta1[i], phi[i]);
      contributions[2] = log1m(pi) + neg_binomial_2_lpmf(y_ip[i,j] | mu[i] * delta0[i], phi[i]);
      target += log_sum_exp(contributions);
    }
  }
  // common priors across genes
  delta1 ~ normal(4, 10);
  delta0 ~ normal(1, 1);
  mu ~ normal(10, 2);
  phi ~ normal(2, 0.8);
}
generated quantities {
  // posterior membership probabilities, on the log scale to avoid underflow
  real pi_post[N];
  for (i in 1:N) {
    real lp1 = log(pi) + neg_binomial_2_lpmf(y_ip[i] | mu[i] * delta1[i], phi[i]);
    real lp0 = log1m(pi) + neg_binomial_2_lpmf(y_ip[i] | mu[i] * delta0[i], phi[i]);
    pi_post[i] = exp(lp1 - log_sum_exp(lp1, lp0));
  }
}
", file = "./mixturenegbin_delta_full.stan")
## Bayesian Estimation
stanfit_onepi_reduced <- rstan::stan(file = "./mixturenegbin_delta_reduced.stan",
                                     data = standata, iter = 1000, warmup = 500,
                                     thin = 1, control = list(adapt_delta = 0.99))
stanfit_onepi_reduced
stanfit_onepi_full <- rstan::stan(file = "./mixturenegbin_delta_full.stan",
                                  data = standata, iter = 1000, warmup = 500,
                                  thin = 1, control = list(adapt_delta = 0.99))
stanfit_onepi_full
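As a quick sanity check of the fits against the simulated truth, I cross-tabulate the estimated membership probabilities with the rows that actually received the effect:

## posterior mean membership probability per row
pp <- summary(stanfit_onepi_full, pars = "pi_post")$summary[, "mean"]
table(estimated = pp > 0.5, truth = toy$select)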