This is the same code as in my earlier post; sorry for the duplicate, but I keep running into different kinds of problems.
Ideally, my H matrix is an indicator matrix filled with 0s and 1s, and that is what I really want. When I generate the simulated data, the true H is:
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] 1 1 1 0 1 1 0
[2,] 1 1 0 0 1 1 0
[3,] 1 0 1 0 1 1 0
[4,] 0 1 1 1 1 1 0
[5,] 1 0 1 0 1 1 0
[6,] 0 0 0 0 1 1 0
[7,] 0 0 0 0 1 1 0
[8,] 1 0 1 0 1 1 0
[9,] 1 0 0 0 1 1 0
[10,] 1 1 0 0 1 1 0
[11,] 1 0 1 1 0 0 1
[12,] 1 0 1 0 0 0 1
[13,] 0 1 0 1 0 0 1
[14,] 1 0 1 0 0 0 1
[15,] 0 0 0 1 0 0 1
[16,] 1 1 0 0 0 0 1
[17,] 0 1 0 0 0 0 1
[18,] 0 1 0 0 0 0 1
[19,] 1 0 0 1 0 0 1
Through a series of earlier questions I figured out that Stan cannot estimate such a discrete matrix (its HMC sampler only handles continuous parameters; discrete parameters would have to be marginalized out), so I relaxed my requirement and used a continuous sparsity-inducing prior instead of the spike-and-slab. But as you can see, there is structure in the H matrix: the first four columns contain randomly distributed 0s and 1s, columns 5 and 6 have 1s in the first p rows and 0s in the remaining rows, and column 7 has 0s in the first p rows and 1s in the rest. To reflect this structure in the prior, I used uniform priors for columns 5, 6, and 7. But when I used code like this
for (j in (k+1):(k+k1))
  for (i in 1:p)
    H[i,j] ~ uniform(0.9, 1);
for (j in (k+1):(k+k1))
  for (i in (p+1):D)
    H[i,j] ~ uniform(0, 0.1);
for (j in (k+k1+1):K)
  for (i in 1:p)
    H[i,j] ~ uniform(0, 0.1);
for (j in (k+k1+1):K)
  for (i in (p+1):D)
    H[i,j] ~ uniform(0.9, 1);
it does not work: Stan reports "Log probability evaluates to log(0), i.e. negative infinity. Stan can't start sampling from this initial value." The code only runs if I use
for (j in (k+1):(k+k1))
  for (i in 1:p)
    H[i,j] ~ uniform(-1, 1);
for (j in (k+1):(k+k1))
  for (i in (p+1):D)
    H[i,j] ~ uniform(-1, 1);
for (j in (k+k1+1):K)
  for (i in 1:p)
    H[i,j] ~ uniform(-1, 1);
for (j in (k+k1+1):K)
  for (i in (p+1):D)
    H[i,j] ~ uniform(-1, 1);
instead. But this is not what I want, and the estimates from this setup are far from accurate. I wonder how to fix this problem.
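My current suspicion is that the log(0) comes from a mismatch between the declared support of H (lower=-1, upper=1 in the parameters block below) and the support of the narrow uniform priors: Stan draws random initial values over the whole declared interval, so almost every initial H[i,j] lands where uniform(0.9,1) or uniform(0,0.1) has zero density, and the log posterior is negative infinity before sampling even starts. One direction I have been considering (just a sketch, and block names like H_top_on are made up) is to declare each structured block of H as a separate parameter whose bounds match its prior, so every random initial value automatically has positive density:
parameters {
  # Sketch only: structured columns of H split into blocks whose declared
  # bounds coincide with the support of the intended uniform priors.
  matrix<lower=0.9,upper=1>[p,k1] H_top_on;     # rows 1..p, cols k+1..k+k1
  matrix<lower=0,upper=0.1>[D-p,k1] H_bot_off;  # rows p+1..D, same columns
  matrix<lower=0,upper=0.1>[p,k2] H_top_off;    # rows 1..p, cols k+k1+1..K
  matrix<lower=0.9,upper=1>[D-p,k2] H_bot_on;   # rows p+1..D, same columns
}
With matching bounds the uniform priors are implied by the declarations themselves, so the sampling statements for these blocks could even be dropped, and the full H would be assembled in the transformed parameters block. Is that a sensible direction?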
My simulated data and code are below:
Generate Data
library(MASS)

## Dimensions: p binomial variables, q normal variables, D = p + q in total;
## k unstructured factors plus k1 + k2 structured factors, K factors overall.
p <- 10; q <- 9; D <- p + q
k <- 4; k1 <- 2; k2 <- 1; K <- k + k1 + k2
n <- 100

## Latent factor scores Z ~ N(0, I_K)
mu_z <- rep(0, K)
Sigma_z <- diag(K)
Z <- mvrnorm(n, mu_z, Sigma_z)

## Indicator matrix H: random 0/1 in the first k columns, then the block
## structure described above.
H <- matrix(nrow = D, ncol = K)
set.seed(1)
for (j in 1:k) H[, j] <- rbinom(D, 1, 2 / (j + 2))
for (j in (k+1):(k+k1)) {
  H[1:p, j] <- 1
  H[(p+1):D, j] <- 0
}
for (j in (k+k1+1):K) {
  H[1:p, j] <- 0
  H[(p+1):D, j] <- 1
}
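As a quick sanity check (not part of the original script), this verifies that H has the block structure described above:
stopifnot(all(H[1:p, (k+1):(k+k1)] == 1),
          all(H[(p+1):D, (k+1):(k+k1)] == 0),
          all(H[1:p, (k+k1+1):K] == 0),
          all(H[(p+1):D, (k+k1+1):K] == 1))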
## Loading magnitudes E: decreasing means, diagonal covariance;
## E_x for the p binomial variables, E_y for the q normal variables.
set.seed(1)
E_mux <- sort(runif(K, min = 0.01, max = 1), decreasing = TRUE)
set.seed(1)
e_x <- runif(K, min = 0.5, max = 5)
E_sigmax <- diag(K) * e_x
set.seed(1)
E_x <- mvrnorm(p, E_mux, E_sigmax)
set.seed(1)
E_muy <- sort(runif(K, min = 5, max = 40), decreasing = TRUE)
set.seed(1)
e_y <- runif(K, min = 0.5, max = 20)
E_sigmay <- diag(K) * e_y
set.seed(1)
E_y <- mvrnorm(q, E_muy, E_sigmay)
E <- rbind(E_x, E_y)

## Sparse loadings (H masks E elementwise) and linear predictor
Phi <- E * H
Theta <- Z %*% t(Phi)
Theta_x <- Theta[, 1:p]
Theta_y <- Theta[, (p+1):D]

## X[i,j] ~ Binomial(2, inverse-logit(Theta_x[i,j]))
X_p <- exp(Theta_x) / (1 + exp(Theta_x))
set.seed(1)
X <- matrix(nrow = n, ncol = p)
for (j in 1:p)
  for (i in 1:n)
    X[i, j] <- rbinom(1, 2, X_p[i, j])

## Y[i,j] ~ Normal(Theta_y[i,j], sd = 1/2)
Y_mu <- Theta_y
set.seed(1)
Y <- matrix(nrow = n, ncol = q)
for (j in 1:q)
  for (i in 1:n)
    Y[i, j] <- rnorm(1, Theta_y[i, j], sd = 1/2)
Stan code
library(rstan)
library(coda)
SBECCA_mod <- '
data {
  int<lower=1> n;
  int<lower=1> p;
  int<lower=1> q;
  int<lower=1> k;
  int<lower=1> k1;
  int<lower=1> k2;
  int<lower=1> D;
  int<lower=1> K;
  int<lower=0, upper=2> X[n,p];
  real Y[n,q];
  vector[K] mu_z;
  cov_matrix[K] Sigma_z;
}
parameters {
  matrix<lower=-1,upper=1>[D,K] H;
  matrix[D,K] E;
  matrix[n,K] Z;
  matrix<lower=0>[D,k] Pi;
  real<lower=0> alpha[K];  # prior scale for the top (X) part of E
  real<lower=0> beta[K];   # prior scale for the bottom (Y) part of E
  real<lower=0> sigma[q];  # residual sd for the normally distributed Y
}
transformed parameters {
  matrix[D,K] Phii;
  matrix[n,D] Theta;
  matrix[n,p] Theta_x;
  matrix[n,q] Theta_y;
  matrix[n,p] p_x;
  matrix[n,q] mu_y;
  Phii <- E .* H;                # sparse loadings
  Theta <- Z * transpose(Phii);  # linear predictor
  for (i in 1:n) {
    for (j in 1:p)
      Theta_x[i,j] <- Theta[i,j];
    for (l in 1:q)
      Theta_y[i,l] <- Theta[i,p+l];
  }
  for (i in 1:n)
    for (j in 1:p)
      p_x[i,j] <- exp(Theta_x[i,j]) / (1 + exp(Theta_x[i,j]));  # inverse logit
  for (i in 1:n)
    for (l in 1:q)
      mu_y[i,l] <- Theta_y[i,l];
}
model {
  # prior for Z
  for (i in 1:n)
    Z[i] ~ multi_normal(mu_z, Sigma_z);
  # priors for alpha, beta (scales of E)
  for (j in 1:K) {
    alpha[j] ~ inv_gamma(0.001, 0.001);
    beta[j] ~ inv_gamma(0.001, 0.001);
  }
  # priors for H: continuous shrinkage on the first k columns,
  # uniform on the structured columns
  for (i in 1:D)
    for (j in 1:k)
      Pi[i,j] ~ exponential(2);
  for (i in 1:D)
    for (j in 1:k)
      H[i,j] ~ normal(0, Pi[i,j]);
  for (j in (k+1):(k+k1))
    for (i in 1:p)
      H[i,j] ~ uniform(-1, 1);
  for (j in (k+1):(k+k1))
    for (i in (p+1):D)
      H[i,j] ~ uniform(-1, 1);
  for (j in (k+k1+1):K)
    for (i in 1:p)
      H[i,j] ~ uniform(-1, 1);
  for (j in (k+k1+1):K)
    for (i in (p+1):D)
      H[i,j] ~ uniform(-1, 1);
  # priors for E: top p rows scaled by alpha, bottom q rows by beta
  for (j in 1:K)
    for (i in 1:p)
      E[i,j] ~ normal(0, alpha[j]);
  for (j in 1:K)
    for (l in 1:q)
      E[p+l,j] ~ normal(0, beta[j]);  # bottom part of E: rows p+1..D
  # prior for the residual sd of Y
  for (j in 1:q)
    sigma[j] ~ inv_gamma(0.01, 0.01);
  # likelihood
  for (i in 1:n) {
    for (j in 1:p)
      X[i,j] ~ binomial(2, p_x[i,j]);
    for (l in 1:q)
      Y[i,l] ~ normal(mu_y[i,l], sigma[l]);
  }
}
'
SBECCA_dat <- list("X" = X, "Y" = Y, "mu_z" = rep(0, 7), "Sigma_z" = diag(rep(1, 7)),
                   "n" = 100, "p" = 10, "q" = 9, "k" = 4, "k1" = 2, "k2" = 1, "D" = 19, "K" = 7)
happyfit3 <- stan(model_code = SBECCA_mod, data = SBECCA_dat, thin = 3, iter = 100, chains = 2)
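Since the error message is about the initial value, one thing that might help (an untested sketch, not something I have run) is passing stan() explicit initial values for H that lie inside the priors' support; rstan's init argument accepts a function returning a named list, and if I understand the docs correctly, parameters left out of the list still get random inits:
## Untested sketch: start every structured entry of H inside its prior's
## support so the initial log probability is finite.
H_init <- matrix(0.5, nrow = D, ncol = K)
H_init[1:p, (k+1):(k+k1)] <- 0.95      # blocks with prior uniform(0.9, 1)
H_init[(p+1):D, (k+k1+1):K] <- 0.95
H_init[(p+1):D, (k+1):(k+k1)] <- 0.05  # blocks with prior uniform(0, 0.1)
H_init[1:p, (k+k1+1):K] <- 0.05
init_fun <- function() list(H = H_init)
happyfit3b <- stan(model_code = SBECCA_mod, data = SBECCA_dat,
                   init = init_fun, thin = 3, iter = 100, chains = 2)
But I do not know whether that alone is enough to make the narrow uniform priors behave.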