Estimate Structured Matrix

This is the same code as in my earlier posts; sorry for the duplicate posting, but I keep encountering different types of problems.
Ideally, my H matrix is an indicator matrix filled with 0s and 1s, and that is what I really want. When I generate the simulated data, the real H is:
```
      [,1] [,2] [,3] [,4] [,5] [,6] [,7]
 [1,]    1    1    1    0    1    1    0
 [2,]    1    1    0    0    1    1    0
 [3,]    1    0    1    0    1    1    0
 [4,]    0    1    1    1    1    1    0
 [5,]    1    0    1    0    1    1    0
 [6,]    0    0    0    0    1    1    0
 [7,]    0    0    0    0    1    1    0
 [8,]    1    0    1    0    1    1    0
 [9,]    1    0    0    0    1    1    0
[10,]    1    1    0    0    1    1    0
[11,]    1    0    1    1    0    0    1
[12,]    1    0    1    0    0    0    1
[13,]    0    1    0    1    0    0    1
[14,]    1    0    1    0    0    0    1
[15,]    0    0    0    1    0    0    1
[16,]    1    1    0    0    0    0    1
[17,]    0    1    0    0    0    0    1
[18,]    0    1    0    0    0    0    1
[19,]    1    0    0    1    0    0    1
```
I have figured out by now, through asking a series of questions, that Stan cannot estimate such a discrete matrix, so I relaxed my requirement: instead of the spike and slab, I used a continuous prior that can induce sparsity. But as you can see, there is some structure in the H matrix. The first 4 columns have randomly distributed 0s and 1s; columns 5 and 6 have all 1s in the first few rows and 0s in the bottom rows; and column 7 has 0s in the first few rows and 1s in the bottom rows. To reflect this structure in the prior, I used a uniform prior for columns 5, 6, and 7. But when I used code like this
```
for (j in (k+1):(k+k1)) {
  for (i in 1:p)
    H[i,j] ~ uniform(0.9, 1);
  for (i in (p+1):D)
    H[i,j] ~ uniform(0, 0.1);
}
for (j in (k+k1+1):K) {
  for (i in 1:p)
    H[i,j] ~ uniform(0, 0.1);
  for (i in (p+1):D)
    H[i,j] ~ uniform(0.9, 1);
}
```
it does not work. Stan reports "Log probability evaluates to log(0), i.e. negative infinity. Stan can't start sampling from this initial value." Only by using
```
for (j in (k+1):(k+k1)) {
  for (i in 1:p)
    H[i,j] ~ uniform(-1, 1);
  for (i in (p+1):D)
    H[i,j] ~ uniform(-1, 1);
}
for (j in (k+k1+1):K) {
  for (i in 1:p)
    H[i,j] ~ uniform(-1, 1);
  for (i in (p+1):D)
    H[i,j] ~ uniform(-1, 1);
}
```
does the code run. But this is not what I want, and the estimates from this setup are far from accurate. I wonder how to fix this problem.
Attached are my simulated data and code:

Generate Data

```
library(MASS)

p <- 10; q <- 9; D <- p + q                  # p binomial columns, q normal columns
k <- 4; k1 <- 2; k2 <- 1; K <- k + k1 + k2   # numbers of factors per block
n <- 100

# latent factors Z ~ N(0, I_K)
mu_z <- rep(0, K)
Sigma_z <- diag(K)

Z <- mvrnorm(n, mu_z, Sigma_z)
H <- matrix(nrow = D, ncol = K)

# first k columns of H: randomly distributed 0/1 indicators
set.seed(1)
for (j in 1:k)
  H[, j] <- rbinom(D, 1, 2 / (j + 2))

# columns (k+1):(k+k1): 1s on the X rows, 0s on the Y rows
for (j in (k+1):(k+k1)) {
  H[1:p, j] <- 1
  H[(p+1):D, j] <- 0
}

# columns (k+k1+1):K: 0s on the X rows, 1s on the Y rows
for (j in (k+k1+1):K) {
  H[1:p, j] <- 0
  H[(p+1):D, j] <- 1
}

# loadings E, with separate locations/scales for the X rows and the Y rows
set.seed(1)
E_mux <- sort(runif(K, min = .01, max = 1), decreasing = TRUE)
set.seed(1)
e_x <- runif(K, min = .5, max = 5)
E_sigmax <- diag(K) * e_x

set.seed(1)
E_x <- mvrnorm(p, E_mux, E_sigmax)
set.seed(1)
E_muy <- sort(runif(K, min = 5, max = 40), decreasing = TRUE)
set.seed(1)
e_y <- runif(K, min = .5, max = 20)
E_sigmay <- diag(K) * e_y
set.seed(1)
E_y <- mvrnorm(q, E_muy, E_sigmay)
E <- rbind(E_x, E_y)

# sparse loading matrix and linear predictors
Phi <- E * H
Theta <- Z %*% t(Phi)
Theta_x <- Theta[, 1:p]
Theta_y <- Theta[, (p+1):D]

# X: binomial(2, inverse-logit(Theta_x))
X_p <- exp(Theta_x) / (1 + exp(Theta_x))
set.seed(1)
X <- matrix(rbinom(n * p, 2, X_p), nrow = n, ncol = p)

# Y: normal with mean Theta_y and sd 1/2
set.seed(1)
Y <- matrix(rnorm(n * q, mean = Theta_y, sd = 1/2), nrow = n, ncol = q)
```

Stan code

```
library(rstan)
library(coda)

SBECCA_mod <- '
data {
  int<lower=1> n;
  int<lower=1> p;
  int<lower=1> q;
  int<lower=1> k;
  int<lower=1> k1;
  int<lower=1> k2;
  int<lower=1> D;
  int<lower=1> K;
  int<lower=0, upper=2> X[n,p];
  real Y[n,q];
  vector[K] mu_z;
  cov_matrix[K] Sigma_z;
}
parameters {
  matrix<lower=-1,upper=1>[D,K] H;
  matrix[D,K] E;
  matrix[n,K] Z;
  matrix<lower=0>[D,k] Pi;
  real<lower=0> alpha[K];  # scales for the top (X) rows of E
  real<lower=0> beta[K];   # scales for the bottom (Y) rows of E
  real<lower=0> sigma[q];  # residual sds for Y, which is normally distributed
}
transformed parameters {
  matrix[D,K] Phii;
  matrix[n,D] Theta;
  matrix[n,p] Theta_x;
  matrix[n,q] Theta_y;
  matrix[n,p] p_x;
  matrix[n,q] mu_y;

  Phii <- E .* H;
  Theta <- Z * transpose(Phii);

  for (i in 1:n) {
    for (j in 1:p)
      Theta_x[i,j] <- Theta[i,j];
    for (l in 1:q)
      Theta_y[i,l] <- Theta[i,l+p];
  }

  for (i in 1:n)
    for (j in 1:p)
      p_x[i,j] <- exp(Theta_x[i,j]) / (1 + exp(Theta_x[i,j]));

  for (i in 1:n)
    for (l in 1:q)
      mu_y[i,l] <- Theta_y[i,l];
}
model {
  # prior for Z
  for (i in 1:n)
    Z[i] ~ multi_normal(mu_z, Sigma_z);

  # priors for alpha, beta (scales of E)
  for (j in 1:K) {
    alpha[j] ~ inv_gamma(0.001, 0.001);
    beta[j] ~ inv_gamma(0.001, 0.001);
  }

  # priors for H
  for (i in 1:D)
    for (j in 1:k)
      Pi[i,j] ~ exponential(2);
  for (i in 1:D)
    for (j in 1:k)
      H[i,j] ~ normal(0, Pi[i,j]);

  for (j in (k+1):(k+k1)) {
    for (i in 1:p)
      H[i,j] ~ uniform(-1, 1);
    for (i in (p+1):D)
      H[i,j] ~ uniform(-1, 1);
  }
  for (j in (k+k1+1):K) {
    for (i in 1:p)
      H[i,j] ~ uniform(-1, 1);
    for (i in (p+1):D)
      H[i,j] ~ uniform(-1, 1);
  }

  # priors for E: rows 1:p use alpha, rows (p+1):D use beta
  for (j in 1:K)
    for (i in 1:p)
      E[i,j] ~ normal(0, alpha[j]);
  for (j in 1:K)
    for (l in 1:q)
      E[p+l,j] ~ normal(0, beta[j]);  # bottom (Y) rows of E

  # priors for the residual sds of Y
  for (j in 1:q)
    sigma[j] ~ inv_gamma(0.01, 0.01);

  # likelihood
  for (i in 1:n) {
    for (j in 1:p)
      X[i,j] ~ binomial(2, p_x[i,j]);
    for (l in 1:q)
      Y[i,l] ~ normal(mu_y[i,l], sigma[l]);
  }
}
'

SBECCA_dat <- list(X = X, Y = Y, mu_z = rep(0, 7), Sigma_z = diag(rep(1, 7)),
                   n = 100, p = 10, q = 9, k = 4, k1 = 2, k2 = 1, D = 19, K = 7)
happyfit3 <- stan(model_code = SBECCA_mod, data = SBECCA_dat,
                  thin = 3, iter = 100, chains = 2)
```

If you have uniforms, the parameters need to be declared with the same bounds. Otherwise, unconstrained values will lead to illegal constrained values, and Stan will freeze because it can't randomly find parameter values that satisfy the constraints, or it will devolve into a random walk when it steps outside the support.
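For example, here is a minimal sketch of one way to do that for the structured columns, assuming you split H into per-block parameters (the names `H_left`, `H_mid_top`, etc. are made up for illustration). Since all elements of a single Stan matrix share one set of bounds, each block gets its own declaration with bounds matching its uniform prior, and H is reassembled in `transformed parameters`:

```
parameters {
  matrix<lower=-1,upper=1>[D,k]     H_left;      # columns 1:k keep their normal prior
  matrix<lower=0.9,upper=1>[p,k1]   H_mid_top;   # columns (k+1):(k+k1), rows 1:p
  matrix<lower=0,upper=0.1>[D-p,k1] H_mid_bot;   # columns (k+1):(k+k1), rows (p+1):D
  matrix<lower=0,upper=0.1>[p,k2]   H_right_top; # columns (k+k1+1):K, rows 1:p
  matrix<lower=0.9,upper=1>[D-p,k2] H_right_bot; # columns (k+k1+1):K, rows (p+1):D
}
transformed parameters {
  matrix[D,K] H;
  for (j in 1:k)
    for (i in 1:D)
      H[i,j] <- H_left[i,j];
  for (j in 1:k1) {
    for (i in 1:p)
      H[i,k+j] <- H_mid_top[i,j];
    for (i in 1:(D-p))
      H[p+i,k+j] <- H_mid_bot[i,j];
  }
  for (j in 1:k2) {
    for (i in 1:p)
      H[i,k+k1+j] <- H_right_top[i,j];
    for (i in 1:(D-p))
      H[p+i,k+k1+j] <- H_right_bot[i,j];
  }
}
```

With the declared bounds matching the uniform intervals, the `H[i,j] ~ uniform(...)` statements only add a constant and can be dropped entirely; random initialization then always lands inside the support, which is what makes the log(0) error go away.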

You can use three back-ticks to set off code. One before and one after.

Stan's not really set up to do this kind of hard choice. Aki's been writing papers on softer forms of variable selection.
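As a rough illustration of what "softer" means here, the following is a minimal sketch of a horseshoe-style shrinkage prior on one column of loadings (the names `h`, `lambda`, and `tau` are made up for this sketch, and it is not Aki's exact formulation): most entries are pulled hard toward zero, but heavy-tailed local scales let a few escape.

```
data {
  int<lower=1> D;
}
parameters {
  vector[D] h;                # one column of loadings
  vector<lower=0>[D] lambda;  # local shrinkage scales, one per entry
  real<lower=0> tau;          # global shrinkage scale
}
model {
  lambda ~ cauchy(0, 1);      # half-Cauchy: heavy tails let some entries escape
  tau ~ cauchy(0, 1);
  h ~ normal(0, lambda * tau); # entries shrink toward 0 unless lambda[i] is large
}
```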

Can you elaborate on this part a little bit?

> You can use three back-ticks to set off code. One before and one after.

You got the back-ticks before the code, but not the ones after. You want it to look like this:

```
double a = 10;
a *= 3;
```

and then it’ll render as

double a = 10;
a *= 3;

Do you think it is possible to marginalize the H matrix element-wise, given that H has such a structure?

I received a lot of suggestions about marginalizing discrete parameters on the previous question I asked, but that was a simplified situation; the real situation I need to deal with is like this (a structured H). Do you think it is possible to marginalize this?

I know that this is technically not a Stan question, but I feel you all have much broader knowledge than merely Stan, and I would like to get some suggestions from you! ^^

When the discrete parameters are organized along with the data, then it's possible. When the discrete parameters interact with each other (i.e., they're not all conditionally independent), as in variable selection, they can't be marginalized out efficiently enough to be coded in Stan.
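To make the first case concrete, here is a minimal sketch of element-wise marginalization, assuming one binary indicator per observation that is conditionally independent of the others given the parameters (all names here, `theta`, `mu`, `sigma`, `y`, are made up for the sketch):

```
data {
  int<lower=1> n;
  real y[n];
}
parameters {
  real mu;
  real<lower=0> sigma;
  real<lower=0,upper=1> theta;  # inclusion probability for each indicator
}
model {
  # p(y[i]) = theta * Normal(y[i] | mu, sigma)
  #         + (1 - theta) * Normal(y[i] | 0, sigma)
  # the binary indicator for observation i is summed out element-wise
  for (i in 1:n)
    increment_log_prob(log_sum_exp(
      log(theta) + normal_log(y[i], mu, sigma),
      log1m(theta) + normal_log(y[i], 0, sigma)));
}
```

In the structured-H model above, each entry H[i,j] enters the likelihood of every observation through Theta = Z * Phi', so the indicators are coupled through the likelihood; marginalizing them would require summing over all joint 0/1 configurations of H, which is exactly the case that is too expensive to code in Stan. That is why the soft, continuous priors are the practical route here.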