# Latent Dirichlet Allocation model

#1

Dear Stan communities,

I am trying to write a Latent Dirichlet Allocation model in Stan. Even though the code seems to work, Stan won’t update any of the parameters so all the parameters stay at its initial value. I wonder what the problem is. Any help will be greatly appreciated. Please see the code for data simulation and Stan below. Thanks.

``````rm(list=ls())
library(MCMCpack)
library(rstan)

path <- 'd:/'

#===============
# simulate data
#===============
C <- 10 # max number of clusters
C_real <- 3
L <- 100 # number of locations
S <- 20 # number of species
n <- S * 5

V <- matrix(rbeta(L*C_real,1,C_real-1), L, C_real)
theta <- matrix(, L, C_real)
q <- matrix(, L, C_real-1)
for (i in 1:L) {
theta[i,1] <- V[i,1]
q[i,1] <- 1 - theta[i,1]
for (c in 2:(C_real-1)) {
theta[i,c] <- V[i,c] * q[i,c-1]
q[i,c] <- q[i,c-1] - theta[i,c]
} # c
theta[i,C_real] <- q[i,C_real-1]
} # i

alpha <- c(rep(.2,3), rep((1-.2*3)/(S-3),S-3))
phi <- matrix(, C_real, S)
for (c in 1:C_real) {
phi[c,] <- rdirichlet(1, sample(alpha, S, replace=F))
}

pi <- matrix(, L, S)
for (i in 1:L) {
for (s in 1:S) {
pi[i,s] <- sum(theta[i,] * phi[,s])
}
}

y <- matrix(, L, S)
for (i in 1:L) {
y[i,] <- rmultinom(n=1, size=n, pi[i,])
}

beta <- rep(1/S, S)

#==============
# define model
#==============
sink(paste(c(path, 'lda_stb2.stan'), collapse=''))
cat("

data{
int<lower=0> C; # max number of clusters
int<lower=0> L; # number of locations
int<lower=0> S; # number of species
int<lower=0> y[L, S]; # data
vector<lower=0>[S] beta; # prior for phi
} # data

parameters {
matrix<lower=0,upper=1>[L, C] V; # a location by cluster matrix
matrix<lower=0,upper=1>[S, C] phi; # a cluster by species matrix
} # parameters

transformed parameters{
#  vector<lower=0>[S] alpha; # prior for phi
matrix<lower=0,upper=1>[C, L] theta; # truncated stick-breaking prior
matrix<lower=0,upper=1>[L, C-1] q;
matrix<lower=0,upper=1>[S, L] pi; # probability of observation for each location and species

#  for (s in 1:S) {
#    alpha[s] <- 1;
#  } # s

for (i in 1:L) {
theta[1,i] <- V[i,1];
q[i,1] <- 1 - theta[1,i];
for (c in 2:(C-1)) {
theta[c,i] <- V[i,c] * q[i,c-1];
q[i,c] <- q[i,c-1] - theta[c,i];
} # C-1
theta[C,i] <- q[i,C-1];
} # i

for (i in 1:L) {
for (s in 1:S) {
pi[s,i] <- row(phi,s) * col(theta,i);
} # S
} # L
} # transformed parameters

model {
for (i in 1:L) {
for (c in 1:C) {
V[i,c] ~ beta(1,.5);
}
} # L

for (c in 1:C) {
col(phi,c) ~ dirichlet(beta);
} # L

for (i in 1:L) {
y[i] ~ multinomial(col(pi,i));
} # i
} # model

", fill=TRUE)
sink()

#===========
# call STAN
#===========
data <- list(C=C, L=L, S=S, y=y, beta=beta)

V_init <- cbind(V, matrix(.01, L, C-C_real))
phi_init <- t(rbind(phi, matrix(1/S, C-C_real, S)))

init_fun <- function() {
list(
V = V_init,
phi = phi_init
) # list
} # function

fit <- stan(file=paste(c(path, 'lda_stb2.stan'), collapse=''),
data=data, init=init_fun,
par=c('theta', 'phi'),
I think you’re missing simplex declarations. Did you read the version in the manual? The columns of `phi` and of `pi` need to be simplexes for the `dirichlet` to work. You might find it easier using an array of simplexes here rather than the columsn of a matrix. There’s an example in the manual.