I’m trying to get the LDA model to work that’s in the Stan manual. I’m using the exact same mode with the exception that I’m feeding in data as a matrix containing 0 values rather than the long vector used on the manual. The data is one-hot with 70% of the observations occurring in feature 10 and 30% of observations with a 1 on the feature #25. For the life of me I can’t figure out how to make this stable. I know that in chapter 17.3 of the manual there is discussion around not using Stan for mixtures. However, both Gelman’s blog and chapter 17.5 of the Stan manual seem to indicate that this would work. Am I missing something conceptually, or am I coding this incorrectly?
Below is the data injection and Stan modeling in case it points out something I’m doing wrong. Thanks!
library(rstan)
library(shinystan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
#GENERATE FAKE DATA
# FAKE DATA HAS 2 MIXTURES
# EACH MIXTURE HAS 1 SPECIFIC SIGNAL COLUMN
if(1) {
features = 50
datapoints = 100
noise_features_per_row = 0
# real_columns are 10 and 25
# cluster_proporitions = 0.7,0.3
data = matrix(0, nrow = datapoints, ncol = features)
for (i in 1:datapoints) {
#data[i,sample(1:features, noise_features_per_row, replace=F)]=1
if(i < 70)
data[i,10] = 1
else
data[i,25] = 1
}
}
K = 2 #TOPICS
M = dim(data)[1] #ROWS/OBSERVATIONS
V = dim(data)[2] #FEATURES/COLUMNS
data = exp(data)
injected_data = list(
#OBSERVATIONS
M = M,
K = K,
V = V,
y = data,
#HYPERPRIORS
alpha = rep(0.1, K),
beta = rep(0.01, V)
)
model_to_use = "
data {
int<lower=1> K; // num topics
int<lower=2> V; // num features
int<lower=1> M; // num rows
vector<lower=0>[K] alpha; // topic prior
vector<lower=0>[V] beta; // feature columns prior
vector[V] y[M];
}
parameters {
simplex[K] theta[M]; // topic dist for doc m
simplex[V] phi[K]; // word dist for topic k
}
model {
for (m in 1:M)
theta[m] ~ dirichlet(alpha); //prior
for (k in 1:K)
phi[k] ~ dirichlet(beta); //prior
for (m in 1:M) {
for (v in 1:V) {
real eta[K];
for (k in 1:K) {
if (y[m,v] != 0)
eta[k] = log(theta[m, k]) + log(phi[k, v]);
}
if (y[m,v] != 0)
target += log_sum_exp(eta); //likelihood
}
}
}
"
#FIT THE MODEL
fit = stan(
model_code = model_to_use,
data = injected_data,
iter = 1000,
chains = 2
)
traceplot(fit,c('phi[1,10]','phi[2,10]','phi[1,25]','phi[2,25]','phi[1,49]','phi[2,49]'),nrow=3,ncol=2)
oo_summary <- summary(fit)
print(oo_summary)
write.table(oo_summary,file="oo_summary.csv",sep=",") # keeps the rownames
#launch_shinystan(as.shinystan(fit))