Why is my markov chain mainly sampling from around 1 and not exploring full space?

jorgen · April 3, 2024, 11:36am

See question in title. Here is the code:

Data for stan model (as lists):

> head(data.frame(stan_data_participant1),10)
     N Source1 Source2 y
1  150   0.999   0.750 1
2  150   0.625   0.250 0
3  150   0.625   0.875 1
4  150   0.500   0.125 0
5  150   0.750   0.750 1
6  150   0.999   0.999 1
7  150   0.375   0.375 0
8  150   0.625   0.375 0
9  150   0.750   0.500 1
10 150   0.999   0.999 1

Model compilation:

WB_mod <- cmdstan_model(file, cpp_options = list(stan_threads = TRUE), stanc_options = list("O1"), pedantic = TRUE)

Model fit:

WB_fit <- WB_mod$sample(
  data = stan_data_participant1,
  seed = 123,
  chains = 2,
  parallel_chains = 2,
  threads_per_chain = 2,
  iter_warmup = 1500,
  iter_sampling = 3000,
  refresh = 500
)

Model specification:

data {
  int<lower=0> N; // number of trials
  array[N] int<lower=0,upper=1> y; // discrete choice 
  array[N] real <lower = 0, upper = 1> Source1; // own source
  array[N] real <lower = 0, upper = 1> Source2; // other source 
}


transformed data {
  array[N] real l_Source1; // array of len N with logit of Source1
  array[N] real l_Source2; // array of len N with logit of Source2
  l_Source1 = logit(Source1); // logit of Source1
  l_Source2 = logit(Source2); // logit of Source2
}

parameters {
  real bias; // bias param
  // meaningful weights are btw 0.5 and 1 (theory reasons)
  real<lower = 0.5, upper = 1> w1; // weight for own source real number between 0.5 and 1
  real<lower = 0.5, upper = 1> w2; // weight for other source real number between 0.5 and 1
}

transformed parameters {
  real<lower = 0, upper = 1> weight1; // weight for own source
  real<lower = 0, upper = 1> weight2; // weight for other source
  // weight parameters are rescaled to be on a 0-1 scale (0 -> no effects; 1 -> face value)
  weight1 = (w1 - 0.5) * 2;  // rescale weight1
  weight2 = (w2 - 0.5) * 2;  // rescale weight2 
}

model {
  
  target += normal_lpdf(bias | 0, 1); // prior for bias 
  target += beta_lpdf(weight1 | 1, 1); // prior for weight1 -> uniform prior
  target += beta_lpdf(weight2 | 1, 1); // prior for weight2 -> uniform prior
  
  for (n in 1:N)
    target += bernoulli_logit_lpmf(y[n] | bias + weight1 * l_Source1[n] + weight2 * l_Source2[n]);
}

generated quantities{
  array[N] real log_lik; // array of len N with log likelihood
  real bias_prior; // prior for bias
  real w1_prior; // prior for weight1
  real w2_prior; // prior for weight2
  bias_prior = normal_rng(0, 1) ; // sample from prior for bias
  w1_prior = 0.5 + inv_logit(normal_rng(0, 1))/2 ; // sample from prior for weight1
  w2_prior = 0.5 + inv_logit(normal_rng(0, 1))/2 ; // sample from prior for weight2
  for (n in 1:N)
    log_lik[n]= bernoulli_logit_lpmf(y[n] | bias + weight1 * l_Source1[n] + weight2 * l_Source2[n]);
}

The markov chains

simonbrauer · April 3, 2024, 12:00pm

Welcome to the Stan Discourse. It might be helpful to see a few more features of the data and model draws.

Scatter plot of Source1 and Source2 for y=0 and y=1 separately (or with different colors)
the same plot except with the predicted probabilities from the model rather than Source1 and 2 (you can use inv_logit for this)

jorgen · April 3, 2024, 12:36pm

Here is a fully reproducible example (in .Rmd format) - one just have to create a WB_model.stan file in the same directory as the .Rmd and insert the model specifications as described in the initial question.


pacman::p_load(tidyverse, cmdstanr, brms)

# Function to make sure the denominator is never 0 (which produces NaN since you can't divide by 0.
logit_scaled <- function(p) {
  log(p / (1 - p + 1e-9))
}

simpleBayes_f <- function(bias, Source1, Source2){
    outcome <- inv_logit_scaled(bias + logit_scaled(Source1 / 8) + logit_scaled(Source2 / 8)) # dividing by 8 to scale the ratings to 0-1 (probability scale)
    return(outcome)
}

WeightedBayes_f <- function(bias, Source1, Source2, w1, w2){
    w1 <- (w1 - 0.5)*2
    w2 <- (w2 - 0.5)*2
    
    # Define or load the inv_logit_scaled and logit_scaled functions here
    outcome <- inv_logit_scaled(bias + w1 * logit_scaled(Source1) + w2 * logit_scaled(Source2)) # 
    return(outcome)
}

Creating simple data

trials <- 150
participants <- 20
ratings <- 1:8 # the participants can rate the faces from 1 to 8
w1 <- 0.6 # the weight for own rating
w2 <- 0.7 # the weight for group rating
bias <- 0 # assuming no bias
feedback <- c(-3, -2 ,0 ,2, 3) # the group rating is randomly chosen from this vector and subtracted from the first rating

sim_data <- tibble()

sim_data$w1 <- w1
sim_data$w2 <- w2
sim_data$bias <- bias

for (participant in seq(participants)){ # for each participant
    for (i in seq(trials)) { # for each trial
        sim_data <- rbind(sim_data, tibble(
            participant = participant,
            FaceID = i,
            FirstRating = sample(ratings, 1, prob = c(0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.7, 0.4)),
            GroupRating = pmax(1, pmin(8, FirstRating + (sample(feedback, 1)))), # pmin / pmax ensures that the group rating is within 1-8
            # add feedback saying if how much higher or lower the group rating is compared to the first rating
            feedback = GroupRating - FirstRating      
        ))
    }
}

Simulating data from 1) simple bayes model

# Simple Bayes second rating (after having seen the group rating)
for (participant in seq(participants)){ # for each participant
    for (i in seq(trials)) { # for each trial
        sim_data <- sim_data %>% 
            mutate(SecondRating_belief_SB = simpleBayes_f(bias, FirstRating, GroupRating)) %>% # SB = simple bayes
            mutate(SB_binary_rating = ifelse(SecondRating_belief_SB > 0.5, 1, 0)) %>% # binary rating (either trustworthiness or not) Alternativ: rbinom(1,1,SB_binary_rating)
            mutate(SB_number_rating = round(SecondRating_belief_SB * 8)) # number rating (1-8)
    }
}

Simulating data from 2) weighted bayes model

# Weighted Bayes second rating (after having seen the group rating)
for (participant in seq(participants)){ # for each participant
    for (i in seq(trials)) { # for each trial
        sim_data <- sim_data %>% 
            mutate(SecondRating_belief_WB = WeightedBayes_f(bias, (FirstRating/8), (GroupRating/8), w1, w2)) %>% # WB = weighted bayes
            mutate(WB_binary_rating = ifelse(SecondRating_belief_WB > 0.5, 1, 0)) %>% # binary rating (either trustworthiness or not) Alternativ: rbinom(1,1,WB_binary_rating)
            mutate(WB_number_rating = round(SecondRating_belief_WB * 8)) # number rating (1-8) - multiplying the belief back again
    }
}

file ← file.path(“stan_models/WB_model.stan”)

WB_mod ← cmdstan_model(file, cpp_options = list(stan_threads = TRUE), stanc_options = list(“O1”), pedantic = TRUE)


```{r}
# Data preparation
stan_data <- list(
    N = nrow(sim_data),
    Source1 = pmin(sim_data$FirstRating/8, 0.999), # dividing by 8 to scale the ratings to 0-1 (probability scale), and using pmin to avoid exact 0/1
    Source2 = pmin(sim_data$GroupRating/8, 0.999), # dividing by 8 to scale the ratings to 0-1 (probability scale)
    y = sim_data$WB_binary_rating
)

# make a new dataframe, only for participant 1
stan_data_participant1 <- sim_data %>% filter(participant == 1) %>% 
    mutate(Source1 = pmin(FirstRating/8, 0.999), # dividing by 8 to scale the ratings to 0-1 (probability scale), and using pmin to avoid exact 0/1
           Source2 = pmin(GroupRating/8, 0.999)) # dividing by 8 to scale the ratings to 0-1 (probability scale)

# make it into list
stan_data_participant1 <- list(
    N = nrow(stan_data_participant1),
    Source1 = stan_data_participant1$Source1,
    Source2 = stan_data_participant1$Source2,
    y = stan_data_participant1$WB_binary_rating
)

head(data.frame(stan_data_participant1),10)

Fitting model

WB_fit <- WB_mod$sample(
  data = stan_data_participant1,
  seed = 123,
  chains = 2,
  parallel_chains = 2,
  threads_per_chain = 2,
  iter_warmup = 1500,
  iter_sampling = 3000,
  refresh = 500
)

Plotting WB fit

# Extracting draws
draws_WB <- as_draws_df(WB_fit$draws())

# plotting markov chains for bias
plt1 <- ggplot(draws_WB, aes(.iteration, bias, group = .chain, color = .chain)) +
  geom_line(alpha = 0.5) +
  labs(title = "Markov chains for bias (WB model)",
        subtitle = "on simulated data",
       x = "Iteration",
       y = "Bias") +
  theme_classic()

plt2 <- ggplot(draws_WB, aes(.iteration, w1, group = .chain, color = .chain)) +
    geom_line(alpha = 0.5) +
    labs(title = "Markov chains for w1 (WB model)",
            subtitle = "on simulated data",
         x = "Iteration",
         y = "w1") +
    theme_classic()

plt3 <- ggplot(draws_WB, aes(.iteration, w2, group = .chain, color = .chain)) +
    geom_line(alpha = 0.5) +
    labs(title = "Markov chains for w2 (WB model)",
            subtitle = "on simulated data",
         x = "Iteration",
         y = "w2") +
    theme_classic()

plt4 <- grid.arrange(plt1, plt2, plt3, nrow = 3)


# save to drive
ggsave("figures/MarkovChains_WB_sim_data.png", plt4, width = 10, height = 10)

Topic		Replies	Views
Dealing with memory issues in Markov chain style model Modeling	3	77	November 10, 2024
Problem "running the model cmdstanr with simulated data in R" CmdStan techniques , fitting-issues , specification	14	1759	February 16, 2022
Sampling function taking too long to run at Chain 4 Modeling	2	692	April 12, 2022
Stan got stuck when sampling in chains Modeling	7	1063	February 15, 2023
Issues in my stan code Modeling rstan , fitting-issues	8	566	January 26, 2021

Why is my markov chain mainly sampling from around 1 and not exploring full space?

Creating simple data

Simulating data from 1) simple bayes model

Simulating data from 2) weighted bayes model

Related topics