# Compare MAP estimates from a naive vs. a non-centered (reparameterized)
# Stan model when the prior on gene_distortions is very concentrated.
setwd(dir = "~/Desktop/stan_gaussian_map_issues2")  # NOTE(review): setwd() in scripts is fragile; consider relative paths or here::here()
library(rstan)
library(magrittr)

init <- readRDS("init.Rds")
# The reparameterized model names this parameter *_raw; give it the same start.
init$gene_distortions_raw <- init$gene_distortions
input <- readRDS("input.Rds")

# Set very concentrated priors
input$mean_prior_mean <- 0
input$mean_prior_sd <- 10
input$distortion_prior_mean <- 0
input$distortion_prior_sd <- 0.001

# Fit both models
mod <- rstan::stan_model("labels_known.stan")
mod_reparam <- rstan::stan_model("labels_known_reparam.stan")
fit <- rstan::optimizing(
  object = mod,
  as_vector = FALSE,
  verbose = TRUE,
  init = init,
  data = input
)
fit_reparam <- rstan::optimizing(
  object = mod_reparam,
  as_vector = FALSE,
  verbose = TRUE,
  init = init,
  data = input
)

# The naive parameterization totally ignores the concentrated prior
fit$par$gene_distortions %>% sd
# The reparameterized version does not ignore the prior
fit_reparam$par$gene_distortions %>% sd
# They get roughly similar error variances.
fit$par$gene_sds
fit_reparam$par$gene_sds

# What's the posterior mode supposed to be for cluster 1? Nice quick reference here:
# https://people.eecs.berkeley.edu/~jordan/courses/260-spring10/lectures/lecture5.pdf
# This ignores shrinkage of the training-set means, but that's minor.
# Hand-compute the conjugate normal-normal posterior for cluster k and
# compare it against both models' MAP estimates.
k <- 1
mle <- input$sample_means_test - input$sample_means_train
prior_var <- input$distortion_prior_sd^2
# Sampling variance of the cluster-k test mean: per-gene variance / n_k.
error_var <- fit$par$gene_sds^2 / input$n_by_cluster_test[k]
prior_mean <- 0
# Precision-weighted average of MLE and prior mean (standard conjugate result).
posterior_mean_by_hand <- (mle * prior_var + prior_mean * error_var) /
  (error_var + prior_var)

plot(
  posterior_mean_by_hand[, k],
  mle[, k],
  col = "black",
  main = "Posterior means",
  xlab = "Hand-calculated",
  ylab = "value"
)
abline(a = 0, b = 1)
points(posterior_mean_by_hand[, k], fit$par$gene_distortions[, k], col = "red")
points(
  posterior_mean_by_hand[, k],
  fit_reparam$par$gene_distortions[, k],
  col = "blue"
)
legend(
  "topleft",
  legend = c("mle", "naive", "reparam", "by_hand (y=x)"),
  col = c("black", "red", "blue", "black"),
  pch = c("o", "o", "o", "l")
)

posterior_var_by_hand <- 1 / ((1 / prior_var) + (1 / error_var))
posterior_var_by_hand

# In case the models use different constant terms, I evaluate the log
# posterior for each one using the parameters found by both itself and by the other.
# Conclusion: they do use different constant terms, but
# each model's estimates basically attain the maximum when plugged into the other model.
c(
  "naive_naive" = fit$value,
  "reparam_naive" = rstan::optimizing(
    object = mod_reparam,
    init = c(
      fit$par,
      list(
        # Map the naive model's natural-scale parameters back to the
        # non-centered ("raw") scale expected by the reparameterized model.
        gene_means_raw =
          (fit$par$gene_means - input$mean_prior_mean) / input$mean_prior_sd,
        gene_distortions_raw =
          (fit$par$gene_distortions - input$distortion_prior_mean) /
            input$distortion_prior_sd
      )
    ),
    iter = 0,  # iter = 0: evaluate the log posterior at init without optimizing
    data = input
  )$value,
  "reparam_reparam" = fit_reparam$value,
  "naive_reparam" = rstan::optimizing(
    object = mod,
    init = fit_reparam$par,
    iter = 0,
    data = input
  )$value
)