I declared an array(length T) of J\times I matrix in stan code as the following:
matrix[J,I] X[T];
In the Stan code above, T=15000,J=3,I=3.
However, when passing data of X in R code, I’ve got the following error in my RStudio:
Error in mod$fit_ptr() :
Exception: mismatch in dimension declared and found in context; processing stage=data initialization; variable name=X; position=0; dims declared=(15000,3,3); dims found=(3,3,15000) (in ‘model28739f6392a_estimate_mixed_logit_bayes_simulated_choice_data’ at line 29)
failed to create the sampler; sampling not done.
In the R code, the array of matrix X is indeed an array of length 15000, and each element in the array X is a matrix of dimension 3\times3. I don’t understand why the R code report such an error as mismatch in dimension.
The complete Stan Code is:
// The input data is a vector 'y' of length 'N'.
data {
//individuals的总数
int<lower=1> N;
//某个individual的效用函数中参数beta的总数
int<lower=1> I;
//# of alternatives in each choice set
int<lower=2> J;
//# of choice situations
//T为所有个体(N个)的所有choice sets/choice situations的数量;1个individual有多个choice sets
int<lower=1> T;
int<lower=1,upper=J> Y[T];
int<lower=1,upper=N> id[T]; // respondent index
//每个individual的每个choice set中有J个alternatives,每个alternative有I个betas,所以为J*I的矩阵
//因为所有individual的所有choice sets一共有T个,所以有T个这样的J*I的矩阵
matrix[J,I] X[T]; // array of design matrices
}
// The parameters accepted by the model. Our model
// accepts two parameters 'mu' and 'sigma'.
parameters {
//参数beta服从的正态分布的均值mu
vector[I] mu;
//参数beta服从的正态分布的协方差矩阵分解后得到的对角矩阵中,
//对角线元素构成的向量tau
vector<lower=0>[I] tau;
//相关矩阵Omega的Cholesky factor
cholesky_factor_corr[I] Cholesky_Omega;
vector[I] beta[N];
}
transformed parameters {
//Cholesky factor of covariance matrix
cholesky_factor_cov[I] Cholesky_Sigma; // cholesky factors
//array v的长度为S,每个元素为长度为J的vector;
vector[J] V[T]; // utility vector for each choice set
//计算Cholesky factor of covariance matrix;
//Cholesky_Sigma=diag(tau)*Cholesky_Omega,是以对角线元素为tau的对角矩阵乘以下三角方阵Cholesky_Omega得到
//见p145,Stan Functions Reference
Cholesky_Sigma = diag_pre_multiply(tau, Cholesky_Omega);
for (t in 1:T) {
//V[t]为长度为J的vector,X[t]为J*I的矩阵,beta[id[t]]为长度为I的vector
V[t] = X[t] * beta[id[t]];
}
}
// The model to be estimated. We model the output
// 'y' to be normally distributed with mean 'mu'
// and standard deviation 'sigma'.
model {
//prior for mu
//elements in mu are iid normally distributed,with mean 0 and standard deviation 100
//100为标准差,不是方差;方差=100^2=10000
//increment log-prior
mu ~ normal(0, 100);
//prior for tau
//tau为separation strategy中对角矩阵的对角线元素组成的向量
//向量tau中每个元素服从iid的half-cauchy分布
tau ~ cauchy(0,2.5);
//prior for correlation matrix
Cholesky_Omega ~ lkj_corr_cholesky(1);
//prior for beta
for (n in 1:N){
//相当于multi_normal(mu,Cholesky_Sigma*Cholesky_Sigma^T),
//其中Cholesky_Sigma为Cholesky factor of covariance matrix
//见p151,Stan Functions Reference
//Covariance matrix = Cholesky_Sigma*Cholesky_Sigma^T
//mu为长度为I的vector
//beta[n]的长度为I
//相当于从多元正太分布的联合概率密度中抽取beta[n]中的I个元素
//当然,这里是累加log probability density
//每一个beta[n]都是长度为I的向量
//各beta[n]之间相互独立
beta[n] ~ multi_normal_cholesky(mu,Cholesky_Sigma);
}
for (t in 1:T) {
//Y[t]为一个标量,表示在第s个choice set或choice situation中,哪个alternative被选择
//Y[t]的取值范围为1到J的整数
//V[t]为长度为J的vector,其中的元素表示choice set s中的各alternatives(J个)的直接效用
/*Y[t] ~ categorical_logit(V[t]);表示计算choice set s中被选择的alternative的chocie probability,
并累加log probability mass
*/
//见p100,Stan Functions Reference
//t -> Y[t] & V[t] -> V[t][j] & V[t] -> p[n][t],n=1,...,N; t=1,...,T
/*choice set t确定了其中哪个alternative被选择(Y[t]),
以及choice set t中各alternatives的直接效用(V[t]),
Y[t]和V[t]又确定了被选择的alternative的直接效用(V[t][j]),
而V[t][j]和V[t]又确定了被选择的alternative对应的choice probability(p[n][t])
*/
//y中元素的取值
为1到J之间的
Y[t] ~ categorical_logit(V[t]);
}
}
And the complete R code is:
#清除workplace中所有变量
rm(list=ls())
library(rstan)
#设置原始数据的小数点后面的位数
#但是,通过原始数据计算出的数据,小数点位数可能会更多
options(digits = 22)
#设置工作路径
setwd("/Users/tc/Desktop/Research/Parameter Estimation")
#因为simulated choice panel data.txt中存在header,所以read.table中必须添加header=TRUE
sim_dat <- read.table("/Users/tc/Desktop/Research/Simulation of Choice Data/simulated choice panel data.txt",header = TRUE)
#个体数量
N <- 300
#各alternatives的参数数量
I <- 3
#各choice sets中alternatives的数量
J <- 3
#各individual的choice sets的数量
M <- 50
T <- 15000
#将beta_0对应的自变量取值设为1,并生成design matrix
design_matrix <- model.matrix(~x1+x2,data=sim_dat)
#将list形式的设计矩阵转化为vector形式
design_matrix.vc <- as.vector(t(unname(design_matrix)))
#对多维数组中的每个元素(矩阵)进行转置
X <- aperm(array(design_matrix.vc,dim = c(J,I,T)),perm=c(2,1,3))
#筛选choice==1的行,生成原data frame的subset
sim_dat_choice <- subset(sim_dat,choice==1)
#生成Y
Y <- sim_dat_choice$alt_id
#生成id
id <- sim_dat_choice$indiv_id
fit = stan('estimate_mixed_logit_bayes_simulated_choice_data.stan',data=list(N=N,I=I,J=J,T=T,X=X,Y=Y,id=id),chains=1,cores=2,warmup = 500)