Hi,
Here I am generating mixture Gaussian model data and using pystan as a model. I want to run pystan sampling part in multiprocessing, and I put the ‘n_jobs=20’ since my computer has 10 cores. But on the ‘task manager’, the CPU is still something like 5.9%, which means the code did not run in multiprocessing. So anyone can help me to write the code in multiprocessing? Thanks. Below is my code.
from future import division
import pystan
import pymc3 as pm
from scipy import stats
import matplotlib.pyplot as plt
import theano.tensor as tt
import seaborn as sns
import numpy as np
import os
import sys
os.environ[‘STAN_NUM_THREADS’] = “20”
generate the data
component=np.random.choice([0,1],size=50)
x=[]
loc=[[1,5],[5,2]]
s1=1
s2=1
ro=0.5
scale=[[s1*s1,ro*s1*s2],[ros1s2,s2*s2]]
scale=[scale,scale]
for i in component:
x.append(stats.multivariate_normal.rvs(mean=loc[i],cov=scale[i]))
x=np.array(x)
model=’’’
data {
int D; //number of dimensions
int K; //number of gaussians
int N; //number of data
vector[D] y[N]; //data
//real con[N]; //co ncentration
}
parameters {
simplex[K] theta; //mixing proportions
ordered[D] mu[K]; //mixture component means
cholesky_factor_corr[D] L[K]; //cholesky factor of covariance
}
transformed parameters{
cholesky_factor_corr[D] cov[K];
real sx[K];
real sy[K];
real ro[K];
// vector[N] lamba;
cov=L;
for (i in 1:K){
sx[i]=sqrt(cov[i,1,1]);
sy[i]=sqrt(cov[i,2,2]);
ro[i]=cov[i,1,2]/sqrt(cov[i,1,1])/sqrt(cov[i,2,2]);
}
// for (i in 1:N){
// lamba[i]=1/(theta[1](1./2./sx[1]/sy[1]/sqrt(1-ro[1]ro[1])exp(-1./2./(1-ro[1]ro[1])((y[i,1]-mu[1,1])(y[i,1]-mu[1,1])/sx[1]/sx[1])+(y[i,2]-mu[1,2])(y[i,2]-mu[1,2])/sy[1]/sy[1]-2.ro[1](y[i,1]-mu[1,1])(y[i,2]-mu[1,2])/sx[1]/sy[1]))+theta[2](1./2./sx[2]/sy[2]/sqrt(1-ro[2]ro[2])exp(-1./2./(1-ro[2]ro[2])((y[i,1]-mu[2,1])(y[i,1]-mu[2,1])/sx[2]/sx[2])+(y[i,2]-mu[2,2])(y[i,2]-mu[2,2])/sy[2]/sy[2]-2.ro[2](y[i,1]-mu[2,1])(y[i,2]-mu[2,2])/sx[2]/sy[2])));
//}
}
model {
real ps[K];
for(k in 1:K){
mu[k] ~ uniform(-10,10);
L[k] ~ lkj_corr_cholesky(2);
//con ~ exponential(lamba);
}
for (n in 1:N){
for (k in 1:K){
ps[k] = log(theta[k])+multi_normal_cholesky_lpdf(y[n] | mu[k], L[k]); //increment log probability of the gaussian
}
target += log_sum_exp(ps);
}
// for(i in 1:N){
// target += - lamba[i]*con[i];
//}
}
‘’’
if name == ‘main’:
dat={‘N’:50,‘y’:x,‘D’:2,‘K’:2}
fit=pystan.stan(model_code=model, data=dat, iter=5000, warmup=500, chains=1,n_jobs=20)
print(fit)