Dear all Stanians,
I am trying to set up a Weibull survival regression model using the Gehan data in Cox’s 1972 paper (p.196). It has a group indicator with the treatment group using drug 6-MP and a control group, a duration variable (leukemia remission in weeks), and a censoring variable with one for uncensored cases and zero for censored cases. I modeled after the mice example at : Mice Example Link
I was able to get it run and produce results. But the mice example uses contrast instead of dummy variables to set up the model. So for the four groups in the mice data, the model setup estimates four beta’s (group means) and then get contrast by subtracting one from another. Mine has two groups. To play safe, I first set up my model with two beta’s. But I want to set it up using a dummy variable so that when it comes to several covariates, I can simply expand the dimension of the beta matrix without much ado. My second model (one dummy variable) setup seems working, but the results are different from the first set up (with two beta’s for two groups). Any help would be greatly appreciated.
Jun Xu, PhD
Professor
Department of Sociology
Ball State University
Muncie, IN 47306
Model Setup 1: Two Beta’s for Two Groups
Initially drug coded as 1, control as 0; now use group_uncensored and group_censored to recode group variable so that drug use coded as 2 and control as 1.
group_uncensored: (drug group for all uncensored cases: 1 or 2)
group_censored: (drug group for all censored cases: all 2’s since all censored cases are in the drug use group)
rm(list=ls(all=TRUE))
require(foreign)
require(rstan)
require(MASS)
require(VGAM)
require(loo)
time <- c(6, 6, 6, 6, 7, 9, 10, 10, 11, 13, 16, 17, 19, 20, 22, 23, 25, 32, 32, 34, 35,
1, 1, 2, 2, 3, 4, 4, 5, 5, 8, 8, 8, 8, 11, 11, 12, 12, 15, 17, 22, 23)
drug <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
status <- c(0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
N_uncensored <- sum(status == 1)
N_censored <- sum(status == 0)
group_uncensored <- drug[status == 1] + 1
group_censored <- drug[status == 0] + 1
t_uncensored <- time[status == 1]
censor_time <- time[status == 0]
M = 2
modelString = "
Then Embed Stan Codes: Model Setup with Two Beta’s for Two Groups
data {
int<lower=0> N_uncensored;
int<lower=0> N_censored;
int<lower=0> M;
int<lower=1,upper=M> group_uncensored[N_uncensored];
int<lower=1,upper=M> group_censored[N_censored];
real<lower=0> censor_time[N_censored];
real<lower=0> t_uncensored[N_uncensored];
}
parameters {
real<lower=0> r;
real beta[M];
// t_censored / censor_time
real<lower=1> t2_censored[N_censored];
}
model {
r ~ exponential(0.001);
beta ~ normal(0, 100);
for (n in 1:N_uncensored) {
t_uncensored[n] ~ weibull(r, exp(-beta[group_uncensored[n]] / r));
}
for (n in 1:N_censored) {
t2_censored[n] ~ weibull(r, exp(-beta[group_censored[n]] / r) / censor_time[n]);
}
}
generated quantities {
real median[M];
real drug;
// real veh_control;
for (m in 1:M)
median[m] = pow(log(2) * exp(-beta[m]), 1/r);
drug = beta[2] - beta[1];
}
Then R Codes Again
" # close quote for modelstring
writeLines(modelString,con="model.txt")
dataList = list(
N_censored = N_censored ,
N_uncensored = N_uncensored ,
M = M ,
group_uncensored = group_uncensored ,
group_censored = group_censored,
censor_time = censor_time,
t_uncensored = t_uncensored
)
parameters = c("beta", "drug") # The parameter(s) to be monitored.
adaptSteps = 500 # Number of steps to "tune" the samplers.
burnInSteps = 500 # Number of steps to "burn-in" the samplers.
nChains = 3 # Number of chains to run.
numSavedSteps=6000 # Total number of steps in chains to save.
thinSteps=1 # Number of steps to "thin" (1=keep every step).
nPerChain = ceiling( ( numSavedSteps * thinSteps ) / nChains ) # Steps per chain.
# Burn-in:
cat( "Burning in the MCMC chain...\n" )
# The saved MCMC chain:
cat( "Sampling final MCMC chain...\n" )
time.used <- proc.time()
mcmcSamples <- stan(model_code=modelString, data=dataList, seed = 47306,
pars=parameters, chains=nChains, # init=initsChains,
iter=nPerChain,
warmup=burnInSteps ) # init=initsChains
summary(mcmcSamples)
print(mcmcSamples)
# mcmcChain = as.matrix(mcmcSamples)
# Stop the clock
proc.time() - time.used
Model Setup 2: One Beta for A Dummy Variable for Two Groups
Initially drug coded as 1, control as 0; now use group_uncensored and group_censored to recode group variable so that drug use coded as 2 and control as 1.
group_uncensored: (drug group for all uncensored cases: 0 or 1 )
group_censored: (drug group for all censored cases: all 1’s since all censored cases are in the drug use group)
rm(list=ls(all=TRUE))
require(foreign)
require(rstan)
require(MASS)
require(VGAM)
require(loo)
time <- c(6, 6, 6, 6, 7, 9, 10, 10, 11, 13, 16, 17, 19, 20, 22, 23, 25, 32, 32, 34, 35,
1, 1, 2, 2, 3, 4, 4, 5, 5, 8, 8, 8, 8, 11, 11, 12, 12, 15, 17, 22, 23)
drug <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
status <- c(0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
N_uncensored <- sum(status == 1)
N_censored <- sum(status == 0)
group_uncensored <- drug[status == 1]
group_censored <- drug[status == 0]
t_uncensored <- time[status == 1]
censor_time <- time[status == 0]
M = 1
modelString = "
Then Embed Stan Codes
data {
int<lower=0> N_uncensored;
int<lower=0> N_censored;
int<lower=0> M;
int<lower=0,upper=1> group_uncensored[N_uncensored];
int<lower=0,upper=1> group_censored[N_censored];
real<lower=0> censor_time[N_censored];
real<lower=0> t_uncensored[N_uncensored];
}
parameters {
real<lower=0> r;
vector[M] beta;
// t_censored / censor_time
real<lower=1> t2_censored[N_censored];
}
model {
r ~ exponential(0.001);
beta ~ normal(0, 100);
for (n in 1:N_uncensored) {
t_uncensored[n] ~ weibull(r, exp(-beta*group_uncensored[n] / r));
}
for (n in 1:N_censored) {
t2_censored[n] ~ weibull(r, exp(-beta*group_censored[n] / r) / censor_time[n]);
}
}
generated quantities {
real median[M];
real drug;
// real veh_control;
for (m in 1:M)
median[m] = pow(log(2) * exp(-beta[m]), 1/r);
}
Then R Codes Again
" # close quote for modelstring
writeLines(modelString,con="model.txt")
dataList = list(
N_censored = N_censored ,
N_uncensored = N_uncensored ,
M = M ,
group_uncensored = group_uncensored ,
group_censored = group_censored,
censor_time = censor_time,
t_uncensored = t_uncensored
)
parameters = c("beta") # The parameter(s) to be monitored.
adaptSteps = 500 # Number of steps to "tune" the samplers.
burnInSteps = 500 # Number of steps to "burn-in" the samplers.
nChains = 3 # Number of chains to run.
numSavedSteps=6000 # Total number of steps in chains to save.
thinSteps=1 # Number of steps to "thin" (1=keep every step).
nPerChain = ceiling( ( numSavedSteps * thinSteps ) / nChains ) # Steps per chain.
# Burn-in:
cat( "Burning in the MCMC chain...\n" )
# The saved MCMC chain:
cat( "Sampling final MCMC chain...\n" )
time.used <- proc.time()
mcmcSamples <- stan(model_code=modelString, data=dataList, seed = 47306,
pars=parameters, chains=nChains, # init=initsChains,
iter=nPerChain,
warmup=burnInSteps ) # init=initsChains
summary(mcmcSamples)
print(mcmcSamples)
# mcmcChain = as.matrix(mcmcSamples)
# Stop the clock
proc.time() - time.used
bayesWeibullGehan01Post.R (4.0 KB)
bayesWeibullGehan02Post.R (4.0 KB)