I’m having trouble trying to fit a logistic function. I’ve reduced the issue down to simple test case described below. I’ve also posted the question on the stats StackExchange site with some exploration of the model fit.
What I’m seeing is that in order to fit the function, it is not enough to have priors and initialization centered on parameters that generated the data but also the priors need to be extremely tight.
Model
library(rstan)
library(bayesplot)
library(plyr)
library(dplyr)
model_code <- "
functions {
real sigmoid(real dose, real ed50, real slope){
return inv_logit((dose-ed50)*slope);
}
}
data {
int<lower=0> N;
real dose[N];
real<lower=0,upper=1> response[N];
real<lower=0> response_v[N];
real ed50_prior_mu;
real ed50_prior_sd;
real slope_prior_mu;
real slope_prior_sd;
}
parameters {
real slope;
real ed50;
}
transformed parameters {
real response_mu[N];
for (j in 1:N){
response_mu[j] = inv_logit((dose[j]-ed50)*slope);
}
}
model {
slope ~ normal(slope_prior_mu, slope_prior_sd);
ed50 ~ normal(ed50_prior_mu, ed50_prior_sd);
for (j in 1:N){
response ~ beta(response_mu[j]*response_v[j], (1-response_mu[j])*response_v[j]);
}
}
"
dose_response <- rstan::stan_model(
model_code=model_code,
model_name="dose_response")
dose_response %>% rstan::expose_stan_functions()
data
dose_min=-10
dose_max=10
# prior
ed50_mu <- 0
slope_mu <- 1
ed50_sd <- .01
slope_sd <- .01
# true parameters
ed50 <- ed50_mu
slope <- slope_mu
data <- data.frame(dose=rep(x=seq(dose_min, dose_max, length.out=10), times=10)) %>%
dplyr::mutate(
response = vapply(dose, function(d)
sigmoid(
dose=d,
ed50=ed50,
slope=slope),
1.0))
Sample from prior
# sample prior parameters
prior <- data.frame(
ed50 = rnorm(n=20, mean=ed50_mu, sd=ed50_sd),
slope = rnorm(n=20, mean=slope_mu, sd=slope_sd)) %>%
dplyr::mutate(draw_id=row_number()) %>%
plyr::adply(1, function(df){
data.frame(dose=seq(dose_min, dose_max, length.out=200)) %>%
dplyr::mutate(
response = vapply(dose, function(d) sigmoid(
dose=d,
ed50=df$ed50[1],
slope=df$slope[1]), 1.0))
sample from posterior
samples <- rstan::sampling(
object=dose_response,
data=list(
N=nrow(data),
dose=data$dose,
response=data$response,
response_v=rep(11, times=nrow(data)),
ed50_prior_mu = ed50_mu,
ed50_prior_sd = ed50_sd,
slope_prior_mu = slope_mu,
slope_prior_sd = slope_sd),
init=list(
chain1=list(ed50=0, slope=1),
chain2=list(ed50=0, slope=1),
chain3=list(ed50=0, slope=1),
chain4=list(ed50=0, slope=1)),
control=list(
adapt_delta=0.999,
max_treedepth=12))
posterior <- samples %>%
extract(pars=c("ed50", "slope")) %>%
data.frame() %>%
dplyr::sample_n(20) %>%
dplyr::mutate(draw_id=row_number()) %>%
plyr::adply(1, function(df){
data.frame(dose=seq(dose_min, dose_max, length.out=200)) %>%
dplyr::mutate(
response = vapply(dose, function(d) sigmoid(
dose=d,
ed50=df$ed50[1],
slope=df$slope[1]), 1.0))
})
plot data, prior, and posterior
p <- ggplot() + theme_bw() +
geom_line(data=data, mapping=aes(x=dose, y=response), color="black", size=2) +
geom_point(data=data, mapping=aes(x=dose, y=response), color="black", size=3) +
geom_line(data=prior, mapping=aes(x=dose, y=response, group=draw_id), color="red", size=.8, alpha=.3) +
geom_line(data=posterior, mapping=aes(x=dose, y=response, group=draw_id), color="blue", size=.8, alpha=.3) +
scale_y_continuous("Response Percent", limits=c(0, 1), labels=scales::percent) +
scale_x_continuous("Dose")
p
Data(black), prior(red), posterior (blue):