I’ve fit a stan_gamm4 model using log transformed data (that is otherwise all positive) and gaussian family (link = ‘identity’) with one smooth and when I plot the smooth using plot_nonlinear, the scale on the y-axis is not on the real world scale. So my first questions are: What is the scale on the y-axis, and why is it not on the same scale as the input (why would you not want the model fit on the same scale you’re working on)?
Further, playing around a little with the plot_nonlinear function I’ve extracted the snippet of code pasted below, with a few minor modifications, which looks like it brings back the data on the scale of my input prior to log transformation. It’s unclear to me whether this is coincidental or whether there’s a logical explanation to it, perhaps someone is able to clarify this?
plotdata.func <- function(x){
smooths <- 's(the.smooth)'
XZ <- x$x
XZ <- XZ[, !grepl("_NEW_", colnames(XZ), fixed = TRUE)]
labels <- sapply(x$jam$smooth, "[[", "label")
xnames <- sapply(x$jam$smooth, "[[", "vn")
names(x$jam$smooth) <- labels
names(xnames) <- labels
fs <- sapply(x$jam$smooth, FUN = "inherits", what = "fs.interaction")
B <- as.matrix(x)[, colnames(XZ), drop = FALSE]
original <- x$jam$model
df_list <- lapply(x$jam$smooth[smooths], FUN = function(s) {
incl <- s$first.para:s$last.para
b <- B[, incl, drop = FALSE]
xz <- XZ[, incl, drop = FALSE]
x <- original[, s$term]
ord <- order(x)
x <- x[ord]
xz <- xz[ord, , drop = FALSE]
f <- rstanarm:::linear_predictor.matrix(b, xz)
data.frame(predictor = x,
lower95 = apply(f, 2, quantile, probs = (1 - 0.95)/2), upper95 = apply(f, 2, quantile, probs = 0.95 + (1 - 0.95)/2),
lower90 = apply(f, 2, quantile, probs = (1 - 0.90)/2), upper90 = apply(f, 2, quantile, probs = 0.90 + (1 - 0.90)/2),
lower80 = apply(f, 2, quantile, probs = (1 - 0.80)/2), upper80 = apply(f, 2, quantile, probs = 0.80 + (1 - 0.80)/2),
middle = apply(f, 2, median), term = s$label)
})
plot_data <- do.call(rbind, df_list) %>% distinct()
plot_data[, which(colnames(plot_data) == 'lower95'):which(colnames(plot_data) == 'middle')] <- plot_data[, which(colnames(plot_data) == 'lower95'):which(colnames(plot_data) == 'middle')] + mean(tmp$Cstock_kgm2)
plot_data <- plot_data %>% distinct()
rownames(plot_data) <- NULL
return(plot_data)
}
Operating System: Windows 10