How to pass matrix with dimensions (0 x n) to Stan with cmdstanr

Here is the model I’m fitting:

functions {
    real partial_posterior(
        // Thread-specific ----
        array[,] int counts_slice,
        int start, int end,
        // Shared ----
        int n_obs,
        int n_fe,
        matrix fe_design,
        int n_re,
        vector re_design_x,
        array[] int re_design_j,
        array[] int re_design_p,
        array[] int re_id,
        array[] int batch_id,
        vector intercept,
        matrix fe_coefs,
        matrix raw_re_coefs,
        matrix re_sigma,
        matrix lambda,
        vector fe_tau,
        vector re_tau,
        vector sf,
        real iodisp
    ) {
        real log_prob = 0;
        vector[n_obs] log_mu;
        vector[n_re] re_coefs_col;


        int n_slice_feats = end - start + 1; // # of features in slice
        for (i in 1:n_slice_feats) {
            int feat_i = start + i - 1;

            // Scaling random-effects with horseshoe shrinkage
            if (n_re != 0) {
                for (re_i in 1:n_re) {
                    re_coefs_col[re_i] = raw_re_coefs[re_i, feat_i] * (re_sigma[re_id[re_i], feat_i] * re_tau[re_id[re_i]]);
                }
            }

            // Priors ----
            // Normal prior over (raw) random-effects
            log_prob += std_normal_lpdf(raw_re_coefs[, feat_i]);

            // Horseshoe prior over fixed-effects
            log_prob += cauchy_lpdf(lambda[, feat_i] | 0, 1);
            log_prob += normal_lpdf(fe_coefs[, feat_i] | 0, lambda[, feat_i] .* fe_tau);

            // Estimated Feature Expression ----
            log_mu = rep_vector(intercept[feat_i], n_obs);

            if (n_fe != 0) {
                log_mu += fe_design * col(fe_coefs, feat_i);
            }

            if (n_re != 0) {
                log_mu += csr_matrix_times_vector(
                    n_obs, n_re, re_design_x, re_design_j, re_design_p, re_coefs_col
                );
            }

            // Batch-effect Adjustment ----
            for (obs_i in 1:n_obs) {
                log_mu[obs_i] += sf[batch_id[obs_i]];
            }

            // Likelihood ----
            log_prob += neg_binomial_2_log_lpmf(counts_slice[i] | log_mu, iodisp);
        }
        return log_prob;
    }
}
data {
    // Dimensions ----
    int<lower=1> n_obs; // # of observations
    int<lower=1> n_feats; // # of features

    int<lower=0> n_fe; // # of fixed-effects per-gene
    int<lower=0> n_re; // # of random-effects per-gene

    int<lower=0> n_nz_re; // # of nonzero elements in the random-effects design matrix
    int<lower=0> n_re_terms; // # of random-effects terms

    int<lower=1> n_batches; // # of batches

    // Design Matrices ----
    matrix[n_obs, n_fe] fe_design; // fixed-effects model matrix

    // Random-effects model matrix
    vector[n_nz_re] re_design_x;
    array[n_nz_re] int re_design_j;
    array[n_obs + 1] int re_design_p;

    // Index Variables ----
    array[n_re] int<lower=1, upper=n_re_terms> re_id; // random-effects term membership
    array[n_obs] int<lower=1, upper=n_batches> batch_id; // batch membership

    // Response ----
    array[n_feats, n_obs] int<lower=0> counts; // counts matrix
}
parameters {
    // Feature Expression ----
    vector[n_feats] intercept;

    matrix[n_fe, n_feats] fe_coefs;

    matrix[n_re, n_feats] raw_re_coefs;
    matrix<lower=0>[n_re_terms, n_feats] re_sigma;

    // Batch Effect ----
    simplex[n_batches] raw_sf;

    // Shrinkage ----
    vector<lower=0>[n_fe] fe_tau;
    vector<lower=0>[n_re_terms] re_tau;
    matrix<lower=0>[n_fe, n_feats] lambda;

    // Inverse Overdispersion ----
    real<lower=0> iodisp;
}
transformed parameters {
    // Size Factors ----
    vector[n_batches] sf = log(raw_sf) + log(n_batches);
}
model {
    // Constrain global shrinkage parameters ----
    fe_tau ~ exponential(1);
    re_tau ~ exponential(1);

    // Parallel posterior eval ----
    int grainsize = 1;
    target += reduce_sum(
        partial_posterior,
        counts,
        grainsize,
        // Shared ----
        n_obs,
        n_fe,
        fe_design,
        n_re,
        re_design_x,
        re_design_j,
        re_design_p,
        re_id,
        batch_id,
        intercept,
        fe_coefs,
        raw_re_coefs,
        re_sigma,
        lambda,
        fe_tau,
        re_tau,
        sf,
        iodisp
    );
}

I extract the parameters from a point optimization fit like so:

extract_pars <- function(cur_fit, stan_data) {
    params <- list()

    params[["intercept"]] <- unname(cur_fit$mle("intercept"))
    params[["iodisp"]] <- unname(cur_fit$mle("iodisp"))
    params[["raw_sf"]] <- unname(cur_fit$mle("raw_sf"))

    if (stan_data[["n_fe"]] != 0) {
        params[["fe_coefs"]] <- matrix(
            data = cur_fit$mle("fe_coefs"),
            nrow = stan_data[["n_fe"]],
            ncol = stan_data[["n_feats"]]
        )
        params[["lambda"]] <- matrix(
            data = cur_fit$mle("lambda"),
            nrow = stan_data[["n_fe"]],
            ncol = stan_data[["n_feats"]]
        )
        params[["fe_tau"]] <- unname(cur_fit$mle("fe_tau"))
    } else {
        params[["fe_coefs"]] <- matrix(0, 0, stan_data[["n_feats"]])
        params[["lambda"]] <- matrix(0, 0, stan_data[["n_feats"]])
        params[["fe_tau"]] <- numeric(0)
    }

    if (stan_data[["n_re"]] != 0) {
        params[["raw_re_coefs"]] <- matrix(
            data = cur_fit$mle("raw_re_coefs"),
            nrow = stan_data[["n_re"]],
            ncol = stan_data[["n_feats"]]
        )
        params[["re_sigma"]] <- matrix(
            data = cur_fit$mle("re_sigma"),
            nrow = stan_data[["n_re_terms"]],
            ncol = stan_data[["n_feats"]]
        )
        params[["re_tau"]] <- unname(cur_fit$mle("re_tau"))
    } else {
        params[["raw_re_coefs"]] <- matrix(0, 0, stan_data[["n_feats"]])
        params[["re_sigma"]] <- matrix(0, 0, stan_data[["n_re_terms"]])
        params[["re_tau"]] <- numeric(0)
    }

    params
}

In the specific example dataset I’m using there are no random effects, so raw_re_coefs, re_sigma, and re_tau each have at least one zero dimension.
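
For concreteness, this is what those zero-dimension init entries look like on the R side (with n_re = 0, n_re_terms = 0, and n_feats = 10000):

str(matrix(0, nrow = 0, ncol = 10000))  # raw_re_coefs / re_sigma: num [0, 1:10000]
str(numeric(0))                         # re_tau: num(0)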

When I don’t pass an init argument everything is fine; however, when I try to restart the optimization from a previous fit I get this error:

cur_fit <- model$optimize(
    data = stan_data,
    init = list(extract_pars(cur_fit, stan_data)),
    init_alpha = init_alpha,
    iter = n_iters,
    show_messages = FALSE,
    sig_figs = 18,
    threads = n_threads,
    algorithm = "lbfgs"
)
Chain 1 Unrecoverable error evaluating the log probability at the initial value.
Chain 1 Exception: mismatch in number dimensions declared and found in context; processing stage=parameter initialization; variable name=raw_re_coefs; dims declared=(0,10000); dims found=(0) (in '/var/folders/21/t9hvtpkd72n1pjw4nvd3g0g80000gn/T/RtmpvAaIdT/model-142992041453e.stan', line 108, column 4 to column 39)
Warning: Fitting finished unexpectedly! Use the $output() method for more information.

Error: Fitting failed. Unable to print.

I saw from previous posts that you have to ensure the matrix has storage mode numeric, and I’ve confirmed that mode(matrix(0, 0, stan_data[["n_feats"]])) is numeric, so I’m unsure what the problem is.
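
For what it’s worth, here is a quick check of how a zero-row matrix comes out when serialized to JSON (I’m assuming write_stan_json() is what cmdstanr uses for the init list, so this may or may not be the actual mechanism at play):

library(cmdstanr)

tmp <- tempfile(fileext = ".json")
write_stan_json(list(raw_re_coefs = matrix(0, nrow = 0, ncol = 10000)), tmp)
cat(readLines(tmp), sep = "\n")
# I'd expect the entry to come out as an empty array ("raw_re_coefs": []),
# which would match the "dims found=(0)" part of the error message.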

Rather than writing your own extract_pars() function for this, cmdstanr supports providing a previous fit of the same model as initial values (including zero-dimension parameters):

library(cmdstanr)

stcode <- "
  data {
    real y_mean;
  }
  parameters {
    real y;
    matrix[0,10000] dev;
  }
  model {
    y ~ normal(y_mean, 1);
  }
"
mod <- cmdstan_model(write_stan_file(stcode))

opt <- mod$optimize(data = list(y_mean = 0))
opt2 <- mod$optimize(data = list(y_mean = 0), init = opt)


Hi, yes, I’m aware of that and was previously relying on this functionality to test for convergence of a subset of parameters by starting and stopping with a smaller number of iterations (roughly the loop sketched after the error below). However, for one of my test datasets I ran into this error:

Chain 1 Partial user-specified initialization failed. Initialization of non user specified parameters between (-2, 2) failed after 100 attempts.
Chain 1  Try specifying full initial values, reducing the range of constrained values, or reparameterizing the model.
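
For context, that start-and-stop workflow looks roughly like this (a sketch only, using the model and stan_data from the reproducible example below; the iteration count and number of rounds are illustrative):

fit <- model$optimize(data = stan_data, iter = 100, threads = 4, algorithm = "lbfgs")
for (round in 1:5) {
    fit <- model$optimize(
        data = stan_data,
        init = fit,   # warm-start from the previous round's estimates
        iter = 100,
        threads = 4,
        algorithm = "lbfgs"
    )
    # check convergence of the parameters of interest between rounds
}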

Here is an example that is hopefully reproducible on other machines as well:

library(curl)
library(R.utils)
library(data.table)
library(cmdstanr)

# Download counts and construct metadata
counts_path <- curl::curl_download(
    url = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE273924&format=file&file=GSE273924%5Fraw%5Fcounts%2Etsv%2Egz",
    destfile = paste0(tempdir(), "/counts.tsv.gz")
)
counts <- data.table::fread(counts_path)

metadata <- data.frame(
    "sample_id" = c(colnames(counts)[-1]),
    "condition" = factor(rep(c("control", "lps", "nelps", "ne"), each = 3)),
    "batch_id" = factor(rep(
        c("batch-1", "batch-2", "batch-3", "batch-4"),
        each = 3
    ))
)

# Coerce to formatted matrix
gene_names <- counts$gene_id
counts <- t(as.matrix(counts[, -1]))
colnames(counts) <- gene_names

# Subset features
ordering <- order(Matrix::colMeans(counts), decreasing = TRUE)
counts <- counts[, ordering[1:10000]]

# Allocate named list for Stan
stan_data <- list(n_obs = nrow(counts), n_feats = ncol(counts))


stan_data[["n_batches"]] <- length(levels(metadata[["batch_id"]]))
stan_data[["batch_id"]] <- as.integer(metadata[["batch_id"]])

fe_design <- model.matrix(~condition, metadata)

stan_data[["n_fe"]] <- ncol(fe_design)
stan_data[["fe_design"]] <- fe_design

stan_data[["n_re"]] <- 0L
stan_data[["n_nz_re"]] <- 0L
stan_data[["re_design_x"]] <- numeric(0)
stan_data[["re_design_j"]] <- integer(0)
stan_data[["re_design_p"]] <- integer(stan_data[["n_obs"]] + 1)
stan_data[["n_re_terms"]] <- 0L
stan_data[["re_id"]] <- integer(0)

stan_data[["counts"]] <- counts |> t()

stcode <- "
functions {
    real partial_posterior(
        // Thread-specific ----
        array[,] int counts_slice,
        int start, int end,
        // Shared ----
        int n_obs,
        int n_fe,
        matrix fe_design,
        int n_re,
        vector re_design_x,
        array[] int re_design_j,
        array[] int re_design_p,
        array[] int re_id,
        array[] int batch_id,
        vector intercept,
        matrix fe_coefs,
        matrix raw_re_coefs,
        matrix re_sigma,
        matrix lambda,
        vector fe_tau,
        vector re_tau,
        vector sf,
        real iodisp
    ) {
        real log_prob = 0;
        vector[n_obs] log_mu;
        vector[n_re] re_coefs_col;


        int n_slice_feats = end - start + 1; // # of features in slice
        for (i in 1:n_slice_feats) {
            int feat_i = start + i - 1;

            // Scaling random-effects with horseshoe shrinkage
            if (n_re != 0) {
                for (re_i in 1:n_re) {
                    re_coefs_col[re_i] = raw_re_coefs[re_i, feat_i] * (re_sigma[re_id[re_i], feat_i] * re_tau[re_id[re_i]]);
                }
            }

            // Priors ----
            // Normal prior over (raw) random-effects
            log_prob += std_normal_lpdf(raw_re_coefs[, feat_i]);

            // Horseshoe prior over fixed-effects
            log_prob += cauchy_lpdf(lambda[, feat_i] | 0, 1);
            log_prob += normal_lpdf(fe_coefs[, feat_i] | 0, lambda[, feat_i] .* fe_tau);

            // Estimated Feature Expression ----
            log_mu = rep_vector(intercept[feat_i], n_obs);

            if (n_fe != 0) {
                log_mu += fe_design * col(fe_coefs, feat_i);
            }

            if (n_re != 0) {
                log_mu += csr_matrix_times_vector(
                    n_obs, n_re, re_design_x, re_design_j, re_design_p, re_coefs_col
                );
            }

            // Batch-effect Adjustment ----
            for (obs_i in 1:n_obs) {
                log_mu[obs_i] += sf[batch_id[obs_i]];
            }

            // Likelihood ----
            log_prob += neg_binomial_2_log_lpmf(counts_slice[i] | log_mu, iodisp);
        }
        return log_prob;
    }
}
data {
    // Dimensions ----
    int<lower=1> n_obs; // # of observations
    int<lower=1> n_feats; // # of features

    int<lower=0> n_fe; // # of fixed-effects per-gene
    int<lower=0> n_re; // # of random-effects per-gene

    int<lower=0> n_nz_re; // # of nonzero elements in the random-effects design matrix
    int<lower=0> n_re_terms; // # of random-effects terms

    int<lower=1> n_batches; // # of batches

    // Design Matrices ----
    matrix[n_obs, n_fe] fe_design; // fixed-effects model matrix

    // Random-effects model matrix
    vector[n_nz_re] re_design_x;
    array[n_nz_re] int re_design_j;
    array[n_obs + 1] int re_design_p;

    // Index Variables ----
    array[n_re] int<lower=1, upper=n_re_terms> re_id; // random-effects term membership
    array[n_obs] int<lower=1, upper=n_batches> batch_id; // batch membership

    // Response ----
    array[n_feats, n_obs] int<lower=0> counts; // counts matrix
}
parameters {
    // Feature Expression ----
    vector[n_feats] intercept;

    matrix[n_fe, n_feats] fe_coefs;

    matrix[n_re, n_feats] raw_re_coefs;
    matrix<lower=0>[n_re_terms, n_feats] re_sigma;

    // Batch Effect ----
    simplex[n_batches] raw_sf;

    // Shrinkage ----
    vector<lower=0>[n_fe] fe_tau;
    vector<lower=0>[n_re_terms] re_tau;
    matrix<lower=0>[n_fe, n_feats] lambda;

    // Inverse Overdispersion ----
    real<lower=0> iodisp;
}
transformed parameters {
    // Size Factors ----
    vector[n_batches] sf = log(raw_sf) + log(n_batches);
}
model {
    // Constrain global shrinkage parameters ----
    fe_tau ~ exponential(1);
    re_tau ~ exponential(1);

    // Parallel posterior eval ----
    int grainsize = 1;
    target += reduce_sum(
        partial_posterior,
        counts,
        grainsize,
        // Shared ----
        n_obs,
        n_fe,
        fe_design,
        n_re,
        re_design_x,
        re_design_j,
        re_design_p,
        re_id,
        batch_id,
        intercept,
        fe_coefs,
        raw_re_coefs,
        re_sigma,
        lambda,
        fe_tau,
        re_tau,
        sf,
        iodisp
    );
}
"

model <- cmdstan_model(
    write_stan_file(stcode),
    cpp_options = list(stan_threads = TRUE)
)

cur_fit <- model$optimize(
    data = stan_data,
    iter = 100,
    init_alpha = 1e-6,
    show_messages = TRUE,
    sig_figs = 18,
    threads = 4,
    algorithm = "lbfgs"
)

next_fit <- model$optimize(
    data = stan_data,
    init = cur_fit,
    iter = 100,
    init_alpha = 1e-6,
    show_messages = TRUE,
    sig_figs = 18,
    threads = 4,
    algorithm = "lbfgs"
)

I get this output:

> cur_fit <- model$optimize(
+     data = stan_data,
+     iter = 100,
+     init_alpha = 1e-6,
+     show_messages = TRUE,
+     sig_figs = 18,
+     threads = 4,
+     algorithm = "lbfgs"
+ )
Initial log joint probability = -4.40696e+08
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes
Exception: Exception: neg_binomial_2_log_lpmf: Log location parameter[7] is -inf, but must be finite! (in '/var/folders/21/t9hvtpkd72n1pjw4nvd3g0g80000gn/T/RtmpBR8Y6L/model-15baa1979f0ab.stan', line 73, column 12 to column 82) (in '/var/folders/21/t9hvtpkd72n1pjw4nvd3g0g80000gn/T/RtmpBR8Y6L/model-15baa1979f0ab.stan', line 141, column 4 to line 164, column 6)
Error evaluating model log probability: Non-finite function evaluation.
Error evaluating model log probability: Non-finite function evaluation.
Error evaluating model log probability: Non-finite function evaluation.
      99  -1.51988e+06       70.3729       6336.82       0.418           1      115
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes
     100   -1.5192e+06       11.9694       5145.94      0.4179      0.4179      116
Optimization terminated normally:
  Maximum number of iterations hit, may not be at an optima
Finished in  6.8 seconds.

Evidently it can evaluate the log probability at the last iteration, since it returns a log prob of -1.5192e+06; however, when I run the next fit:

> next_fit <- model$optimize(
+     data = stan_data,
+     init = cur_fit,
+     iter = 100,
+     init_alpha = 1e-6,
+     show_messages = TRUE,
+     sig_figs = 18,
+     threads = 4,
+     algorithm = "lbfgs"
+ )
Init values were only set for a subset of parameters.
Missing init values for the following parameters:
raw_re_coefs, re_sigma, re_tau

To disable this message use options(cmdstanr_warn_inits = FALSE).

Rejecting initial value:
  Log probability evaluates to log(0), i.e. negative infinity.
  Stan can't start sampling from this initial value.
...
Rejecting initial value:
  Log probability evaluates to log(0), i.e. negative infinity.
  Stan can't start sampling from this initial value.
Chain 1 Partial user-specified initialization failed. Initialization of non user specified parameters between (-2, 2) failed after 100 attempts.
Chain 1  Try specifying full initial values, reducing the range of constrained values, or reparameterizing the model.
Chain 1 Initialization failed.

It can’t restart from where it stopped, and for some reason, when it discards the given initial values, it can’t do a complete restart either. I was hoping that manually providing the parameters as a list would fix the issue (a shot in the dark, since this hasn’t happened to me before), but if providing the fit object is the standard way of doing things, this may be a separate issue.

Thank you for the help

I think this might be a model issue rather than a cmdstanr/inits issue, since the final estimated parameters (i.e., from cur_fit) do not give a valid log-probability.

You can verify this by passing the final draws/estimates to the model’s log-probability function:

> cur_fit$init_model_methods()
> cur_fit$log_prob(cur_fit$unconstrain_draws())
[1] -Inf

I’d recommend simplifying your model as much as possible and iteratively adding complexity back until you identify the model components that are causing the instability.
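
As a rough first pass, you could scan the point estimates for non-finite or extreme values (a quick sketch; the grep patterns assume the parameter names from your model):

est <- cur_fit$mle()                        # named vector of all point estimates
any(!is.finite(est))                        # any NaN/Inf estimates?
range(est[grep("^intercept", names(est))])  # how extreme are the intercepts?
range(est[grep("^lambda", names(est))])     # horseshoe local scales
min(est[grep("^raw_sf", names(est))])       # a raw_sf near 0 makes sf = log(raw_sf) + log(n_batches) very negative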