Description
Describe the bug
I'm fitting a simple varying intercepts model using 'brms' and 'cmdstanr'. The model fits without an issue and I can work with the model output without issue generally, except when I try to extract unconstrained draws. In this case, I get the following message:
Exception: mismatch in number dimensions declared and found in context; processing stage=parameter initialization; variable name=sd_1; dims declared=(1); dims found=()
I can't see that there's anything wrong with the model syntax, but maybe I'm missing something. I'm guessing that somewhere in reading/writing JSON or CSV, there's a type mismatch where something should be a matrix/array but is instead a vector. But that's just a guess.
To Reproduce
library(brms) # 2.20.3
#> Loading required package: Rcpp
#> Loading 'brms' package (version 2.20.3). Useful instructions
#> can be found by typing help('brms'). A more detailed introduction
#> to the package is available through vignette('brms_overview').
#>
#> Attaching package: 'brms'
#> The following object is masked from 'package:stats':
#>
#> ar
library(cmdstanr) # 0.6.0
#> This is cmdstanr version 0.6.0
#> - CmdStanR documentation and vignettes: mc-stan.org/cmdstanr
#> - CmdStan path: C:/Users/benja/OneDrive/Documents/.cmdstan/cmdstan-2.33.1
#> - CmdStan version: 2.33.1
# Create small example dataset
Y <- c(1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0)
GROUP <- rep(1:4, each = 5)
# Create Stan code and data
stan_code <- brms::make_stancode(
formula = Y ~ (1|GROUP),
family = brms::bernoulli(link = "logit"),
data = data.frame(Y, GROUP)
)
print(stan_code)
#> // generated with brms 2.20.3
#> functions {
#> }
#> data {
#> int<lower=1> N; // total number of observations
#> array[N] int Y; // response variable
#> // data for group-level effects of ID 1
#> int<lower=1> N_1; // number of grouping levels
#> int<lower=1> M_1; // number of coefficients per level
#> array[N] int<lower=1> J_1; // grouping indicator per observation
#> // group-level predictor values
#> vector[N] Z_1_1;
#> int prior_only; // should the likelihood be ignored?
#> }
#> transformed data {
#> }
#> parameters {
#> real Intercept; // temporary intercept for centered predictors
#> vector<lower=0>[M_1] sd_1; // group-level standard deviations
#> array[M_1] vector[N_1] z_1; // standardized group-level effects
#> }
#> transformed parameters {
#> vector[N_1] r_1_1; // actual group-level effects
#> real lprior = 0; // prior contributions to the log posterior
#> r_1_1 = (sd_1[1] * (z_1[1]));
#> lprior += student_t_lpdf(Intercept | 3, 0, 2.5);
#> lprior += student_t_lpdf(sd_1 | 3, 0, 2.5)
#> - 1 * student_t_lccdf(0 | 3, 0, 2.5);
#> }
#> model {
#> // likelihood including constants
#> if (!prior_only) {
#> // initialize linear predictor term
#> vector[N] mu = rep_vector(0.0, N);
#> mu += Intercept;
#> for (n in 1:N) {
#> // add more terms to the linear predictor
#> mu[n] += r_1_1[J_1[n]] * Z_1_1[n];
#> }
#> target += bernoulli_logit_lpmf(Y | mu);
#> }
#> // priors including constants
#> target += lprior;
#> target += std_normal_lpdf(z_1[1]);
#> }
#> generated quantities {
#> // actual population-level intercept
#> real b_Intercept = Intercept;
#> }
stan_data <- brms::make_standata(
formula = Y ~ (1|GROUP),
family = brms::bernoulli(link = "logit"),
data = data.frame(Y, GROUP)
)
# Write the Stan code to a file
stan_file <- cmdstanr::write_stan_file(
code = stan_code
)
# Compile the model
stan_model <- cmdstanr::cmdstan_model(
stan_file = stan_file,
compile_model_methods = TRUE, # Allows access to gradient
force_recompile = TRUE
)
#> In file included from stan/lib/stan_math/stan/math/prim/prob/von_mises_lccdf.hpp:5,
#> from stan/lib/stan_math/stan/math/prim/prob/von_mises_ccdf_log.hpp:4,
#> from stan/lib/stan_math/stan/math/prim/prob.hpp:356,
#> from stan/lib/stan_math/stan/math/prim.hpp:16,
#> from stan/lib/stan_math/stan/math/rev.hpp:14,
#> from stan/lib/stan_math/stan/math.hpp:19,
#> from stan/src/stan/model/model_header.hpp:4,
#> from C:/Users/benja/AppData/Local/Temp/RtmpQ3Mjac/model-7cc50b17e7f.hpp:2:
#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp: In function 'stan::return_type_t<T_x, T_sigma, T_l> stan::math::von_mises_cdf(const T_x&, const T_mu&, const T_k&)':
#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: '-Wmisleading-indentation' is disabled from this point onwards, since column-tracking was disabled due to the size of the code/headers
#> 194 | if (cdf_n < 0.0)
#> |
#> stan/lib/stan_math/stan/math/prim/prob/von_mises_cdf.hpp:194: note: adding '-flarge-source-files' will allow for more column-tracking support, at the expense of compilation time and memory
# Obtain MCMC draws
model_fit <- stan_model$sample(
data = stan_data,
iter_warmup = 10,
iter_sampling = 20,
chains = 2,
parallel_chains = 1,
seed = 1999
)
#> Running MCMC with 2 sequential chains...
#>
#> Chain 1 WARNING: No variance estimation is
#> Chain 1 performed for num_warmup < 20
#> Chain 1 Iteration: 1 / 30 [ 3%] (Warmup)
#> Chain 1 Iteration: 11 / 30 [ 36%] (Sampling)
#> Chain 1 Iteration: 30 / 30 [100%] (Sampling)
#> Chain 1 finished in 0.0 seconds.
#> Chain 2 WARNING: No variance estimation is
#> Chain 2 performed for num_warmup < 20
#> Chain 2 Iteration: 1 / 30 [ 3%] (Warmup)
#> Chain 2 Iteration: 11 / 30 [ 36%] (Sampling)
#> Chain 2 Iteration: 30 / 30 [100%] (Sampling)
#> Chain 2 finished in 0.0 seconds.
#>
#> Both chains finished successfully.
#> Mean chain execution time: 0.0 seconds.
#> Total execution time: 0.9 seconds.
# Extract unconstrained parameter draws
model_fit$init_model_methods()
#> Compiling additional model methods...
model_fit$unconstrain_draws()
#> Error in eval(expr, envir, enclos): Exception: mismatch in number dimensions declared and found in context; processing stage=parameter initialization; variable name=sd_1; dims declared=(1); dims found=() (in 'C:/Users/benja/AppData/Local/Temp/RtmpQ3Mjac/model-7cc50b17e7f.stan', line 19, column 2 to column 28)
Created on 2023-09-23 by the reprex package (v2.0.1)
Expected behavior
I expected model_fit$unconstrain_draws()
to return the unconstrained parameter draws.
Operating system
R version 4.3.1 (2023-06-16 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 22621)Matrix products: default
CmdStanR version number
0.6.0
Additional context
The purpose for this is I want to make some adjustments to the unconstrained parameter draws. I don't think it really matters for debugging, but in case you're interested the adjustments are to deal with complex survey data, along the lines of this discussion on Discourse: https://discourse.mc-stan.org/t/survey-weights-in-brms-stan-simulation-based-on-design-effect-feedback-sought/28625/13