Skip to content

strange behavior when cmdstanr is parallelized #326

Closed
@bnicenboim

Description

@bnicenboim

Describe the bug
Strange behavior occurs when a model is fit inside `parallel::mclapply()`: the results contain NAs where there shouldn't be any.

To Reproduce
It doesn't always happen, but here is an example:

# Seed R's own RNG. The sampler seed is passed separately to each fit further
# down (seed = S).
set.seed(123)
# dplyr provides select()/starts_with() and the %>% pipe used by the fitting
# code below; cmdstanr provides write_stan_file() and cmdstan_model().
library(dplyr)
library(cmdstanr)
# Stan program, kept as an R string. It simulates its own data set rather than
# receiving one:
#   * data: only the number of observations J.
#   * transformed data: draws "true" values alpha_ and beta_ from lognormal
#     distributions, then simulates J gamma(alpha_, beta_) observations y.
#   * parameters / model: estimates positive alpha and beta, with lognormal
#     priors using the same location/scale as the simulating distributions
#     and a gamma likelihood for y.
#   * generated quantities: echoes the simulated data (y_) and the true values
#     (pars_), records whether each sampled parameter exceeds its true value
#     (ranks_), and stores the pointwise log-likelihood (log_lik).
# NOTE(review): this looks like a simulation-based-calibration style setup;
# confirm with the reporter if that matters for the diagnosis.
m <- "data {
  int<lower = 0> J;
}
transformed data {
  real<lower = 0> alpha_ = lognormal_rng(log(4), .1);
  real<lower = 0> beta_ = lognormal_rng(log(.5), .1);
  real y[J];
  for (j in 1:J){
       y[j]= gamma_rng(alpha_, beta_);
  }
}
parameters {
  real<lower=0> alpha;
  real<lower=0> beta;
}
transformed parameters {
}
model {
  target +=  lognormal_lpdf(alpha | log(4), .1);
  target +=  lognormal_lpdf(beta | log(.5), .1);
  target += gamma_lpdf(y| alpha, beta);
}
generated quantities {
  real y_[J] = y;
  vector[2] pars_= [alpha_, beta_]';
  int ranks_[2] = {alpha > alpha_, beta > beta_};
  vector[J] log_lik;
  for(j in 1:J)
    log_lik[j] = gamma_lpdf(y[j] | alpha, beta);
}
"


# Write the program to a Stan file and compile it once; the resulting model
# object is reused by every fit below.
model <- cmdstan_model(write_stan_file(m))

# Labels for the two model parameters; applied to the vector of true values
# and to the columns of the rank-indicator matrix extracted from each fit.
pars_names <- c("alpha", "beta")

# Fit the model five times through parallel::mclapply(), using the task index
# as the sampler seed, and summarise each fit. This is the variant that
# produces the unexpected NAs shown below.
out_parallel <- parallel::mclapply(seq_len(5), FUN = function(seed_i) {
  fit <- model$sample(
    data = list(J = 10),
    chains = 1L,
    parallel_chains = 1L,
    seed = seed_i,
    thin = 1L
  )

  # Draws of a single variable as a data frame, without the columns whose
  # names start with ".".
  plain_draws <- function(variable) {
    fit$draws(variable) %>%
      posterior::as_draws_df() %>%
      dplyr::select(-starts_with("."))
  }

  # Column means of the echoed data and of the true parameter values.
  y_means <- colMeans(plain_draws("y_"))
  true_pars <- colMeans(plain_draws("pars_"))
  names(true_pars) <- pars_names

  # Extracted exactly as in the report, although neither matrix is part of
  # the returned summary.
  rank_mat <- as.matrix(plain_draws("ranks_"))
  colnames(rank_mat) <- pars_names
  loglik_mat <- as.matrix(plain_draws("log_lik"))

  list(Y = y_means, pars = true_pars)
})

out_parallel

## > out_parallel
## [[1]]
## [[1]]$Y
##     y_[1]     y_[2]     y_[3]     y_[4]     y_[5]     y_[6]     y_[7]     y_[8]
## 13.358768 10.833558 11.797914  8.280695  4.922092        NA        NA        NA
##     y_[9]    y_[10]
##        NA        NA

## [[1]]$pars
## alpha  beta
##    NA    NA


## [[2]]
## [[2]]$Y
##     y_[1]     y_[2]     y_[3]     y_[4]     y_[5]     y_[6]     y_[7]     y_[8]
## 13.358768 10.833558 11.797914  8.280695  4.922092        NA        NA        NA
##     y_[9]    y_[10]
##        NA        NA

## [[2]]$pars
## alpha  beta
##    NA    NA

(continues, but those NAs shouldn't be there).

Expected behavior
The serial version works fine:

# Serial baseline: the same five fits and the same per-fit summary as the
# parallel run, but executed one after another with lapply().
out_serial <- lapply(seq_len(5), FUN = function(seed_i) {
  fit <- model$sample(
    data = list(J = 10),
    chains = 1L,
    parallel_chains = 1L,
    seed = seed_i,
    thin = 1L
  )

  # Pull one variable's draws into a data frame and drop every column whose
  # name starts with ".".
  strip_dot_columns <- function(variable) {
    draws_df <- posterior::as_draws_df(fit$draws(variable))
    dplyr::select(draws_df, -starts_with("."))
  }

  # Per-column means of the echoed data and of the true parameter values.
  data_means <- colMeans(strip_dot_columns("y_"))
  par_means <- colMeans(strip_dot_columns("pars_"))
  names(par_means) <- pars_names

  # Also extracted in the report; not included in the returned list.
  rank_indicators <- as.matrix(strip_dot_columns("ranks_"))
  colnames(rank_indicators) <- pars_names
  pointwise_loglik <- as.matrix(strip_dot_columns("log_lik"))

  list(Y = data_means, pars = par_means)
})

out_serial

## [[1]]
## [[1]]$Y
##    y_[1]    y_[2]    y_[3]    y_[4]    y_[5]    y_[6]    y_[7]    y_[8]
##  7.18980  6.88780 10.46080  4.34584  3.78183  6.25249  6.49124 15.61800
##    y_[9]   y_[10]
##  3.16917  2.24205

## [[1]]$pars
##    alpha     beta
## 3.636410 0.515895


## [[2]]
## [[2]]$Y
##    y_[1]    y_[2]    y_[3]    y_[4]    y_[5]    y_[6]    y_[7]    y_[8]
## 18.76470 14.29770 12.99720 11.75190  5.94025  5.11902  4.30613 16.12750
##    y_[9]   y_[10]
##  7.81212  6.80070

## [[2]]$pars
##    alpha     beta
## 4.258750 0.429109

(continues)

Operating system
Ubuntu 20.04.1 LTS

CmdStanR version number
0.1.3

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions