Skip to content

add tolerance args for (l)bfgs #398

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,16 @@ the CSV. (#394)
troubleshooting, debugging and development. See end of *How does CmdStanR work?*
vignette for details. (#392)

* New `loo()` method for CmdStanMCMC objects. Requires computing pointwise
* New `$loo()` method for CmdStanMCMC objects. Requires computing pointwise
log-likelihood in Stan program. (#366)

* The `fitted_params` argument to the `$generate_quantities()` method now also
accepts CmdStanVB, `posterior::draws_array`, and `posterior::draws_matrix`
objects. (#390)

* The `$optimize()` method now supports all of CmdStan's tolerance-related
arguments for (L)BFGS. (#398)

# cmdstanr 0.2.2

### Bug fixes
Expand Down
55 changes: 46 additions & 9 deletions R/args.R
Original file line number Diff line number Diff line change
Expand Up @@ -343,12 +343,24 @@ OptimizeArgs <- R6::R6Class(
lock_objects = FALSE,
public = list(
method = "optimize",
initialize = function(algorithm = NULL,
initialize = function(iter = NULL,
algorithm = NULL,
init_alpha = NULL,
iter = NULL) {
tol_obj = NULL,
tol_rel_obj = NULL,
tol_grad = NULL,
tol_rel_grad = NULL,
tol_param = NULL,
history_size = NULL) {
self$algorithm <- algorithm
self$init_alpha <- init_alpha
self$iter <- iter
self$init_alpha <- init_alpha
self$tol_obj <- tol_obj
self$tol_rel_obj <- tol_rel_obj
self$tol_grad <- tol_grad
self$tol_rel_grad <- tol_rel_grad
self$tol_param <- tol_param
self$history_size <- history_size
invisible(self)
},
validate = function(num_procs) {
Expand All @@ -365,9 +377,15 @@ OptimizeArgs <- R6::R6Class(
}
new_args <- list(
"method=optimize",
.make_arg("iter"),
.make_arg("algorithm"),
.make_arg("init_alpha"),
.make_arg("iter")
.make_arg("tol_obj"),
.make_arg("tol_rel_obj"),
.make_arg("tol_grad"),
.make_arg("tol_rel_grad"),
.make_arg("tol_param"),
.make_arg("history_size")
)
new_args <- do.call(c, new_args)
c(args, new_args)
Expand Down Expand Up @@ -577,14 +595,33 @@ validate_sample_args <- function(self, num_procs) {
validate_optimize_args <- function(self) {
checkmate::assert_subset(self$algorithm, empty.ok = TRUE,
choices = c("bfgs", "lbfgs", "newton"))
checkmate::assert_integerish(self$iter, lower = 0, null.ok = TRUE, len = 1)
checkmate::assert_integerish(self$iter, lower = 1, null.ok = TRUE, len = 1)
if (!is.null(self$iter)) {
self$iter <- as.integer(self$iter)
}
checkmate::assert_number(self$init_alpha, lower = 0, null.ok = TRUE)
if (!is.null(self$init_alpha) && isTRUE(self$algorithm == "newton")) {
stop("'init_alpha' can't be used when algorithm is 'newton'.",
call. = FALSE)

# check args only available for lbfgs and bfgs
bfgs_args <- c("init_alpha", "tol_obj", "tol_rel_obj", "tol_grad", "tol_rel_grad", "tol_param")
for (arg in bfgs_args) {
# check that arg is non-negative or NULL and that algorithm='lbfgs' or 'bfgs' is
# explicitly specified (error if not or if 'newton')
if (!is.null(self[[arg]]) && is.null(self$algorithm)) {
stop("Please specify 'algorithm' in order to use '", arg, "'.", call. = FALSE)
}
if (!is.null(self[[arg]]) && isTRUE(self$algorithm == "newton")) {
stop("'", arg, "' can't be used when algorithm is 'newton'.", call. = FALSE)
}
checkmate::assert_number(self[[arg]], .var.name = arg, lower = 0, null.ok = TRUE)
}

# history_size only available for lbfgs
if (!is.null(self$history_size)) {
if (!isTRUE(self$algorithm == "lbfgs")) {
stop("'history_size' is only allowed if 'algorithm' is specified as 'lbfgs'.", call. = FALSE)
} else {
checkmate::assert_integerish(self$history_size, lower = 1, len = 1, null.ok = FALSE)
self$history_size <- as.integer(self$history_size)
}
}

invisible(TRUE)
Expand Down
75 changes: 53 additions & 22 deletions R/model.R
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,7 @@ CmdStanModel$set("public", name = "check_syntax", value = check_syntax_method)
#' init = NULL,
#' save_latent_dynamics = FALSE,
#' output_dir = NULL,
#' sig_figs = NULL,
#' chains = 4,
#' parallel_chains = getOption("mc.cores", 1),
#' chain_ids = seq_len(chains),
Expand All @@ -710,7 +711,6 @@ CmdStanModel$set("public", name = "check_syntax", value = check_syntax_method)
#' term_buffer = NULL,
#' window = NULL,
#' fixed_param = FALSE,
#' sig_figs = NULL,
#' validate_csv = TRUE,
#' show_messages = TRUE
#' )
Expand Down Expand Up @@ -763,6 +763,8 @@ CmdStanModel$set("public", name = "check_syntax", value = check_syntax_method)
#' although some names are slightly different. They are described briefly here
#' and in greater detail in the CmdStan manual. Arguments left at `NULL`
#' default to the default used by the installed version of CmdStan.
#' The latest [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/)
#' will have the default values for the latest version of CmdStan.
#'
#' * `iter_sampling`: (positive integer) The number of post-warmup iterations to
#' run per chain.
Expand Down Expand Up @@ -834,6 +836,7 @@ sample_method <- function(data = NULL,
init = NULL,
save_latent_dynamics = FALSE,
output_dir = NULL,
sig_figs = NULL,
chains = 4,
parallel_chains = getOption("mc.cores", 1),
chain_ids = seq_len(chains),
Expand All @@ -853,7 +856,6 @@ sample_method <- function(data = NULL,
term_buffer = NULL,
window = NULL,
fixed_param = FALSE,
sig_figs = NULL,
validate_csv = TRUE,
show_messages = TRUE,
# deprecated
Expand Down Expand Up @@ -952,8 +954,8 @@ sample_method <- function(data = NULL,
init = init,
refresh = refresh,
output_dir = output_dir,
validate_csv = validate_csv,
sig_figs = sig_figs
sig_figs = sig_figs,
validate_csv = validate_csv
)
cmdstan_procs <- CmdStanMCMCProcs$new(
num_procs = chains,
Expand Down Expand Up @@ -1013,6 +1015,7 @@ CmdStanModel$set("public", name = "sample", value = sample_method)
#' init = NULL,
#' save_latent_dynamics = FALSE,
#' output_dir = NULL,
#' sig_figs = NULL,
#' chains = 4,
#' parallel_chains = getOption("mc.cores", 1),
#' chain_ids = seq_len(chains),
Expand All @@ -1031,7 +1034,6 @@ CmdStanModel$set("public", name = "sample", value = sample_method)
#' term_buffer = NULL,
#' window = NULL,
#' fixed_param = FALSE,
#' sig_figs = NULL,
#' validate_csv = TRUE,
#' show_messages = TRUE
#' )
Expand Down Expand Up @@ -1159,7 +1161,7 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi_method)
#' constrained variables, which shifts the mode due to the change of
#' variables. Thus modes correspond to modes of the model as written.
#'
#' -- [*CmdStan Interface User's Guide*](https://github.com/stan-dev/cmdstan/releases/latest)
#' -- [*CmdStan User's Guide*](https://mc-stan.org/docs/cmdstan-guide/)
#'
#' @section Usage:
#' ```
Expand All @@ -1170,11 +1172,17 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi_method)
#' init = NULL,
#' save_latent_dynamics = FALSE,
#' output_dir = NULL,
#' sig_figs = NULL,
#' threads = NULL,
#' algorithm = NULL,
#' init_alpha = NULL,
#' iter = NULL,
#' sig_figs = NULL
#' tol_obj = NULL,
#' tol_rel_obj = NULL,
#' tol_grad = NULL,
#' tol_rel_grad = NULL,
#' tol_param = NULL,
#' history_size = NULL
#' )
#' ```
#'
Expand All @@ -1184,16 +1192,27 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi_method)
#' arguments. These arguments are described briefly here and in greater detail
#' in the CmdStan manual. Arguments left at `NULL` default to the default used
#' by the installed version of CmdStan.
#' The latest [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/)
#' will have the defaults for the latest version of CmdStan.
#'
#' * `threads`: (positive integer) If the model was
#' [compiled][model-method-compile] with threading support, the number of
#' threads to use in parallelized sections (e.g., when
#' using the Stan functions `reduce_sum()` or `map_rect()`).
#' * `iter`: (positive integer) The maximum number of iterations.
#' * `algorithm`: (string) The optimization algorithm. One of `"lbfgs"`,
#' `"bfgs"`, or `"newton"`.
#' * `iter`: (positive integer) The number of iterations.
#' * `init_alpha`: (nonnegative real) The line search step size for first
#' iteration. Not applicable if `algorithm="newton"`.
#' `"bfgs"`, or `"newton"`. The control parameters below are only available
#' for `"lbfgs"` and `"bfgs"`. For their default values and more details see
#' the CmdStan User's Guide. The default values can also be obtained by
#' running `cmdstanr_example(method="optimize")$metadata()`.
#' * `init_alpha`: (positive real) The initial step size parameter.
#' * `tol_obj`: (positive real) Convergence tolerance on changes in objective function value.
#' * `tol_rel_obj`: (positive real) Convergence tolerance on relative changes in objective function value.
#' * `tol_grad`: (positive real) Convergence tolerance on the norm of the gradient.
#' * `tol_rel_grad`: (positive real) Convergence tolerance on the relative norm of the gradient.
#' * `tol_param`: (positive real) Convergence tolerance on changes in parameter value.
#' * `history_size`: (positive integer) The size of the history used when
#' approximating the Hessian. Only available for L-BFGS.
#'
#' @section Value: The `$optimize()` method returns a [`CmdStanMLE`] object.
#'
Expand All @@ -1208,11 +1227,17 @@ optimize_method <- function(data = NULL,
init = NULL,
save_latent_dynamics = FALSE,
output_dir = NULL,
sig_figs = NULL,
threads = NULL,
algorithm = NULL,
init_alpha = NULL,
iter = NULL,
sig_figs = NULL) {
tol_obj = NULL,
tol_rel_obj = NULL,
tol_grad = NULL,
tol_rel_grad = NULL,
tol_param = NULL,
history_size = NULL) {
checkmate::assert_integerish(threads, lower = 1, len = 1, null.ok = TRUE)
if (is.null(self$cpp_options()[["stan_threads"]])) {
if (!is.null(threads)) {
Expand All @@ -1231,7 +1256,13 @@ optimize_method <- function(data = NULL,
optimize_args <- OptimizeArgs$new(
algorithm = algorithm,
init_alpha = init_alpha,
iter = iter
iter = iter,
tol_obj = tol_obj,
tol_rel_obj = tol_rel_obj,
tol_grad = tol_grad,
tol_rel_grad = tol_rel_grad,
tol_param = tol_param,
history_size = history_size
)
cmdstan_args <- CmdStanArgs$new(
method_args = optimize_args,
Expand Down Expand Up @@ -1286,6 +1317,7 @@ CmdStanModel$set("public", name = "optimize", value = optimize_method)
#' init = NULL,
#' save_latent_dynamics = FALSE,
#' output_dir = NULL,
#' sig_figs = NULL,
#' threads = NULL,
#' algorithm = NULL,
#' iter = NULL,
Expand All @@ -1296,8 +1328,7 @@ CmdStanModel$set("public", name = "optimize", value = optimize_method)
#' adapt_iter = NULL,
#' tol_rel_obj = NULL,
#' eval_elbo = NULL,
#' output_samples = NULL,
#' sig_figs = NULL
#' output_samples = NULL
#' )
#' ```
#'
Expand Down Expand Up @@ -1343,6 +1374,7 @@ variational_method <- function(data = NULL,
init = NULL,
save_latent_dynamics = FALSE,
output_dir = NULL,
sig_figs = NULL,
threads = NULL,
algorithm = NULL,
iter = NULL,
Expand All @@ -1353,8 +1385,7 @@ variational_method <- function(data = NULL,
adapt_iter = NULL,
tol_rel_obj = NULL,
eval_elbo = NULL,
output_samples = NULL,
sig_figs = NULL) {
output_samples = NULL) {
checkmate::assert_integerish(threads, lower = 1, len = 1, null.ok = TRUE)
if (is.null(self$cpp_options()[["stan_threads"]])) {
if (!is.null(threads)) {
Expand Down Expand Up @@ -1424,9 +1455,9 @@ CmdStanModel$set("public", name = "variational", value = variational_method)
#' data = NULL,
#' seed = NULL,
#' output_dir = NULL,
#' sig_figs = NULL,
#' parallel_chains = getOption("mc.cores", 1),
#' threads_per_chain = NULL,
#' sig_figs = NULL
#' threads_per_chain = NULL
#' )
#' ```
#'
Expand All @@ -1436,7 +1467,7 @@ CmdStanModel$set("public", name = "variational", value = variational_method)
#' - A [posterior::draws_array] (for MCMC) or [posterior::draws_matrix] (for VB)
#' object returned by CmdStanR's [`$draws()`][fit-method-draws] method.
#' - A character vector of paths to CmdStan CSV output files.
#' * `data`, `seed`, `output_dir`, `parallel_chains`, `threads_per_chain`, `sig_figs`:
#' * `data`, `seed`, `output_dir`, `sig_figs`, `parallel_chains`, `threads_per_chain`:
#' Same as for the [`$sample()`][model-method-sample] method.
#'
#' @section Value: The `$generate_quantities()` method returns a [`CmdStanGQ`] object.
Expand Down Expand Up @@ -1492,9 +1523,9 @@ generate_quantities_method <- function(fitted_params,
data = NULL,
seed = NULL,
output_dir = NULL,
sig_figs = NULL,
parallel_chains = getOption("mc.cores", 1),
threads_per_chain = NULL,
sig_figs = NULL) {
threads_per_chain = NULL) {
checkmate::assert_integerish(parallel_chains, lower = 1, null.ok = TRUE)
checkmate::assert_integerish(threads_per_chain, lower = 1, len = 1, null.ok = TRUE)
if (is.null(self$cpp_options()[["stan_threads"]])) {
Expand Down
6 changes: 3 additions & 3 deletions man-roxygen/model-common-args.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
#' methods like `$save_latent_dynamics_files()`).
#' * `sig_figs`: (positive integer) The number of significant figures used
#' for the output values. By default, CmdStan represents the output values with
#' 6 significant figures. The upper limit for `sig_figs` is 18. Increasing
#' this value can cause an increased usage of disk space due to larger
#' 6 significant figures. The upper limit for `sig_figs` is 18. Increasing
#' this value can cause an increased usage of disk space due to larger
#' output CSV files.
#'
#'
6 changes: 3 additions & 3 deletions man/model-method-generate-quantities.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading