Skip to content

Commit deeebdc

Browse files
authored
Merge pull request #434 from stan-dev/profiling
Profiling
2 parents 5de494b + e6ae1ad commit deeebdc

File tree

9 files changed

+310
-14
lines changed

9 files changed

+310
-14
lines changed

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ files. (#414)
1212

1313
* Faster CSV reading for multiple chains. (#419)
1414

15+
* New `profiles()` method for fitted model objects accesses profiling
16+
information from R if profiling is used in the Stan program. Support for profiling
17+
Stan programs requires CmdStan >= 2.26. (#434)
18+
1519

1620
# cmdstanr 0.3.0
1721

R/args.R

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,13 @@ CmdStanArgs <- R6::R6Class(
7575
invisible(self)
7676
},
7777

78-
new_file_names = function(type = c("output", "diagnostic")) {
78+
new_file_names = function(type = c("output", "diagnostic", "profile")) {
7979
basename <- self$model_name
8080
type <- match.arg(type)
8181
if (type == "diagnostic") {
8282
basename <- paste0(basename, "-diagnostic")
83+
} else if (type == "profile") {
84+
basename <- paste0(basename, "-profile")
8385
}
8486
generate_file_names( # defined in utils.R
8587
basename = basename,
@@ -89,9 +91,8 @@ CmdStanArgs <- R6::R6Class(
8991
random = TRUE
9092
)
9193
},
92-
new_files = function(type = c("output", "diagnostic")) {
94+
new_files = function(type = c("output", "diagnostic", "profile")) {
9395
files <- file.path(self$output_dir, self$new_file_names(type))
94-
invisible(file.create(files))
9596
files
9697
},
9798

@@ -101,12 +102,14 @@ CmdStanArgs <- R6::R6Class(
101102
#' @param idx The run id. For MCMC this is the chain id, for optimization
102103
#' this is just 1.
103104
#' @param output_file File path to csv file where output will be written.
105+
#' @param profile_file File path to csv file where profile data will be written.
104106
#' @param latent_dynamics_file File path to csv file where the extra latent
105107
#' dynamics information will be written.
106108
#' @return Character vector of arguments of the form "name=value".
107109
#'
108110
compose_all_args = function(idx = NULL,
109111
output_file = NULL,
112+
profile_file = NULL,
110113
latent_dynamics_file = NULL) {
111114
args <- list()
112115
idx <- idx %||% 1
@@ -143,6 +146,10 @@ CmdStanArgs <- R6::R6Class(
143146
args$output <- c(args$output, paste0("sig_figs=", self$sig_figs))
144147
}
145148

149+
if (!is.null(profile_file)) {
150+
args$output <- c(args$output, paste0("profile_file=", profile_file))
151+
}
152+
146153
args <- do.call(c, append(args, list(use.names = FALSE)))
147154
self$method_args$compose(idx, args)
148155
},
@@ -803,7 +810,12 @@ validate_seed <- function(seed, num_procs) {
803810
if (is.null(seed)) {
804811
return(invisible(TRUE))
805812
}
806-
checkmate::assert_integerish(seed, lower = 1)
813+
if (cmdstan_version() < "2.26") {
814+
lower_seed <- 1
815+
} else {
816+
lower_seed <- 0
817+
}
818+
checkmate::assert_integerish(seed, lower = lower_seed)
807819
if (length(seed) > 1 && length(seed) != num_procs) {
808820
stop("If 'seed' is specified it must be a single integer or one per chain.",
809821
call. = FALSE)

R/csv.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,7 @@ unavailable_methods_CmdStanFit_CSV <- c(
469469
"output",
470470
"return_codes",
471471
"num_procs",
472+
"save_profile_files", "profile_files", "profiles",
472473
"time" # available for MCMC not others
473474
)
474475
error_unavailable_CmdStanFit_CSV <- function(...) {

R/fit.R

Lines changed: 81 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -400,10 +400,10 @@ CmdStanFit$set("public", name = "cmdstan_diagnose", value = cmdstan_diagnose)
400400
#' Save output and data files
401401
#'
402402
#' @name fit-method-save_output_files
403-
#' @aliases fit-method-save_data_file fit-method-save_latent_dynamics_files
404-
#' fit-method-output_files fit-method-data_file fit-method-latent_dynamics_files
405-
#' save_output_files save_data_file save_latent_dynamics_files
406-
#' output_files data_file latent_dynamics_files
403+
#' @aliases fit-method-save_data_file fit-method-save_latent_dynamics_files fit-method-save_profile_files
404+
#' fit-method-output_files fit-method-data_file fit-method-latent_dynamics_files fit-method-profile_files
405+
#' save_output_files save_data_file save_latent_dynamics_files save_profile_files
406+
#' output_files data_file latent_dynamics_files profile_files
407407
#'
408408
#' @description All fitted model objects have methods for saving (moving to a
409409
#' specified location) the files created by CmdStanR to hold CmdStan output
@@ -434,6 +434,10 @@ CmdStanFit$set("public", name = "cmdstan_diagnose", value = cmdstan_diagnose)
434434
#' `$save_output_files()` except `"-diagnostic-"` is included in the new
435435
#' file name after `basename`.
436436
#'
437+
#' For `$save_profile_files()` everything is the same as for
438+
#' `$save_output_files()` except `"-profile-"` is included in the new
439+
#' file name after `basename`.
440+
#'
437441
#' For `$save_data_file()` no `id` is included in the file name because even
438442
#' with multiple MCMC chains the data file is the same.
439443
#'
@@ -478,6 +482,15 @@ save_latent_dynamics_files <- function(dir = ".",
478482
}
479483
CmdStanFit$set("public", name = "save_latent_dynamics_files", value = save_latent_dynamics_files)
480484

485+
#' @rdname fit-method-save_output_files
486+
save_profile_files <- function(dir = ".",
                               basename = NULL,
                               timestamp = TRUE,
                               random = TRUE) {
  # Delegate to the run set; every argument is forwarded unchanged, by name.
  self$runset$save_profile_files(
    dir = dir,
    basename = basename,
    timestamp = timestamp,
    random = random
  )
}
492+
CmdStanFit$set("public", name = "save_profile_files", value = save_profile_files)
493+
481494
#' @rdname fit-method-save_output_files
482495
save_data_file <- function(dir = ".",
483496
basename = NULL,
@@ -496,6 +509,12 @@ output_files <- function(include_failed = FALSE) {
496509
}
497510
CmdStanFit$set("public", name = "output_files", value = output_files)
498511

512+
#' @rdname fit-method-save_output_files
513+
profile_files <- function(include_failed = FALSE) {
  # Thin wrapper: the profile file paths are looked up on the run set.
  self$runset$profile_files(include_failed = include_failed)
}
516+
CmdStanFit$set("public", name = "profile_files", value = profile_files)
517+
499518
#' @rdname fit-method-save_output_files
500519
latent_dynamics_files <- function(include_failed = FALSE) {
501520
self$runset$latent_dynamics_files(include_failed)
@@ -641,6 +660,63 @@ return_codes <- function() {
641660
}
642661
CmdStanFit$set("public", name = "return_codes", value = return_codes)
643662

663+
#' Return profiling data
664+
#'
665+
#' @name fit-method-profiles
666+
#' @aliases profiles
667+
#' @description The `$profiles()` method returns a list of data frames with
668+
#' profiling data if any profiling data was written to the profile CSV files.
669+
#' See [save_profile_files()] to control where the files are saved.
670+
#'
671+
#' Support for profiling Stan programs is available with CmdStan >= 2.26 and
672+
#' requires adding profiling statements to the Stan program.
673+
#'
674+
#' @return A list of data frames with profiling data if the profiling CSV files
675+
#' were created.
676+
#'
677+
#' @seealso [`CmdStanMCMC`], [`CmdStanMLE`], [`CmdStanVB`], [`CmdStanGQ`]
678+
#' @examples
679+
#' \dontrun{
680+
#' # first fit a model using MCMC
681+
#' mcmc_program <- write_stan_file(
682+
#' "data {
683+
#' int<lower=0> N;
684+
#' int<lower=0,upper=1> y[N];
685+
#' }
686+
#' parameters {
687+
#' real<lower=0,upper=1> theta;
688+
#' }
689+
#' model {
690+
#' profile("likelihood") {
691+
#' y ~ bernoulli(theta);
692+
#' }
693+
#' }
694+
#' generated quantities {
695+
#' int y_rep[N];
696+
#' profile("gq") {
697+
#' y_rep = bernoulli_rng(rep_vector(theta, N));
698+
#' }
699+
#' }
700+
#' "
701+
#' )
702+
#' mod_mcmc <- cmdstan_model(mcmc_program)
703+
#'
704+
#' data <- list(N = 10, y = c(1,1,0,0,0,1,0,1,0,0))
705+
#' fit <- mod_mcmc$sample(data = data, seed = 123, refresh = 0)
706+
#'
707+
#' fit$profiles()
708+
#' }
709+
#'
710+
profiles <- function() {
  # Paths of the profile CSV files for this fit.
  csv_paths <- self$profile_files()
  # Read each file into a plain data.frame. unname() keeps the result an
  # unnamed list with one element per file, in file order.
  lapply(unname(csv_paths), function(path) {
    data.table::fread(path, data.table = FALSE)
  })
}
719+
CmdStanFit$set("public", name = "profiles", value = profiles)
644720

645721
# CmdStanMCMC -------------------------------------------------------------
646722
#' CmdStanMCMC objects
@@ -786,7 +862,7 @@ CmdStanMCMC <- R6::R6Class(
786862
csv_contents$post_warmup_draws[,,missing_variables],
787863
along="variable"
788864
)
789-
}
865+
}
790866
}
791867
if (!is.null(csv_contents$post_warmup_sampler_diagnostics)) {
792868
if (is.null(private$sampler_diagnostics_)) {

R/run.R

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ CmdStanRun <- R6::R6Class(
2323
self$args <- args
2424
self$procs <- procs
2525
private$output_files_ <- self$new_output_files()
26+
if (cmdstan_version() >= "2.26.0") {
27+
private$profile_files_ <- self$new_profile_files()
28+
}
2629
if (self$args$save_latent_dynamics) {
2730
private$latent_dynamics_files_ <- self$new_latent_dynamics_files()
2831
}
@@ -40,6 +43,9 @@ CmdStanRun <- R6::R6Class(
4043
new_latent_dynamics_files = function() {
4144
self$args$new_files(type = "diagnostic")
4245
},
46+
new_profile_files = function() {
  # Request "profile"-type file paths from the args object.
  self$args$new_files("profile")
},
4349
latent_dynamics_files = function(include_failed = FALSE) {
4450
if (!length(private$latent_dynamics_files_)) {
4551
stop(
@@ -63,7 +69,22 @@ CmdStanRun <- R6::R6Class(
6369
private$output_files_[ok]
6470
}
6571
},
66-
72+
profile_files = function(include_failed = FALSE) {
  # Return the stored profile CSV paths.
  # Errors if no paths are stored or none of the stored files exists on disk.
  # With include_failed = TRUE all stored paths are returned; otherwise only
  # those whose process is reported as finished or queued.
  paths <- private$profile_files_
  has_profile_output <- length(paths) > 0 && any(file.exists(paths))
  if (!has_profile_output) {
    stop(
      "No profile files found. ",
      "The model that produced the fit did not use any profiling.",
      call. = FALSE
    )
  }
  if (include_failed) {
    return(paths)
  }
  keep <- self$procs$is_finished() | self$procs$is_queued()
  paths[keep]
},
6788
save_output_files = function(dir = ".",
6889
basename = NULL,
6990
timestamp = TRUE,
@@ -90,9 +111,9 @@ CmdStanRun <- R6::R6Class(
90111
invisible(new_paths)
91112
},
92113
save_latent_dynamics_files = function(dir = ".",
93-
basename = NULL,
94-
timestamp = TRUE,
95-
random = TRUE) {
114+
basename = NULL,
115+
timestamp = TRUE,
116+
random = TRUE) {
96117
current_files <- self$latent_dynamics_files(include_failed = TRUE) # used so we get error if 0 files
97118
new_paths <- copy_temp_files(
98119
current_paths = current_files,
@@ -114,6 +135,31 @@ CmdStanRun <- R6::R6Class(
114135
private$latent_dynamics_files_saved_ <- TRUE
115136
invisible(new_paths)
116137
},
138+
save_profile_files = function(dir = ".",
                              basename = NULL,
                              timestamp = TRUE,
                              random = TRUE) {
  # Copy the profile CSV files to `dir` under new names, drop the old
  # copies, and point the internal paths at the new locations.
  # Returns the new paths invisibly.

  # include_failed = TRUE is used so that an error is raised if 0 files
  old_paths <- self$profile_files(include_failed = TRUE)
  moved_paths <- copy_temp_files(
    current_paths = old_paths,
    new_dir = dir,
    new_basename = paste0(basename %||% self$model_name(), "-profile"),
    ids = self$proc_ids(),
    ext = ".csv",
    timestamp = timestamp,
    random = random
  )

  # Only remove originals that are not themselves among the new paths.
  is_stale <- !old_paths %in% moved_paths
  file.remove(old_paths[is_stale])

  private$profile_files_ <- moved_paths
  message(
    "Moved ",
    length(old_paths),
    " files and set internal paths to new locations:\n",
    paste("-", moved_paths, collapse = "\n")
  )
  private$profile_files_saved_ <- TRUE
  invisible(moved_paths)
},
117163
save_data_file = function(dir = ".",
118164
basename = NULL,
119165
timestamp = TRUE,
@@ -144,6 +190,7 @@ CmdStanRun <- R6::R6Class(
144190
self$args$compose_all_args(
145191
idx = j,
146192
output_file = private$output_files_[j],
193+
profile_file = private$profile_files_[j],
147194
latent_dynamics_file = private$latent_dynamics_files_[j] # maybe NULL
148195
)
149196
})
@@ -212,9 +259,11 @@ CmdStanRun <- R6::R6Class(
212259
),
213260
private = list(
214261
output_files_ = character(),
262+
profile_files_ = NULL,
215263
output_files_saved_ = FALSE,
216264
latent_dynamics_files_ = NULL,
217265
latent_dynamics_files_saved_ = FALSE,
266+
profile_files_saved_ = FALSE,
218267
command_args_ = list(),
219268

220269
finalize = function() {
@@ -223,7 +272,9 @@ CmdStanRun <- R6::R6Class(
223272
if (!private$output_files_saved_)
224273
self$output_files(include_failed = TRUE),
225274
if (self$args$save_latent_dynamics && !private$latent_dynamics_files_saved_)
226-
self$latent_dynamics_files(include_failed = TRUE)
275+
self$latent_dynamics_files(include_failed = TRUE),
276+
if (cmdstan_version() > "2.25.0" && !private$profile_files_saved_)
277+
private$profile_files_
227278
)
228279
unlink(temp_files)
229280
}

man/fit-method-profiles.Rd

Lines changed: 57 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)