| Title: | Run Multiverse Style Analyses |
|---|---|
| Description: | Run the same analysis over a range of arbitrary data processing decisions. 'multitool' provides an interface for creating alternative analysis pipelines and turning them into a grid of all possible pipelines. Using this grid as a blueprint, you can model your data across all possible pipelines and summarize the results. |
| Authors: | Ethan Young [aut, cre, cph] (ORCID: <https://orcid.org/0000-0002-8232-0184>), Stefan Vermeent [aut] (ORCID: <https://orcid.org/0000-0002-9595-5373>) |
| Maintainer: | Ethan Young <[email protected]> |
| License: | MIT + file LICENSE |
| Version: | 0.1.5.9000 |
| Built: | 2026-05-21 17:18:24 UTC |
| Source: | https://github.com/ethan-young/multitool |
Add filtering/exclusion criteria to a multiverse pipeline
add_filters(.df, ..., remove_do_nothing = FALSE)add_filters(.df, ..., remove_do_nothing = FALSE)
.df |
The original |
... |
logical expressions to be used with |
remove_do_nothing |
logical, |
a data.frame with three columns: type, group, and code. Type
indicates the decision type, group is a decision, and the code is the
actual code that will be executed. If part of a pipe, the current set of
decisions will be appended as new rows.
library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2, include3 > -2.5)library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2, include3 > -2.5)
Add a model and formula to a multiverse pipeline
add_model( .df, model_desc, code, model_coefs = parameters::parameters(), model_fit = performance::performance(), model_standardize = parameters::standardize_parameters() )add_model( .df, model_desc, code, model_coefs = parameters::parameters(), model_fit = performance::performance(), model_standardize = parameters::standardize_parameters() )
.df |
The original |
model_desc |
a human readable name you would like to give the model. |
code |
literal model syntax you would like to run. You can use
|
model_coefs |
a function to extract coefficients from the model object.
The default is to use |
model_fit |
a function to summarize model fit statistics. The default is
to use |
model_standardize |
a function to calculate standardized coefficients
from the model object. The default is to use
|
a data.frame with three columns: type, group, and code. Type
indicates the decision type, group is a decision, and the code is the
actual code that will be executed. If part of a pipe, the current set of
decisions will be appended as new rows.
library(tidyverse) library(multitool) the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2, include3 > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_preprocess("scale_iv", 'mutate({ivs} = scale({ivs}))') |> add_model("linear model", lm({dvs} ~ {ivs} * {mods}))library(tidyverse) library(multitool) the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2, include3 > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_preprocess("scale_iv", 'mutate({ivs} = scale({ivs}))') |> add_model("linear model", lm({dvs} ~ {ivs} * {mods}))
Add arbitrary summary statistics to a multiverse pipeline
add_model_descriptives(.df, desc_name, code)add_model_descriptives(.df, desc_name, code)
.df |
The original |
desc_name |
a character string. A descriptive name for the summary statistics you want to compute over the data passed to your model. |
code |
the literal code you would like to execute. For summary
statistics, |
a data.frame with three columns: type, group, and code. Type
indicates the decision type, group is a decision, and the code is the
actual code that will be executed. If part of a pipe, the current set of
decisions will be appended as new rows.
library(tidyverse) library(multitool) the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2, include3 > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_preprocess("scale_iv", 'mutate({ivs} = scale({ivs}))') |> add_model("linear model", lm({dvs} ~ {ivs} * {mods})) |> add_model_descriptives( "descriptives", summarize(body_mass_mean = mean({dvs}), .by = c(include2)) )library(tidyverse) library(multitool) the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2, include3 > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_preprocess("scale_iv", 'mutate({ivs} = scale({ivs}))') |> add_model("linear model", lm({dvs} ~ {ivs} * {mods})) |> add_model_descriptives( "descriptives", summarize(body_mass_mean = mean({dvs}), .by = c(include2)) )
Add parameter key names for later use in summarizing model effects
add_parameter_keys(.df, parameter_group, parameter_name)add_parameter_keys(.df, parameter_group, parameter_name)
.df |
The original |
parameter_group |
character, a name for the parameter of interest |
parameter_name |
quoted or unquoted names of variables involved in a
particular parameter of interest. Usually this is just a variable in your
model (e.g., a main effect of your iv). However, it could also be an
interaction term or some other term. You can use |
a data.frame with three columns: type, group, and code. Type
indicates the decision type, group is a decision, and the code is the
actual code that will be executed. If part of a pipe, the current set of
decisions will be appended as new rows.
library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) the_data |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_model("linear model", lm({dvs} ~ {ivs} * {mods})) |> add_parameter_keys("my_interaction", "{ivs}:{mods}") |> add_parameter_keys("my_main_effect", {ivs})library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) the_data |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_model("linear model", lm({dvs} ~ {ivs} * {mods})) |> add_parameter_keys("my_interaction", "{ivs}:{mods}") |> add_parameter_keys("my_main_effect", {ivs})
Add arbitrary postprocessing code to a multiverse pipeline
add_postprocess(.df, postprocess_name, code)add_postprocess(.df, postprocess_name, code)
.df |
The original |
postprocess_name |
a character string. A descriptive name for what the postprocessing step accomplishes. |
code |
the literal code you would like to execute after each analysis. The code should be written to work with pipes (i.e., For example, if you fit a simple linear model like:
|
a data.frame with three columns: type, group, and code. Type
indicates the decision type, group is a decision, and the code is the
actual code that will be executed. If part of a pipe, the current set of
decisions will be appended as new rows.
library(tidyverse) library(multitool) the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2, include3 > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_preprocess("scale_iv", 'mutate({ivs} = scale({ivs}))') |> add_model("linear model", lm({dvs} ~ {ivs} * {mods})) |> add_postprocess("analysis of variance", aov())library(tidyverse) library(multitool) the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2, include3 > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_preprocess("scale_iv", 'mutate({ivs} = scale({ivs}))') |> add_model("linear model", lm({dvs} ~ {ivs} * {mods})) |> add_postprocess("analysis of variance", aov())
Add arbitrary preprocessing code to a multiverse analysis pipeline
add_preprocess(.df, process_name, code)add_preprocess(.df, process_name, code)
.df |
The original |
process_name |
a character string. A descriptive name for what the preprocessing step accomplishes. |
code |
the literal code you would like to execute after data are
filtered. The code should be written to work with pipes (i.e., |
a data.frame with three columns: type, group, and code. Type
indicates the decision type, group is a decision, and the code is the
actual code that will be executed. If part of a pipe, the current set of
decisions will be appended as new rows.
library(tidyverse) library(multitool) the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2, include3 > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_preprocess("scale_iv", 'mutate({ivs} = scale({ivs}))')library(tidyverse) library(multitool) the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2, include3 > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_preprocess("scale_iv", 'mutate({ivs} = scale({ivs}))')
Add sub groups to the multiverse pipeline
add_subgroups(.df, ..., .only = NULL)add_subgroups(.df, ..., .only = NULL)
.df |
The original |
... |
sub group variable(s) in your data whose values specify groupings. |
.only |
a character vector of sub group values to include. The default includes all sub group values for each sub group variable. |
a data.frame with three columns: type, group, and code. Type
indicates the decision type, group is a decision, and the code is the
actual code that will be executed. If part of a pipe, the current set of
decisions will be appended as new rows.
library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500), group = sample(1:3, size = 500, replace = TRUE) ) the_data |> add_subgroups(group) the_data |> add_subgroups(group, .only = c(1,3))library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500), group = sample(1:3, size = 500, replace = TRUE) ) the_data |> add_subgroups(group) the_data |> add_subgroups(group, .only = c(1,3))
Add a set of variable alternatives to a multiverse pipeline
add_variables(.df, var_group, ...)add_variables(.df, var_group, ...)
.df |
The original |
var_group |
a character string. Indicates the name of the current set. For example, "primary_iv" could indicate that this set contains alternatives for the main predictor in an analysis. |
... |
the bare unquoted names of the variables to include as alternative options for this variable set. You can also use tidyselect to select variables. |
a data.frame with three columns: type, group, and code. Type
indicates the decision type, group is a decision, and the code is the
actual code that will be executed. If part of a pipe, the current set of
decisions will be appended as new rows.
library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) the_data |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod"))library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) the_data |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod"))
Perform all analyses over a complete decision grid
analyze_grid( .grid, save_model = FALSE, show_progress = TRUE, libraries = NULL, ... )analyze_grid( .grid, save_model = FALSE, show_progress = TRUE, libraries = NULL, ... )
.grid |
a |
save_model |
logical, indicates whether to save the model object in its
entirety. The default is |
show_progress |
logical, whether to show a progress bar while running. |
libraries |
a vector of character strings naming the packages you want
to load when executing parallel processing. Internally, this will call
|
... |
this is also reserved for parallel processing. Any custom functions
you might use in your pipeline (e.g., a custom post-processing step) can be
passed here in the form of |
a single tibble containing tidied results for the model and
any post-processing tests/tasks. For each unique test (e.g., an lm
or aov called on an lm), a list column with the function name
is created with parameters and
performance and any warnings or messages printed
while fitting the models.
library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # Decision pipeline full_pipeline <- the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2,scale(include3) > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_preprocess(process_name = "scale_iv", 'mutate({ivs} = scale({ivs}))') |> add_preprocess(process_name = "scale_mod", mutate({mods} := scale({mods}))) |> add_model("no covariates",lm({dvs} ~ {ivs} * {mods})) |> add_model("covariate", lm({dvs} ~ {ivs} * {mods} + cov1)) |> add_postprocess("aov", aov()) pipeline_grid <- expand_decisions(full_pipeline) # analyze the grid analyzed_grid <- analyze_grid(pipeline_grid[1:10,])library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # Decision pipeline full_pipeline <- the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2,scale(include3) > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_preprocess(process_name = "scale_iv", 'mutate({ivs} = scale({ivs}))') |> add_preprocess(process_name = "scale_mod", mutate({mods} := scale({mods}))) |> add_model("no covariates",lm({dvs} ~ {ivs} * {mods})) |> add_model("covariate", lm({dvs} ~ {ivs} * {mods} + cov1)) |> 
add_postprocess("aov", aov()) pipeline_grid <- expand_decisions(full_pipeline) # analyze the grid analyzed_grid <- analyze_grid(pipeline_grid[1:10,])
Analyze a complete decision grid in parallel
analyze_grid_parallel( .grid, save_model = FALSE, show_progress = TRUE, furrr_globals = NULL, furrr_packages = c("multitool", "dplyr", "tidyr") )analyze_grid_parallel( .grid, save_model = FALSE, show_progress = TRUE, furrr_globals = NULL, furrr_packages = c("multitool", "dplyr", "tidyr") )
.grid |
a |
save_model |
logical, indicates whether to save the model object in its
entirety. The default is |
show_progress |
logical, whether to show a progress bar while running. |
furrr_globals |
any global objects to pass to |
furrr_packages |
character vector, any packages to load inside parallel environments |
a single tibble containing tidied results for the model and
any post-processing tests/tasks. For each unique test (e.g., an lm
or aov called on an lm), a list column with the function name
is created with parameters and
performance and any warnings or messages printed
while fitting the models.
library(tidyverse) library(multitool) library(furrr) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # Decision pipeline full_pipeline <- the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2,scale(include3) > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_preprocess(process_name = "scale_iv", 'mutate({ivs} = scale({ivs}))') |> add_preprocess(process_name = "scale_mod", mutate({mods} := scale({mods}))) |> add_model("no covariates",lm({dvs} ~ {ivs} * {mods})) |> add_model("covariate", lm({dvs} ~ {ivs} * {mods} + cov1)) |> add_postprocess("aov", aov()) pipeline_grid <- expand_decisions(full_pipeline) # Run the whole multiverse plan(multisession, workers = 4) the_multiverse <- analyze_grid_parallel(pipeline_grid[4,]) plan(sequential)library(tidyverse) library(multitool) library(furrr) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # Decision pipeline full_pipeline <- the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2,scale(include3) > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_preprocess(process_name = "scale_iv", 'mutate({ivs} = scale({ivs}))') |> add_preprocess(process_name = "scale_mod", mutate({mods} := scale({mods}))) |> add_model("no 
covariates",lm({dvs} ~ {ivs} * {mods})) |> add_model("covariate", lm({dvs} ~ {ivs} * {mods} + cov1)) |> add_postprocess("aov", aov()) pipeline_grid <- expand_decisions(full_pipeline) # Run the whole multiverse plan(multisession, workers = 4) the_multiverse <- analyze_grid_parallel(pipeline_grid[4,]) plan(sequential)
Quantifies how much each decision type (e.g., filters, variables, models) contributes to the total variance in a focal estimand across all decision specifications. Uses variance-based sensitivity analysis to partition variance into main effects, interaction effects, and total effects for each decision set.
assess_decisions(.unpacked, .estimand, .by = NULL)assess_decisions(.unpacked, .estimand, .by = NULL)
.unpacked |
A |
.estimand |
The numeric outcome variable to decompose. Defaults to
|
.by |
Optional grouping variable(s) for stratified decomposition. The variance decomposition will be computed separately for each group. Useful for examining whether decision importance varies across different model variables or subgroups. Use unquoted column names. |
This function implements a Sobol-style decomposition where "decision sets" (e.g., all filter decisions) are treated as factors whose combinations produce different specifications. The decomposition reveals which analytical choices have the strongest influence on results.
The function computes four complementary variance measures:
Main effect (first-order Sobol): How much does this decision matter on average, ignoring interactions? Computed by averaging the estimand over all combinations of other decisions, then computing the variance of those conditional means.
Total effect (total-order Sobol): How much variance remains when we fix all decisions except this one? Includes the decision's main effect plus all interactions involving it.
Interaction effect: The gap between total and main effects, showing how much the decision's impact depends on other choices.
Variance reduction: How much would total variance decrease if we picked one option for this decision? Useful for prioritizing which decisions to "fix" to reduce result instability.
Interpretation: A decision with high main effect drives results independently. A decision with high interaction effect matters, but differently depending on other choices. A decision with low total effect is relatively inconsequential.
A data.frame with one row per decision set, containing:
Name of the decision type (e.g., "filters", "variables", "model")
First-order Sobol index. Proportion of total variance explained by this decision set alone, averaging over all other decisions. Ranges from 0 (no effect) to 1 (explains all variance)
Total Sobol index. Proportion of total variance explained by this decision set including all its interactions with other decisions. Always ≥ main_effect
Total effect minus main effect. Proportion of variance due to interactions between this decision and others
Proportion of variance eliminated by fixing this decision to a single option. Also called "expected reduction in variance" or EVPPI (Expected Value of Perfect Parameter Information)
If .by is specified, grouping columns appear first.
library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE) ) # Run a multiverse analysis results <- the_data |> add_filters(include1 == 0, include2 != 3) |> add_variables("ivs", iv1, iv2) |> add_variables("dvs", dv1, dv2) |> add_model("linear", lm({dvs} ~ {ivs})) |> expand_decisions() |> analyze_grid() # Decompose variance in standardized coefficients unpacked <- unpack_model_parameters(results) assess_decisions(unpacked, .estimand = std_coefficient) # Which decisions matter most for p-values? assess_decisions(unpacked, .estimand = p) # Decompose separately for each parameter assess_decisions(unpacked, .estimand = p, .by = dvs)library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE) ) # Run a multiverse analysis results <- the_data |> add_filters(include1 == 0, include2 != 3) |> add_variables("ivs", iv1, iv2) |> add_variables("dvs", dv1, dv2) |> add_model("linear", lm({dvs} ~ {ivs})) |> expand_decisions() |> analyze_grid() # Decompose variance in standardized coefficients unpacked <- unpack_model_parameters(results) assess_decisions(unpacked, .estimand = std_coefficient) # Which decisions matter most for p-values? assess_decisions(unpacked, .estimand = p) # Decompose separately for each parameter assess_decisions(unpacked, .estimand = p, .by = dvs)
Summarizes the distribution of a particular model parameter, fit statistics, or any other values returned by the focal modeling process or a post-processing step. The summaries are computed over all specifications in the analysis grid. This function helps evaluate whether results are robust to analytical decisions by computing key distributional properties and sign consistency metrics.
assess_robustness(.multi, .estimand, zero_threshold = 0.01, .by = NULL)assess_robustness(.multi, .estimand, zero_threshold = 0.01, .by = NULL)
.multi |
An object returned by |
.estimand |
The parameter or coefficient to assess. Defaults to
|
zero_threshold |
Numeric value defining the threshold for "practically
zero" effects. Effects between |
.by |
Optional grouping variable(s) for stratified summaries. Useful for examining robustness within specific subsets of decisions (e.g., different models or subgroups). Use unquoted column names. |
A data.frame with the following columns:
Name of the summarized metric (e.g., "std_coef", "AIC")
Type of metric: "parameter" for model coefficients or "fit index" for model fit statistics
The parameter being summarized (e.g., variable name) or "full model" for fit indices
Number of specifications contributing to the summary
Distributional summaries of the metric
Proportion of specifications with positive, negative, or practically zero effects
Shannon entropy of the sign distribution, measuring inconsistency in effect direction across specifications. Ranges from 0 (perfect consistency) to ~1.58 (maximum inconsistency)
All numeric values are rounded to 5 decimal places.
library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE) ) # Run a multiverse analysis results <- the_data |> add_filters(include1 == 0, include2 != 3) |> add_variables("ivs", iv1, iv2) |> add_variables("dvs", dv1, dv2) |> add_model("linear", lm({dvs} ~ {ivs})) |> expand_decisions() |> analyze_grid() # Assess robustness of standardized coefficients assess_robustness(results, .estimand = std_coefficient) # Assess raw coefficients assess_robustness(results, .estimand = coefficient) # Assess std_coef with custom zero threshold assess_robustness(results, .estimand = std_coefficient, zero_threshold = .05) # Stratified assessment by model type assess_robustness(results, .estimand = std_coefficient, .by = dvs)library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE) ) # Run a multiverse analysis results <- the_data |> add_filters(include1 == 0, include2 != 3) |> add_variables("ivs", iv1, iv2) |> add_variables("dvs", dv1, dv2) |> add_model("linear", lm({dvs} ~ {ivs})) |> expand_decisions() |> analyze_grid() # Assess robustness of standardized coefficients assess_robustness(results, .estimand = std_coefficient) # Assess raw coefficients assess_robustness(results, .estimand = coefficient) # Assess std_coef with custom zero threshold assess_robustness(results, .estimand = std_coefficient, zero_threshold = .05) # Stratified assessment by model type assess_robustness(results, .estimand = std_coefficient, .by = dvs)
Summarize multiverse parameters
condense(.unpacked, .what, .how, .group = NULL, list_cols = TRUE) organize(.unpacked, .what, .group = NULL, focused = TRUE)condense(.unpacked, .what, .how, .group = NULL, list_cols = TRUE) organize(.unpacked, .what, .group = NULL, focused = TRUE)
.unpacked |
a set of results from |
.what |
the column from the unpacked results you'd like to organize |
.how |
a named list. The list should contain summary functions (e.g., mean or median) the user would like to compute over the individual estimates from the multiverse |
.group |
a grouping column, usually from the specifications, that you would like to sort within. This will give you sorted output by the levels of the grouping variable. |
list_cols |
logical, whether to create list columns for the raw values of any summarized columns. Useful for creating visualizations and tables. Default is TRUE. |
focused |
logical, defaults to |
a summarized tibble containing a column for each summary
method from .how
organize(): Sort and organize results by size and sign.
library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # Decision pipeline full_pipeline <- the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2,scale(include3) > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_model("linear_model", lm({dvs} ~ {ivs} * {mods} + cov1)) pipeline_grid <- expand_decisions(full_pipeline) # Run the whole multiverse the_multiverse <- analyze_grid(pipeline_grid[1:10,]) # Reveal and condense the_multiverse |> unpack_model_parameters() |> filter(str_detect(parameter, "iv")) |> condense(coefficient, list(mean = mean, median = median))library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # Decision pipeline full_pipeline <- the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2,scale(include3) > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_model("linear_model", lm({dvs} ~ {ivs} * {mods} + cov1)) pipeline_grid <- expand_decisions(full_pipeline) # Run the whole multiverse the_multiverse <- analyze_grid(pipeline_grid[1:10,]) # Reveal and condense the_multiverse |> unpack_model_parameters() |> filter(str_detect(parameter, "iv")) |> condense(coefficient, list(mean = mean, median = 
median))
create_blueprint_graph()
will still work but I recommend using visualize_pipeline() instead,
which has more options and outputs ggplot2 objects instead of grViz graphs
create_blueprint_graph( .pipeline, splines = "line", render = TRUE, show_code = FALSE, ... )create_blueprint_graph( .pipeline, splines = "line", render = TRUE, show_code = FALSE, ... )
.pipeline |
a |
splines |
options for how to draw edges (lines) for a grViz diagram |
render |
whether to render the graph or just output grViz code |
show_code |
whether to show the code that generated the diagram |
... |
additional options passed to |
grViz graph of your pipeline
library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) create_blueprint_graph(full_pipeline)library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) create_blueprint_graph(full_pipeline)
Detect total number of analysis pipelines
detect_multiverse_n(.pipeline, include_models = TRUE)detect_multiverse_n(.pipeline, include_models = TRUE)
.pipeline |
a |
include_models |
Whether to count alternative models if you have more
than one |
a numeric, the total number of unique analysis pipelines
library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) detect_multiverse_n(full_pipeline)library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) detect_multiverse_n(full_pipeline)
Detect total number of filtering expressions in your pipelines
detect_n_filters(.pipeline)detect_n_filters(.pipeline)
.pipeline |
a |
a numeric, the total number of filtering expressions
library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) detect_n_filters(full_pipeline)library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) detect_n_filters(full_pipeline)
Detect total number of models in your pipelines
detect_n_models(.pipeline)detect_n_models(.pipeline)
.pipeline |
a |
a numeric, the total number of unique models
library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) detect_n_models(full_pipeline)library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) detect_n_models(full_pipeline)
Detect total number of subgroups in your pipelines
detect_n_subgroups(.pipeline)detect_n_subgroups(.pipeline)
.pipeline |
a |
a numeric, the total number of unique subgroups, including subgroup combinations
library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_subgroups(include2) |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) detect_n_subgroups(full_pipeline)library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_subgroups(include2) |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) detect_n_subgroups(full_pipeline)
Detect total number of variable sets in your pipelines
detect_n_variables(.pipeline)detect_n_variables(.pipeline)
.pipeline |
a |
a numeric, the total number of unique variable sets
library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) detect_n_variables(full_pipeline)library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) detect_n_variables(full_pipeline)
Expand a set of multiverse decisions into all possible combinations
expand_decisions( .pipeline, .collect_after = NULL, .pointer_path = NULL, .subgroup_in_path = FALSE )expand_decisions( .pipeline, .collect_after = NULL, .pointer_path = NULL, .subgroup_in_path = FALSE )
.pipeline |
a |
.collect_after |
default is NULL. Most of the time you will not use this
argument. However, if your data come from a database, you can use this
argument to call |
.pointer_path |
a string specifying a path to create an external pointer object. This is only necessary if you are using data from an external source. Defaults to NULL. |
.subgroup_in_path |
logical, whether to place the subgroup filters in a file path. This is only relevant if you are using an external pointer (e.g., an Arrow filesystem database). Placing the subgroup filter in the path itself might provide a performance boost over reading the entire filesystem and then performing subgroup filtering. |
a nested data.frame containing all combinations of arbitrary
decisions for a multiverse analysis. Decision types will become list
columns matching the type of decisions called along the pipeline (e.g.,
filters, variables, etc.). Any decisions containing
glue syntax will be populated with the relevant
information.
library(tidyverse) library(multitool) the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) full_pipeline <- the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2, include3 > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_preprocess(process_name = "scale_iv", 'mutate({ivs} = scale({ivs}))') |> add_preprocess(process_name = "scale_mod", mutate({mods} := scale({mods}))) |> add_model("no covariates", lm({dvs} ~ {ivs} * {mods})) |> add_model("with covariates", lm({dvs} ~ {ivs} * {mods} + cov1)) |> add_postprocess("aov", aov()) pipeline_expanded <- expand_decisions(full_pipeline)library(tidyverse) library(multitool) the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) full_pipeline <- the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2, include3 > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_preprocess(process_name = "scale_iv", 'mutate({ivs} = scale({ivs}))') |> add_preprocess(process_name = "scale_mod", mutate({mods} := scale({mods}))) |> add_model("no covariates", lm({dvs} ~ {ivs} * {mods})) |> add_model("with covariates", lm({dvs} ~ {ivs} * {mods} + cov1)) |> add_postprocess("aov", aov()) pipeline_expanded <- expand_decisions(full_pipeline)
show_code is the generic function. All show_code* functions are
simple wrappers of show_code.
show_code( .grid, decision_num, .step = "model", .model_summary = NULL, .post_step = NULL, .execute = FALSE ) show_code_subgroups(.grid, decision_num, ...) show_code_filters(.grid, decision_num, ...) show_code_preprocess(.grid, decision_num, ...) show_code_model(.grid, decision_num, ...) show_code_postprocess(.grid, decision_num, ...)show_code( .grid, decision_num, .step = "model", .model_summary = NULL, .post_step = NULL, .execute = FALSE ) show_code_subgroups(.grid, decision_num, ...) show_code_filters(.grid, decision_num, ...) show_code_preprocess(.grid, decision_num, ...) show_code_model(.grid, decision_num, ...) show_code_postprocess(.grid, decision_num, ...)
.grid |
a full decision grid created by |
decision_num |
numeric. Indicates which decision set in the grid to show underlying code. |
.step |
a point along the pipeline for which you would like to show the underlying code. Defaults to the model. |
.model_summary |
a model summary function such as
|
.post_step |
Only relevant if you are exposing a postprocessing step. If you have more than one postprocess, you can specify which you would like to expose by index or by name. |
.execute |
logical, whether or not to run the code as well as print it. |
... |
additional arguments passed to |
Each show_code* function should be self-explanatory - they indicate
where along the multiverse pipeline to extract code. The goal of these
functions is to create a window into each data/model combination and allow
the user to inspect specific decisions straight from the code that produced
it.
the code that generated results up to the specified point in an analysis pipeline.
show_code_subgroups(): Show the code up to the subgroups stage
show_code_filters(): Show the code up to the filtering stage
show_code_preprocess(): Show the code up to the preprocessing stage
show_code_model(): Show the code up to the modeling stage
show_code_postprocess(): Show the code up to the post-processing stage
Summarize sample sizes for each unique filtering expression
summarize_filter_ns(.pipeline)summarize_filter_ns(.pipeline)
.pipeline |
a |
a tibble with each row representing a filtering expression and
four columns: filter_expression, variable, n_retained,
and n_excluded.
library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) summarize_filter_ns(full_pipeline)library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) summarize_filter_ns(full_pipeline)
Unpack a component of your analyzed grid
unpack_results(.multi, .what, .which = NULL, .unpack_specs = "wide") unpack_model_parameters(.multi, effect_key = NULL, .unpack_specs = "wide") unpack_model_performance(.multi, .unpack_specs = "wide") unpack_model_warnings(.multi, .unpack_specs = "wide") unpack_model_messages(.multi, .unpack_specs = "wide") unpack_postprocess(.multi, .which, .unpack_specs = "wide")unpack_results(.multi, .what, .which = NULL, .unpack_specs = "wide") unpack_model_parameters(.multi, effect_key = NULL, .unpack_specs = "wide") unpack_model_performance(.multi, .unpack_specs = "wide") unpack_model_warnings(.multi, .unpack_specs = "wide") unpack_model_messages(.multi, .unpack_specs = "wide") unpack_postprocess(.multi, .which, .unpack_specs = "wide")
.multi |
a multiverse list-column |
.what |
the name of a list-column you would like to unpack |
.which |
any sub-list columns you would like to unpack |
.unpack_specs |
character, options are |
effect_key |
character, if you added parameter keys to your pipeline, you can specify if you would like to filter the parameters using one of your parameter keys. This is useful when different variables are being switched out across the multiverse but represent the same effect of interest. |
the unnested part of the multiverse requested. This usually contains the particular estimates or statistics you would like to analyze over the decision grid specified.
unpack_model_parameters(): Unpack the model parameters
unpack_model_performance(): Unpack the model performance
unpack_model_warnings(): Unpack the model warnings
unpack_model_messages(): Unpack the model messages
unpack_postprocess(): Unpack a post-processing result
library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # Decision pipeline full_pipeline <- the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2,scale(include3) > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_model("linear_model", lm({dvs} ~ {ivs} * {mods} + cov1)) pipeline_grid <- expand_decisions(full_pipeline) # Run the whole multiverse the_multiverse <- analyze_grid(pipeline_grid[1:10,]) # Reveal results of the linear model the_multiverse |> unpack_results(model_fitted, model_parameters)library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # Decision pipeline full_pipeline <- the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2,scale(include3) > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_model("linear_model", lm({dvs} ~ {ivs} * {mods} + cov1)) pipeline_grid <- expand_decisions(full_pipeline) # Run the whole multiverse the_multiverse <- analyze_grid(pipeline_grid[1:10,]) # Reveal results of the linear model the_multiverse |> unpack_results(model_fitted, model_parameters)
Unpack the decision grid of specifications for your modeling pipeline
unpack_specs(.multi, .how = "wide")unpack_specs(.multi, .how = "wide")
.multi |
a multiverse list-column |
.how |
character, options are |
the unnested specifications of the analysis grid.
library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # Decision pipeline full_pipeline <- the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2,scale(include3) > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_model("linear_model", lm({dvs} ~ {ivs} * {mods} + cov1)) pipeline_grid <- expand_decisions(full_pipeline) # Run the whole multiverse the_multiverse <- analyze_grid(pipeline_grid[1:10,]) # Reveal results of the linear model the_multiverse |> unpack_specs("wide")library(tidyverse) library(multitool) # Simulate some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod1 = rnorm(500), mod2 = rnorm(500), mod3 = rnorm(500), cov1 = rnorm(500), cov2 = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # Decision pipeline full_pipeline <- the_data |> add_filters(include1 == 0,include2 != 3,include2 != 2,scale(include3) > -2.5) |> add_variables("ivs", iv1, iv2, iv3) |> add_variables("dvs", dv1, dv2) |> add_variables("mods", starts_with("mod")) |> add_model("linear_model", lm({dvs} ~ {ivs} * {mods} + cov1)) pipeline_grid <- expand_decisions(full_pipeline) # Run the whole multiverse the_multiverse <- analyze_grid(pipeline_grid[1:10,]) # Reveal results of the linear model the_multiverse |> unpack_specs("wide")
Visualize an analysis pipeline workflow
visualize_pipeline( .pipeline, layout = "vertical", decision_details = FALSE, combinations_detail = FALSE, text_sizing = NULL, node_space = NULL, arrow_spacing = NULL, box_space = NULL, h_space = NULL, v_space = NULL )visualize_pipeline( .pipeline, layout = "vertical", decision_details = FALSE, combinations_detail = FALSE, text_sizing = NULL, node_space = NULL, arrow_spacing = NULL, box_space = NULL, h_space = NULL, v_space = NULL )
.pipeline |
a |
layout |
the orientation of the diagram, can be "horizontal" or "vertical" |
decision_details |
logical, whether to provide finer-grained details about
pipeline decisions and their alternatives. Defaults to |
combinations_detail |
logical, whether to add details on how the
analysis space expands given the cross products of pipeline steps and their
alternatives. Defaults to |
text_sizing |
numeric. when not |
node_space |
numeric. when not |
arrow_spacing |
numeric. when not |
box_space |
numeric. when not |
h_space |
numeric vector of length 2. when not |
v_space |
numeric vector of length 2. when not |
ggplot2 object visualizing your analysis pipeline
library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) visualize_pipeline(full_pipeline)library(tidyverse) library(multitool) # create some data the_data <- data.frame( id = 1:500, iv1 = rnorm(500), iv2 = rnorm(500), iv3 = rnorm(500), mod = rnorm(500), dv1 = rnorm(500), dv2 = rnorm(500), include1 = rbinom(500, size = 1, prob = .1), include2 = sample(1:3, size = 500, replace = TRUE), include3 = rnorm(500) ) # create a pipeline blueprint full_pipeline <- the_data |> add_filters(include1 == 0, include2 != 3, include3 > -2.5) |> add_variables(var_group = "ivs", iv1, iv2, iv3) |> add_variables(var_group = "dvs", dv1, dv2) |> add_model("linear model", lm({dvs} ~ {ivs} * mod)) visualize_pipeline(full_pipeline)