Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 36 additions & 35 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,35 +1,36 @@
Package: adonisplus
Type: Package
Title: Bonus material for the adonis function
Version: 0.2.0
Authors@R:
c(person(given = "Ceylan",
family = "Tanes",
email = "[email protected]",
role = c("aut", "cre")),
person(given = "Kyle",
family = "Bittinger",
email = "[email protected]",
role = c("aut")))
Description: Contains a tidy method for adonis results, custom permutation
functions, and a version of adonis for repeated measures designs.
License: GPL-3 + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
Imports:
generics (>= 0.0.2),
rlang,
vegan,
usedist,
dplyr (>= 1.0.0),
tibble (>= 3.0.0),
ape,
magrittr,
ggplot2,
stringr
Depends:
R (>= 2.10)
Suggests:
covr,
testthat
Package: adonisplus
Type: Package
Title: Bonus material for the adonis function
Version: 0.2.0
Authors@R:
c(person(given = "Ceylan",
family = "Tanes",
email = "[email protected]",
role = c("aut", "cre")),
person(given = "Kyle",
family = "Bittinger",
email = "[email protected]",
role = c("aut")))
Description: Contains a tidy method for adonis results, custom permutation
functions, and a version of adonis for repeated measures designs.
License: GPL-3 + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
Imports:
generics (>= 0.0.2),
rlang,
vegan,
usedist,
dplyr (>= 1.0.0),
tibble (>= 3.0.0),
ape,
magrittr,
ggplot2,
stringr
Depends:
R (>= 2.10)
Suggests:
covr,
testthat

270 changes: 135 additions & 135 deletions R/adonis-custom.R
Original file line number Diff line number Diff line change
@@ -1,135 +1,135 @@
#' Permutational multivariate analysis of variance, plus
#'
#' @param data Data to use in the test.
#' @param distmat Distance matrix, either a matrix or an object of class
#' \code{dist} The distance matrix will automatically be filtered and
#' re-arranged to match the rows of \code{data}.
#' @param formula Model formula. The LHS (the part to the left of the tilde)
#' must be "distmat". The formula can either be a literal formula or a string
#' that can be converted into a formula.
#' @param sample_id_var Variable in \code{data} that defines the sample IDs,
#' i.e. the identifiers that correspond to each item in the distance matrix.
#' @param rep_meas_var Variable in \code{data} that indicates the repeated
#' measures in the experiment, typically a subject ID or cage ID.
#' @param shuffle Named character vector that specifies how to carry out
#' restricted permutations for variables in \code{data}. Names should
#' correspond to variables in \code{data}. Values should be either
#' \code{"between"} or \code{"within"}, meaning that values should be
#' shuffled between or within levels of \code{rep_meas_var}. See Details for
#' more info.
#' @param permutations Number of permutations.
#' @param seed Random seed, set just before the initial call to
#' \code{vegan::adonis()}.
#' @return The results from \code{vegan::adonis()} in tidy format.
#' @details
#' A typical experimental design has subjects in a few groups sampled
#' repeatedly over a few time points. If the variable denoting the group is
#' \code{study_group} and the variable denoting the time point is
#' \code{time_point}, then the \code{shuffle} argument would be
#' \code{c(study_group = "between", time_point = "within")}. During the
#' permutation stage, the values of the study group will be shuffled between
#' subjects, preserving the value within each subject. Conversely, the values
#' of the time point will be shuffled only within each subject.
#' @export
adonisplus <- function(data, distmat, formula, sample_id_var = SampleID,
rep_meas_var = subject_id, shuffle = NULL,
permutations = 999, seed = 42) {
sample_ids <- data %>%
dplyr::pull({{ sample_id_var }}) %>%
as.character()
distmat <- usedist::dist_subset(distmat, sample_ids)
formula <- stats::as.formula(formula)
check_lhs(formula, distmat ~ .)
assign("distmat", distmat, environment(formula))
set.seed(seed)
a_observed <- vegan::adonis2(
formula = formula, data = data, permutations = permutations
)
result <- tidy.anova.cca(a_observed)
if (!is.null(shuffle)) {
rep_meas_vals <- data %>%
dplyr::pull({{ rep_meas_var }})
# Here, vars is the variables that will undergo custom permutations
vars <- names(shuffle)
# TODO: handle errors from missing variable names
# Matrix of variables by terms
vars_by_terms <- attr(stats::terms(formula), "factors")
nterms <- length(colnames(vars_by_terms))
# TODO: handle errors due to variable names not matching
# Keep only the variables that we will shuffle manually
vars_idx <- match(vars, rownames(vars_by_terms))
# Need drop = FALSE or R converts to a vector when nterms == 1
vars_by_terms <- vars_by_terms[vars_idx, , drop = FALSE]
term_idx <- which(colSums(vars_by_terms) > 0)
f_observed <- result$statistic[term_idx]
fs_permuted <- replicate(permutations, {
trial_data <- data
for (var in vars) {
method <- shuffle[var]
shuffle_functions <- list(
between = shuffle_between_groups,
within = shuffle_within_groups
)
# Use match.arg in case someone writes "bet" instead of "between"
method <- match.arg(method, names(shuffle_functions))
fcn <- shuffle_functions[[method]]
old_vals <- trial_data[[var]]
new_vals <- fcn(old_vals, rep_meas_vals)
trial_data[[var]] <- new_vals
}
trial_a <- vegan::adonis2(formula, trial_data, permutations = 4)
trial_result <- tidy.anova.cca(trial_a)
trial_result$statistic[term_idx]
})
if (nterms > 1) {
fs_greater <- sweep(cbind(f_observed, fs_permuted), 1, f_observed, `>=`)
p_permuted <- apply(fs_greater, 1, function(x) sum(x) / length(x))
} else {
fs_greater <- c(f_observed, fs_permuted) >= f_observed
p_permuted <- sum(fs_greater) / length(fs_greater)
}
result$p.value[term_idx] <- p_permuted
}
result
}
#' Post-hoc tests for permutational multivariate analysis of variance
#'
#' @param data Data to use in the test.
#' @param ... Additional arguments passed to \code{adonisplus()}.
#' @param which Variable on which to perform post-hoc comparisons.
#' @param alpha The threshold for rejecting the null hypothesis in the main
#' comparison.
#' @return The results in tidy format.
#' @export
adonispost <- function(data, ..., which = study_group, alpha = 0.05) {
var_name <- rlang::as_name(rlang::ensym(which))
result_main <- adonisplus(data, ...) %>%
dplyr::mutate(comparison = paste("All", var_name)) %>%
dplyr::select(comparison, term, dplyr::everything()) %>%
dplyr::filter(!(term %in% c("Residual", "Total")))
var_levels <- data %>%
dplyr::pull({{ which }}) %>%
as.factor() %>%
levels()
pairs <- utils::combn(var_levels, 2, simplify = FALSE)
make_pairwise_comparison <- function(pair) {
pair_data <- data %>%
dplyr::filter({{ which }} %in% pair)
adonisplus(pair_data, ...) %>%
dplyr::mutate(comparison = paste(pair, collapse = " - ")) %>%
dplyr::select(comparison, term, dplyr::everything()) %>%
dplyr::filter(!(term %in% c("Residual", "Total")))
}
result_posthoc <- lapply(pairs, make_pairwise_comparison) %>%
dplyr::bind_rows()
dplyr::bind_rows(result_main, result_posthoc)
}
#' Permutational multivariate analysis of variance, plus
#'
#' @param data Data to use in the test.
#' @param distmat Distance matrix, either a matrix or an object of class
#' \code{dist} The distance matrix will automatically be filtered and
#' re-arranged to match the rows of \code{data}.
#' @param formula Model formula. The LHS (the part to the left of the tilde)
#' must be "distmat". The formula can either be a literal formula or a string
#' that can be converted into a formula.
#' @param sample_id_var Variable in \code{data} that defines the sample IDs,
#' i.e. the identifiers that correspond to each item in the distance matrix.
#' @param rep_meas_var Variable in \code{data} that indicates the repeated
#' measures in the experiment, typically a subject ID or cage ID.
#' @param shuffle Named character vector that specifies how to carry out
#' restricted permutations for variables in \code{data}. Names should
#' correspond to variables in \code{data}. Values should be either
#' \code{"between"} or \code{"within"}, meaning that values should be
#' shuffled between or within levels of \code{rep_meas_var}. See Details for
#' more info.
#' @param permutations Number of permutations.
#' @param seed Random seed, set just before the initial call to
#' \code{vegan::adonis()}.
#' @return The results from \code{vegan::adonis()} in tidy format.
#' @details
#' A typical experimental design has subjects in a few groups sampled
#' repeatedly over a few time points. If the variable denoting the group is
#' \code{study_group} and the variable denoting the time point is
#' \code{time_point}, then the \code{shuffle} argument would be
#' \code{c(study_group = "between", time_point = "within")}. During the
#' permutation stage, the values of the study group will be shuffled between
#' subjects, preserving the value within each subject. Conversely, the values
#' of the time point will be shuffled only within each subject.
#' @export
adonisplus <- function(data, distmat, formula, sample_id_var = SampleID,
rep_meas_var = subject_id, shuffle = NULL,
permutations = 999, seed = 42) {
sample_ids <- data %>%
dplyr::pull({{ sample_id_var }}) %>%
as.character()
distmat <- usedist::dist_subset(distmat, sample_ids)
formula <- stats::as.formula(formula)
check_lhs(formula, distmat ~ .)
assign("distmat", distmat, environment(formula))

set.seed(seed)
a_observed <- vegan::adonis2(
formula = formula, data = data, permutations = permutations
)
result <- tidy.anova.cca(a_observed)

if (!is.null(shuffle)) {
rep_meas_vals <- data %>%
dplyr::pull({{ rep_meas_var }})

# Here, vars is the variables that will undergo custom permutations
vars <- names(shuffle)
# TODO: handle errors from missing variable names
# Matrix of variables by terms
vars_by_terms <- attr(stats::terms(formula), "factors")
nterms <- length(colnames(vars_by_terms))
# TODO: handle errors due to variable names not matching
# Keep only the variables that we will shuffle manually
vars_idx <- match(vars, rownames(vars_by_terms))
# Need drop = FALSE or R converts to a vector when nterms == 1
vars_by_terms <- vars_by_terms[vars_idx, , drop = FALSE]
term_idx <- which(colSums(vars_by_terms) > 0)

f_observed <- result$statistic[term_idx]
fs_permuted <- replicate(permutations, {
trial_data <- data
for (var in vars) {
method <- shuffle[var]
shuffle_functions <- list(
between = shuffle_between_groups,
within = shuffle_within_groups
)
# Use match.arg in case someone writes "bet" instead of "between"
method <- match.arg(method, names(shuffle_functions))
fcn <- shuffle_functions[[method]]
old_vals <- trial_data[[var]]
new_vals <- fcn(old_vals, rep_meas_vals)
trial_data[[var]] <- new_vals
}
trial_a <- vegan::adonis2(formula, trial_data, permutations = 4)
trial_result <- tidy.anova.cca(trial_a)
trial_result$statistic[term_idx]
})

if (nterms > 1) {
fs_greater <- sweep(cbind(f_observed, fs_permuted), 1, f_observed, `>=`)
p_permuted <- apply(fs_greater, 1, function(x) sum(x) / length(x))
} else {
fs_greater <- c(f_observed, fs_permuted) >= f_observed
p_permuted <- sum(fs_greater) / length(fs_greater)
}
result$p.value[term_idx] <- p_permuted
}
result
}

#' Post-hoc tests for permutational multivariate analysis of variance
#'
#' @param data Data to use in the test.
#' @param ... Additional arguments passed to \code{adonisplus()}.
#' @param which Variable on which to perform post-hoc comparisons.
#' @param alpha The threshold for rejecting the null hypothesis in the main
#' comparison.
#' @return The results in tidy format.
#' @export
adonispost <- function(data, ..., which = study_group, alpha = 0.05) {
var_name <- rlang::as_name(rlang::ensym(which))

result_main <- adonisplus(data, ...) %>%
dplyr::mutate(comparison = paste("All", var_name)) %>%
dplyr::select(comparison, term, dplyr::everything()) %>%
dplyr::filter(!(term %in% c("Residual", "Total")))

var_levels <- data %>%
dplyr::pull({{ which }}) %>%
as.factor() %>%
levels()
pairs <- utils::combn(var_levels, 2, simplify = FALSE)

make_pairwise_comparison <- function(pair) {
pair_data <- data %>%
dplyr::filter({{ which }} %in% pair)
adonisplus(pair_data, ...) %>%
dplyr::mutate(comparison = paste(pair, collapse = " - ")) %>%
dplyr::select(comparison, term, dplyr::everything()) %>%
dplyr::filter(!(term %in% c("Residual", "Total")))
}
result_posthoc <- lapply(pairs, make_pairwise_comparison) %>%
dplyr::bind_rows()
dplyr::bind_rows(result_main, result_posthoc)
}
Loading
Loading