Skip to content

Add by to chop() #1597

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# tidyr (development version)

* `chop()` gains a `by` argument for specifying grouping columns, similar to
`nest(.by =)` (@hrryt, #1490).

* `fill()` gains a `.by` argument as an alternative to `dplyr::group_by()` for
applying the fill per group, similar to `nest(.by =)` and
`dplyr::mutate(.by =)` (@olivroy, #1439).
Expand Down
91 changes: 82 additions & 9 deletions R/chop.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,20 @@
#' @param data A data frame.
#' @param cols <[`tidy-select`][tidyr_tidy_select]> Columns to chop or unchop.
#'
#' If not supplied for `chop()`, then `cols` is derived as all columns _not_
#' selected by `by`.
#'
#' For `unchop()`, each column should be a list-column containing generalised
#' vectors (e.g. any mix of `NULL`s, atomic vectors, S3 vectors, lists,
#' or data frames).
#' @param by <[`tidy-select`][tidyr_tidy_select]> Columns to chop _by_; these
#' will not be chopped.
#'
#' `by` can be used in place of or in conjunction with columns supplied
#' through `cols`.
#'
#' If not supplied, then `by` is derived as all columns _not_ selected by
#' `cols`.
#' @param keep_empty By default, you get one row of output for each element
#' of the list that you are unchopping/unnesting. This means that if there's a
#' size-0 element (like `NULL` or an empty data frame or vector), then that
Expand All @@ -51,6 +62,19 @@
#' # cf nest
#' df %>% nest(data = c(y, z))
#'
#' # Specify variables to chop by (rather than variables to chop) using `by`
#' df %>% chop(by = x)
#'
#' # Use tidyselect syntax and helpers, just like in `dplyr::select()`
#' df %>% chop(any_of(c("y", "z")))
#'
#' # `cols` and `by` can be used together, either to drop columns you no longer
#' # need or to also chop the columns you are chopping by.
#' # This drops `z`:
#' df %>% chop(y, by = x)
#' # This includes `x` in the chopped columns:
#' df %>% chop(everything(), by = x)
#'
#' # Unchop --------------------------------------------------------------------
#' df <- tibble(x = 1:4, y = list(integer(), 1L, 1:2, 1:3))
#' df %>% unchop(y)
Expand All @@ -65,20 +89,16 @@
#' df <- tibble(x = 1:3, y = list(NULL, tibble(x = 1), tibble(y = 1:2)))
#' df %>% unchop(y)
#' df %>% unchop(y, keep_empty = TRUE)
chop <- function(data, cols, ..., error_call = current_env()) {
chop <- function(data, cols = NULL, ..., by = NULL, error_call = current_env()) {
check_dots_empty0(...)
check_data_frame(data, call = error_call)
check_required(cols, call = error_call)

cols <- tidyselect::eval_select(
expr = enquo(cols),
data = data,
allow_rename = FALSE,
error_call = error_call
)
info <- chop_info(data, cols = {{ cols }}, by = {{ by }})
cols <- info$cols
by <- info$by

cols <- tidyr_new_list(data[cols])
keys <- data[setdiff(names(data), names(cols))]
keys <- data[by]

info <- vec_group_loc(keys)
keys <- info$key
Expand All @@ -94,6 +114,59 @@ chop <- function(data, cols, ..., error_call = current_env()) {
reconstruct_tibble(data, out)
}

# Resolve which columns `chop()` should chop (`cols`) and which it should
# group by (`by`).
#
# `cols` and `by` are tidyselect expressions evaluated against `data`. At
# least one must be supplied; whichever is left as `NULL` is derived as every
# column of `data` that the other one did not select. When both are supplied
# they are used as-is, so they may overlap or leave columns out entirely.
#
# Returns a list with two character vectors of column names: `cols` and `by`.
# Errors are reported against `error_call`.
chop_info <- function(
  data,
  cols = NULL,
  by = NULL,
  error_call = caller_env()
) {
  cols_quo <- enquo(cols)
  by_quo <- enquo(by)

  cols_absent <- quo_is_null(cols_quo)
  by_absent <- quo_is_null(by_quo)

  if (cols_absent && by_absent) {
    stop_use_cols_or_by(error_call = error_call)
  }

  # Evaluate a selection quosure against `data` and keep only the names.
  selected_names <- function(selection) {
    loc <- tidyselect::eval_select(
      expr = selection,
      data = data,
      allow_rename = FALSE,
      error_call = error_call
    )
    names(loc)
  }

  # `cols` is resolved before `by` so that selection errors surface in the
  # same order as the arguments appear.
  cols_names <- selected_names(cols_quo)
  by_names <- selected_names(by_quo)

  all_names <- names(data)

  if (cols_absent) {
    # Only `by` was given: chop everything else.
    cols_names <- setdiff(all_names, by_names)
  }

  if (by_absent) {
    # Only `cols` was given: group by everything else.
    by_names <- setdiff(all_names, cols_names)
  }

  list(cols = cols_names, by = by_names)
}

# Abort with the standard error raised when neither `cols` nor `by` was
# supplied. The error is attributed to `error_call`.
stop_use_cols_or_by <- function(error_call = caller_env()) {
  cli::cli_abort(
    "At least one of {.var cols} or {.var by} must be supplied.",
    call = error_call
  )
}

col_chop <- function(x, indices) {
ptype <- vec_ptype(x)

Expand Down
27 changes: 26 additions & 1 deletion man/chop.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/_snaps/chop.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
chop(df)
Condition
Error in `chop()`:
! `cols` is absent but must be supplied.
! At least one of `cols` or `by` must be supplied.

# incompatible ptype mentions the column (#1477)

Expand Down
35 changes: 35 additions & 0 deletions tests/testthat/test-chop.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,41 @@ test_that("can chop empty data frame (#1206)", {
)
})

test_that("can chop `by` columns (#1490)", {
  df <- tibble(x = c(1, 1, 1, 2, 2), y = c(2, 1, 2, 3, 4), z = 1:5)

  # Grouping by `x` and `y` must match chopping the one remaining column.
  via_by <- chop(df, by = c(x, y))
  via_cols <- chop(df, z)

  expect_identical(via_by, via_cols)
})

test_that("can combine `by` with `cols` (#1490)", {
  df <- tibble(x = c(1, 1, 1, 2, 2), y = c(2, 1, 2, 3, 4), z = 1:5)

  # `z` is in neither `cols` nor `by`, so it is dropped from the result;
  # that is the same as removing it up front and chopping `x`.
  combined <- chop(df, x, by = y)
  without_z <- chop(dplyr::select(df, -z), x)

  expect_identical(combined, without_z)
})

# A column selected by both `cols` and `by` appears twice in the output (once
# as a key, once as a chopped list-column), so the duplicated name has to be
# repaired. The previous description said "union", but the behaviour under
# test is triggered by the overlap between the two selections.
test_that("overlap of `by` and `cols` results in renaming (#1490)", {
  df <- tibble(x = 1, y = 1)
  one <- vctrs::list_of(1)

  # Silence the name-repair message; only the resulting structure matters.
  with_options(rlib_name_repair_verbosity = "quiet", {
    # `x` is both a key and a chopped column; `y` is only chopped.
    expect_identical(
      chop(df, everything(), by = x),
      tibble(x = 1, x = one, y = one, .name_repair = "unique")
    )

    # `x` is both a key and a chopped column; `y` is only a key.
    expect_identical(
      chop(df, x, by = everything()),
      tibble(x = 1, y = 1, x = one, .name_repair = "unique")
    )
  })
})

# unchop ------------------------------------------------------------------

test_that("extends into rows", {
Expand Down