Skip to content

Commit adc4a3c

Browse files
authored
[r] Fix iterative peak merging for isolated peaks (#338)
* [r] Fix iterative peak merging for isolated peaks
1 parent bde5737 commit adc4a3c

2 files changed

Lines changed: 40 additions & 16 deletions

File tree

r/R/atac_utils.R

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -296,25 +296,30 @@ merge_peaks_iterative <- function(peaks) {
296296
peaks <- tibble::as_tibble(peaks) %>%
297297
dplyr::distinct(chr, start, end, .keep_all = TRUE)
298298

299-
overlaps <- range_overlaps(peaks, peaks)
300-
# Initialize keeper set with any non-overlapping peaks
301-
keeper_sets <- list(setdiff(seq_len(nrow(peaks)), overlaps$from))
302-
303-
# Maintain invariant: overlaps contains only overlap pairs where we have not
304-
# permanently kept or discarded either of the elements in the pair
305-
overlaps <- dplyr::filter(overlaps, from < to)
306-
while (nrow(overlaps) > 0) {
307-
# Add peaks with no higher-ranked overlap to the keeper set
308-
keeper_set <- setdiff(overlaps$from, overlaps$to)
299+
# Only keep one direction for each overlap. With peaks ordered by priority,
300+
# lower row numbers have higher priority.
301+
overlaps <- range_overlaps(peaks, peaks) %>%
302+
dplyr::filter(from < to)
303+
304+
# Track peak indices which are neither conclusively excluded or included from a keeper set
305+
remaining <- seq_len(nrow(peaks))
306+
keeper_sets <- list()
307+
while (length(remaining) > 0) {
308+
309+
# Add peaks with no higher-ranked overlap to the keeper set. This includes
310+
# peaks that are isolated from the start or become isolated after discards.
311+
keeper_set <- setdiff(remaining, overlaps$to)
309312
keeper_sets <- c(keeper_sets, list(keeper_set))
313+
310314
# Mark peaks that overlap directly with the keeper set
311-
discard_set <- overlaps$to[overlaps$from %in% keeper_set] %>% unique()
315+
discard_set <- overlaps$to[overlaps$from %in% keeper_set] %>%
316+
unique()
317+
remove_set <- c(keeper_set, discard_set)
318+
remaining <- setdiff(remaining, remove_set)
319+
312320
# Discard all overlap information on the keeper and discard sets
313321
overlaps <- overlaps %>%
314-
dplyr::filter(
315-
!(to %in% discard_set), !(from %in% discard_set),
316-
!(from %in% keeper_set)
317-
)
322+
dplyr::filter(!(from %in% remove_set), !(to %in% remove_set))
318323
}
319324
return(peaks[sort(unlist(keeper_sets)), ])
320325
}

r/tests/testthat/test-atac_utils.R

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,13 @@ test_that("tile_ranges works", {
8484
})
8585

8686
test_that("merge_peaks works", {
87+
peaks0 <- tibble::tibble(
88+
chr = "chr1",
89+
start = as.integer(c(1, 5)),
90+
end = start + 2L
91+
)
92+
expect_identical(merge_peaks_iterative(peaks0), peaks0)
93+
8794
peaks1 <- tibble::tibble(
8895
chr = "chr1",
8996
start = as.integer(1:10),
@@ -96,6 +103,18 @@ test_that("merge_peaks works", {
96103
)
97104
expect_identical(merge_peaks_iterative(peaks1), res1)
98105

106+
peaks1_chain <- tibble::tibble(
107+
chr = "chr1",
108+
start = as.integer(1:3),
109+
end = start + 2L
110+
)
111+
res1_chain <- tibble::tibble(
112+
chr = "chr1",
113+
start = as.integer(c(1, 3)),
114+
end = start + 2L
115+
)
116+
expect_identical(merge_peaks_iterative(peaks1_chain), res1_chain)
117+
99118
peaks2 <- tibble::tibble(
100119
chr = "chr1",
101120
start = as.integer(c(1, 22, 11, 22, 2, 3, 13, 1, 4, 21, 12)),
@@ -740,4 +759,4 @@ test_that("qc_scATAC handles genes near the start of a chromosome", {
740759

741760
qc <- qc_scATAC(fragments, genes, blacklist)
742761
expect_identical(sort(qc$cellName), sort(cellNames(fragments)))
743-
})
762+
})

0 commit comments

Comments
 (0)