From f5a61e0e8c2ee9507b77adb8fbd31f60f8a774b5 Mon Sep 17 00:00:00 2001 From: ahl27 Date: Mon, 6 Jan 2025 19:41:45 -0500 Subject: [PATCH 1/4] Adds methods to allow comparisons between XVector objects without causing C stack overflow --- R/SharedVector-class.R | 21 +++++++++++++++++++++ R/XVector-class.R | 29 +++++++++++++++++++++++++++++ src/R_init_XVector.c | 1 + src/SharedVector_class.c | 40 ++++++++++++++++++++++++++++++++++++++++ src/XVector.h | 2 ++ 5 files changed, 93 insertions(+) diff --git a/R/SharedVector-class.R b/R/SharedVector-class.R index fd136d4..0db7d7f 100644 --- a/R/SharedVector-class.R +++ b/R/SharedVector-class.R @@ -277,6 +277,27 @@ SharedVector.compare <- function(x1, start1, x2, start2, width) .Call2("SharedVector_memcmp", x1, start1, x2, start2, width, PACKAGE="XVector") +SharedVector.order <- function(x, decreasing=FALSE){ + ## will have to add in method arg later + ## adding 1L because this method returns 0-indexed values + .Call("SharedVector_order", + x, length(x), decreasing, PACKAGE="XVector") + 1L +} +setMethod("order", "SharedVector", + function(..., na.last=TRUE, decreasing=FALSE, method=c("auto", "shell", "radix")){ + args <- list(...) + if (length(args) == 1L) { + x <- args[[1L]] + SharedVector.order(x, decreasing) + } else { + args <- unname(args) + do.call(order, c(args, list(na.last=na.last, + decreasing=decreasing, + method=method))) + } + } +) + ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Low-level copy. diff --git a/R/XVector-class.R b/R/XVector-class.R index b3d51e0..7c9d0a2 100644 --- a/R/XVector-class.R +++ b/R/XVector-class.R @@ -169,6 +169,8 @@ setMethod("bindROWS", "XVector", .concatenate_XVector_objects) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Equality ### +### order() and sameAsPreviousROW are required to make XVector compatible with +### the equality functions defined in S4Vectors .XVector.equal <- function(x, y) { @@ -184,3 +186,30 @@ setMethod("==", signature(e1="XVector", e2="XVector"), function(e1, e2) .XVector.equal(e1, e2) ) +.XVector.order <- function(x, decreasing=FALSE){ + SharedVector.order(x@shared, decreasing) +} +setMethod("order", "XVector", + function(..., na.last=TRUE, decreasing=FALSE, method=c("auto", "shell", "radix")){ + args <- list(...) + if (length(args) == 1L) { + x <- args[[1L]] + .XVector.order(x, decreasing) + } else { + args <- unname(args) + do.call(order, c(args, list(na.last=na.last, + decreasing=decreasing, + method=method))) + } + } +) + +.XVector.sameAsPreviousROW <- function(x){ + if(NROW(x) == 0){ + logical(0L) + } else { + c(FALSE, vapply(seq_along(head(x,n=-1L)), + \(i){ x[i] == x[i+1] }, logical(1L))) + } +} +setMethod("sameAsPreviousROW", "XVector", .XVector.sameAsPreviousROW) diff --git a/src/R_init_XVector.c b/src/R_init_XVector.c index 36240f4..b295eed 100644 --- a/src/R_init_XVector.c +++ b/src/R_init_XVector.c @@ -32,6 +32,7 @@ static const R_CallMethodDef callMethods[] = { CALLMETHOD_DEF(SharedVector_Ocopy_from_start, 6), CALLMETHOD_DEF(SharedVector_Ocopy_from_subscript, 4), CALLMETHOD_DEF(SharedVector_mcopy, 7), + CALLMETHOD_DEF(SharedVector_order, 3), /* SharedRaw_class.c */ CALLMETHOD_DEF(SharedRaw_new, 2), diff --git a/src/SharedVector_class.c b/src/SharedVector_class.c index 13f6fc0..75d92fd 100644 --- a/src/SharedVector_class.c +++ b/src/SharedVector_class.c @@ -259,6 +259,46 @@ SEXP SharedVector_memcmp(SEXP x1, SEXP start1, SEXP x2, SEXP start2, SEXP width) return ans; } +SEXP SharedVector_order(SEXP x, SEXP width, SEXP descending) +{ + SEXP ans; + int nelt, desc; + SEXP tag = _get_SharedVector_tag(x); + nelt = INTEGER(width)[0]; + desc = LOGICAL(descending)[0]; + + PROTECT(ans = NEW_INTEGER(nelt)); + int *indices = INTEGER(ans); + for(int i=0; i Date: Mon, 6 Jan 2025 20:19:10 -0500 Subject: [PATCH 2/4] fixes bug where order of comparisons could cause errors --- R/XVector-class.R | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/R/XVector-class.R b/R/XVector-class.R index 7c9d0a2..e0de62c 100644 --- a/R/XVector-class.R +++ b/R/XVector-class.R @@ -172,20 +172,6 @@ setMethod("bindROWS", "XVector", .concatenate_XVector_objects) ### order() and sameAsPreviousROW are required to make XVector compatible with ### the equality functions defined in S4Vectors -.XVector.equal <- function(x, y) -{ - if (class(x) != class(y) || x@length != y@length) - return(FALSE) - ans <- !SharedVector.compare(x@shared, x@offset + 1L, - y@shared, y@offset + 1L, - x@length) - as.logical(ans) -} - -setMethod("==", signature(e1="XVector", e2="XVector"), - function(e1, e2) .XVector.equal(e1, e2) -) - .XVector.order <- function(x, decreasing=FALSE){ SharedVector.order(x@shared, decreasing) } @@ -213,3 +199,31 @@ setMethod("order", "XVector", } } setMethod("sameAsPreviousROW", "XVector", .XVector.sameAsPreviousROW) + +## These methods are defined so that the XVector argument comes first +## this matters because of how S4Vectors::pcompare is defined; it attempts +## to coerce the second argument to a list and then concatenate, which can +## cause weird behavior if the first element is an atomic vector and the +## second is an XVector object. +setMethod("==", signature(e1="ANY", e2="XVector"), + function(e1, e2) { pcompare(e2, e1) == 0 } +) + +setMethod("<=", signature(e1="ANY", e2="XVector"), + function(e1, e2) { pcompare(e2, e1) >= 0L } +) + +.XVector.equal <- function(x, y) +{ + if (class(x) != class(y) || x@length != y@length) + return(FALSE) + ans <- !SharedVector.compare(x@shared, x@offset + 1L, + y@shared, y@offset + 1L, + x@length) + as.logical(ans) +} + +setMethod("==", signature(e1="XVector", e2="XVector"), + function(e1, e2) .XVector.equal(e1, e2) +) + From 30f1a4839190592e763433a28b3ce81f46385f7e Mon Sep 17 00:00:00 2001 From: ahl27 Date: Wed, 8 Jan 2025 10:34:38 -0500 Subject: [PATCH 3/4] reorder functions for consistency --- src/R_init_XVector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/R_init_XVector.c b/src/R_init_XVector.c index b295eed..c583d9d 100644 --- a/src/R_init_XVector.c +++ b/src/R_init_XVector.c @@ -29,10 +29,10 @@ static const R_CallMethodDef callMethods[] = { CALLMETHOD_DEF(externalptr_show, 1), CALLMETHOD_DEF(SharedVector_address0, 1), CALLMETHOD_DEF(SharedVector_memcmp, 5), + CALLMETHOD_DEF(SharedVector_order, 3), CALLMETHOD_DEF(SharedVector_Ocopy_from_start, 6), CALLMETHOD_DEF(SharedVector_Ocopy_from_subscript, 4), CALLMETHOD_DEF(SharedVector_mcopy, 7), - CALLMETHOD_DEF(SharedVector_order, 3), /* SharedRaw_class.c */ CALLMETHOD_DEF(SharedRaw_new, 2), From 6e6e6405c58d3b7336d03d07fbf0b6ccb6aa8166 Mon Sep 17 00:00:00 2001 From: ahl27 Date: Thu, 23 Jan 2025 12:39:46 -0500 Subject: [PATCH 4/4] Fixes comparisons to be atomic; adds pcompare support; update man and DESCRIPTION --- DESCRIPTION | 2 +- R/SharedVector-class.R | 9 +++-- R/XVector-class.R | 84 ++++++++++++++++++++++++++++-------------- man/XVector-class.Rd | 52 ++++++++++++++++++++++++++ 4 files changed, 116 insertions(+), 31 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7a788a3..7d72811 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,7 +5,7 @@ Description: Provides memory efficient S4 classes for storing sequences biocViews: Infrastructure, DataRepresentation URL: https://bioconductor.org/packages/XVector BugReports: https://github.com/Bioconductor/XVector/issues -Version: 0.47.1 +Version: 0.47.2 License: Artistic-2.0 Encoding: UTF-8 Author: Hervé Pagès and Patrick Aboyoun diff --git a/R/SharedVector-class.R b/R/SharedVector-class.R index 0db7d7f..8708c92 100644 --- a/R/SharedVector-class.R +++ b/R/SharedVector-class.R @@ -291,9 +291,8 @@ setMethod("order", "SharedVector", SharedVector.order(x, decreasing) } else { args <- unname(args) - do.call(order, c(args, list(na.last=na.last, - decreasing=decreasing, - method=method))) + lapply(args, order, + na.last=na.last, decreasing=decreasing, method=method) } } ) @@ -396,3 +395,7 @@ setMethod("!=", signature(e1="SharedVector", e2="SharedVector"), function(e1, e2) address(e1@xp) != address(e2@xp) ) +setMethod("<=", signature(e1="SharedVector", e2="SharedVector"), + function(e1, e2) address(e1@xp) <= address(e2@xp) +) + diff --git a/R/XVector-class.R b/R/XVector-class.R index e0de62c..936e180 100644 --- a/R/XVector-class.R +++ b/R/XVector-class.R @@ -169,8 +169,56 @@ setMethod("bindROWS", "XVector", .concatenate_XVector_objects) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### Equality ### -### order() and sameAsPreviousROW are required to make XVector compatible with -### the equality functions defined in S4Vectors + +.XVector.equal <- function(x, y) +{ + if (class(x) != class(y) || x@length != y@length) + return(FALSE) + ans <- !SharedVector.compare(x@shared, x@offset + 1L, + y@shared, y@offset + 1L, + x@length) + as.logical(ans) +} + +.XVector.lt_or_equal <- function(x, y) +{ + if (class(x) != class(y) || x@length != y@length) + return(FALSE) + ans <- SharedVector.compare(x@shared, x@offset + 1L, + y@shared, y@offset + 1L, + x@length) + ans <= 0 +} + +setMethod("==", signature(e1="XVector", e2="XVector"), + function(e1, e2) .XVector.equal(e1, e2) +) + +setMethod("<=", signature(e1="XVector", e2="XVector"), + function(e1, e2) .XVector.lt_or_equal(e1, e2) +) + +setMethod("==", signature(e1="XVector", e2="ANY"), + function(e1, e2) e1 == as(e2, class(e1)) +) +setMethod("<=", signature(e1="XVector", e2="ANY"), + function(e1, e2) e1 <= as(e2, class(e1)) +) + +## These methods are required, otherwise it dispatches to base comparison +## (which is element-wise) +setMethod("==", signature(e1="ANY", e2="XVector"), + function(e1, e2) e2 == e1 +) +setMethod("<=", signature(e1="ANY", e2="XVector"), + function(e1, e2) !(e2 > e1) +) + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### Parallel comparison +### +### order() and sameAsPreviousROW make XVector compatible with pcompare +### as defined in S4Vectors .XVector.order <- function(x, decreasing=FALSE){ SharedVector.order(x@shared, decreasing) @@ -183,9 +231,10 @@ setMethod("order", "XVector", .XVector.order(x, decreasing) } else { args <- unname(args) - do.call(order, c(args, list(na.last=na.last, - decreasing=decreasing, - method=method))) + ## do.call("order", args) doesn't work here + ## I can't figure out why...this produces the same result + lapply(args, order, + na.last=na.last, decreasing=decreasing, method=method) } } ) @@ -200,30 +249,11 @@ setMethod("order", "XVector", } setMethod("sameAsPreviousROW", "XVector", .XVector.sameAsPreviousROW) -## These methods are defined so that the XVector argument comes first +## This methods are defined so that the XVector argument comes first ## this matters because of how S4Vectors::pcompare is defined; it attempts ## to coerce the second argument to a list and then concatenate, which can ## cause weird behavior if the first element is an atomic vector and the ## second is an XVector object. -setMethod("==", signature(e1="ANY", e2="XVector"), - function(e1, e2) { pcompare(e2, e1) == 0 } -) - -setMethod("<=", signature(e1="ANY", e2="XVector"), - function(e1, e2) { pcompare(e2, e1) >= 0L } -) - -.XVector.equal <- function(x, y) -{ - if (class(x) != class(y) || x@length != y@length) - return(FALSE) - ans <- !SharedVector.compare(x@shared, x@offset + 1L, - y@shared, y@offset + 1L, - x@length) - as.logical(ans) -} - -setMethod("==", signature(e1="XVector", e2="XVector"), - function(e1, e2) .XVector.equal(e1, e2) +setMethod("pcompare", signature(x="ANY", y="XVector"), + function(x, y) -1*callNextMethod(y, x) ) - diff --git a/man/XVector-class.Rd b/man/XVector-class.Rd index 70f4fae..e9d74bf 100644 --- a/man/XVector-class.Rd +++ b/man/XVector-class.Rd @@ -15,6 +15,13 @@ \alias{as.numeric,XVector-method} \alias{show,XVector-method} \alias{==,XVector,XVector-method} +\alias{<=,XVector,XVector-method} +\alias{>=,XVector,XVector-method} +\alias{<,XVector,XVector-method} +\alias{>,XVector,XVector-method} +\alias{!=,XVector,XVector-method} +\alias{pcompare,XVector,XVector-method} +\alias{order,XVector,XVector-method} % XRaw class, functions and methods: \alias{class:XRaw} @@ -80,6 +87,31 @@ data when a linear subsequence needs to be extracted. } +\section{Comparison operations on XVector objects}{ +Unlike the R's base vectors, comparing two XVector objects works \emph{atomically} -- that is, it doesn't compare element-by-element, but rather the two vectors as a whole. Thus, the return value of a comparison between two XVector objects will always be a single logical value. Comparison between an XVector and a base vector is performed by coercing the base vector to the same type as the XVector prior to comparison (potentially throwing an error if the comparison is impossible!). + +For element-wise comparison, the following are provided: + + \describe{ + \item{\code{pcompare(x,y)}:}{ + Compares the elements of two vectors \code{x} and \code{y} in an + element-wise fashion. If \code{length(x) != length(y)}, the shorter + length vector is recycled to the length of the longer. Returns a + vector where the i'th element is: + \itemize{ + \item negative if \code{x[i] < y[i]} + \item zero if \code{x[i] == y[i]} + \item positive if \code{x[i] > y[i]} + } More details are + available in the help page available via S4Vectors: + \code{\link[S4Vectors]{pcompare}}. + } + \item{\code{order(..., na.last=TRUE, decreasing=FALSE, method=c("auto", "shell", "radix"))}:}{ + Returns a permutation vector that rearranges its first argument into ascending or descending order, similar to \code{\link[base]{order}}. Argument \code{na.last} is ignored, since XVector objects do not allow \code{NA} values. Argument \code{method} is currently ignored, but will be implemented in the future. If multiple XVectors are passed, returns a list of permutation vectors for each XVector. + } + } +} + \section{Additional Subsetting operations on XVector objects}{ In the code snippets below, \code{x} is an XVector object. @@ -168,6 +200,26 @@ x3[length(x3):1] x3[length(x3):1, drop=FALSE] + + ## --------------------------------------------------------------------- + ## C. Comparing XVector OBJECTS + ## --------------------------------------------------------------------- + xv <- XInteger(5, 1:5) + yv <- XInteger(5, 5:1) + + ## Comparison between XVector objects is ATOMIC + xv == yv ## FALSE + xv < yv ## TRUE + + ## Element-wise comparison uses pcompare + pcompare(xv, yv) ## -1 -1 0 1 1 + pcompare(yv, xv) ## 1 1 0 -1 -1 + pcompare(xv, 5:1) ## equivalent to pcompare(xv, yv) + + ## Convert to T/F values by comparing against zero: + pcompare(xv, yv) < 0 ## element-wise xv < yv + pcomapre(xv, yv) >= 0 ## element-size xv >= yv + } \keyword{methods}