Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions R/XString-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,70 @@ setMethod("extract_character_from_XString_by_ranges", "XString",
}
)

### BString methods to support 0:255 input

## BSTRING_RAW_LOOKUP is initialized in `zzz.R`
setMethod("extract_character_from_XString_by_ranges", "BString",
function(x, start, width, collapse=FALSE)
{
SHOW_RAW <- getOption("Biostrings.showRaw")
if(!is.logical(SHOW_RAW)){
warning("Invalid value for option 'Biostrings.showRaw', ",
"resetting to FALSE")
SHOW_RAW <- FALSE
options(Biostrings.showRaw=FALSE)
}
if(!SHOW_RAW){
callNextMethod()
} else {
bstring_lookup <- get("BSTRING_RAW_LOOKUP", envir=.pkgenv)
lkup <- xs_dec_lkup(x)

## need to remap null bytes, they have to be in 0:255
## so we have to overload some value
if(is.null(lkup)) lkup <- c(255L,1:255)
xs <- XVector:::extract_character_from_XRaw_by_ranges(x, start, width,
collapse=collapse,
lkup=lkup)
## replace all undisplayable characters
for(i in seq_along(xs))
xs[i] <- paste(bstring_lookup[as.integer(charToRaw(xs[i]))+1L],
collapse='')
xs
}
}
)

setMethod("extract_character_from_XString_by_positions", "BString",
function(x, pos, collapse=FALSE)
{
SHOW_RAW <- getOption("Biostrings.showRaw")
if(!is.logical(SHOW_RAW)){
warning("Invalid value for option 'Biostrings.showRaw', ",
"resetting to FALSE")
SHOW_RAW <- FALSE
options(Biostrings.showRaw=FALSE)
}
if(!SHOW_RAW){
callNextMethod()
} else {
lkup <- xs_dec_lkup(x)
bstring_lookup <- get("BSTRING_RAW_LOOKUP", envir=.pkgenv)

## need to remap null bytes, they have to be in 0:255
## so we have to overload some value
if(is.null(lkup)) lkup <- c(255L,1:255)
xs <- XVector:::extract_character_from_XRaw_by_positions(x, pos,
collapse=collapse,
lkup=lkup)
## replace all undisplayable characters
for(i in seq_along(xs))
xs[i] <- paste(bstring_lookup[as.integer(charToRaw(xs[i]))+1L],
collapse='')
xs
}
}
)

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### make_XString_from_string()
Expand Down
30 changes: 30 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
###

.pkgenv <- new.env(parent=emptyenv())

.onLoad <- function(libname, pkgname)
{
.Call2("init_DNAlkups",
Expand All @@ -16,6 +18,34 @@
option_name <- "Biostrings.coloring"
if (!(option_name %in% names(.Options)))
options(setNames(list(TRUE), option_name))

option_name <- "Biostrings.showRaw"
if (!(option_name %in% names(.Options)))
options(setNames(list(FALSE), option_name))

## BString lookup for raw strings
## 256 char lookup table for 0:255 (note off by one)
## characters 0-31 and 127-255 are not displayable
## so positions 1-32 and 128-256 should be replaced
encoding_details <- l10n_info()
bstring_lookup <- rawToChar(as.raw(0:255), multiple=TRUE)
invalid_chars <- c(1:32,128:256)
# if(encoding_details$`UTF-8`){
# # braille is nice if supported
# # allows for char comparisons after as.character() comparisons
# # I think it's overkill, though...uncomment this section if we need it
# bstring_lookup[invalid_chars] <-
# as.character(parse(text=paste0("'\\U28", as.raw(95:255), "'")))
# } else
if (encoding_details$MBCS){
# use multibyte question mark if supported
compact_unknown <- rawToChar(as.raw(c(0xef, 0xbf, 0xbd)))
bstring_lookup[invalid_chars] <- compact_unknown
} else {
# otherwise just use the regular '?'
bstring_lookup[invalid_chars] <- "?"
}
assign("BSTRING_RAW_LOOKUP", bstring_lookup, envir=.pkgenv)
}

.onUnload <- function(libpath)
Expand Down
22 changes: 21 additions & 1 deletion man/XString-class.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@
\describe{
\item{\code{alphabet(x)}:}{
\code{NULL} for a \code{BString} object.
See the corresponding man pages when \code{x} is a
See the corresponding man pages when \code{x} is a
\link{DNAString}, \link{RNAString} or \link{AAString} object.
}
\item{\code{length(x)}:}{ or \code{nchar(x)}:
Expand Down Expand Up @@ -167,6 +167,14 @@
}
}

\note{
BString objects can technically hold any valid ASCII code, which includes all values in the range 0:255. However, non-displayable characters may cause the BString object to display in a weird format (see Examples for one such case). Even worse, if a 0 value is included in the BString, attempting to display it will throw a cryptic error that looks like this:

\code{Error in XVector:::extract_character_from_XRaw_by_ranges: embedded nul in string: ...}

BString objects are intended to hold displayable characters, so this shouldn't be an issue for most cases. However, if you need BString objects to hold non-displayable values, you can set \code{options(Biostrings.showRaw=TRUE)} to fix the formatting of non-displayable characters. Note that this adds some overhead to methods that show BString objects.
}

\author{H. Pagès}

\seealso{
Expand Down Expand Up @@ -214,6 +222,18 @@ identical(b, 1:length(b)) # FALSE
## Compacting. As a particular type of XVector objects, XString
## objects can optionally be compacted. Compacting is done typically
## before serialization. See ?compact for more information.

## Non-displayable characters in BStrings
## BString objects support any value, though this isn't encouraged:
b_bad <- as(as(as.raw(c(10,65,10,66,10,67,68)), "XRaw"), "BString")
b_bad ## formatting is all messed up because 10 = \n

## if you really need to display characters like this, set this option:
options(Biostrings.showRaw=TRUE)
b_bad ## now all 7 characters "display"

## reseting to default
options(Biostrings.showRaw=FALSE)
}

\keyword{methods}
Expand Down
23 changes: 23 additions & 0 deletions tests/testthat/test-XString-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,29 @@ test_that("reverse, complement, reverseComplement work correctly", {
expect_equal(as.character(reverseComplement(mrna)), .revString(mr_comp))
})

test_that("BStrings display correctly with full 0-255 value range", {
orig_setting <- getOption("Biostrings.showRaw")
full_bstring <- as(as(as.raw(0:255),"XRaw"),"BString")
options(Biostrings.showRaw=FALSE)
expect_error(extract_character_from_XString_by_ranges(full_bstring, 1L, 256L),
"embedded nul in string")

## can't really test MBCS vs. non-MBCS because we can't guarantee
## the test suites will run on a platform with(out) MBCS
options(Biostrings.showRaw=TRUE)
expect_is(extract_character_from_XString_by_ranges(full_bstring, 1L, 256L),
"character")

options(Biostrings.showRaw=10)
expect_warning(extract_character_from_XString_by_ranges(BString("ABCD"), 1L, 4L),
"Invalid value for option 'Biostrings.showRaw'")
expect_false(getOption("Biostrings.showRaw"))

## make sure we leave the system as we found it
options(Biostrings.showRaw=orig_setting)

})

## Porting RUnit tests
test_that("alphabet finds the correct values", {
expect_equal(alphabet(DNAString(dnastr)), strsplit(dnastr, "")[[1]])
Expand Down
Loading