Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions R/XStringSet-io.R
Original file line number Diff line number Diff line change
Expand Up @@ -362,8 +362,11 @@ readAAStringSet <- function(filepath, format="fasta",
stop(wmsg("'width' must be a single integer"))
if (!is.integer(width))
width <- as.integer(width)
if (width < 1L)
stop(wmsg("'width' must be an integer >= 1"))
if (width < 1L) {
## set to maximum possible width
## using `lengths` because the function overrides `width`
width <- max(lengths(x))
}
lkup <- get_seqtype_conversion_lookup(seqtype(x), "B")
.Call2("write_XStringSet_to_fasta",
x, filexp_list, width, lkup,
Expand Down
2 changes: 1 addition & 1 deletion man/XStringSet-io.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ saveXStringSet(x, objname, dirpath=".", save.dups=FALSE, verbose=TRUE)

If \code{format="fasta"}, the \code{width} argument can be used to
specify the maximum number of letters per line of sequence.
\code{width} must be a single integer.
\code{width} must be a single integer, and uses an unbounded width if set to a value less than 1.

If \code{format="fastq"}, the \code{qualities} argument can be used to
specify the quality strings. \code{qualities} must be a \link{BStringSet}
Expand Down
24 changes: 18 additions & 6 deletions src/read_fasta_files.c
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,7 @@ SEXP write_XStringSet_to_fasta(SEXP x, SEXP filexp_list, SEXP width, SEXP lkup)
{
XStringSet_holder X;
int x_length, width0, lkup_len, i, j1, j2, dest_nbytes;
int nmax_write_bytes, nwrote_bytes;
const int *lkup0;
SEXP filexp, x_names, desc;
Chars_holder X_elt;
Expand All @@ -636,8 +637,7 @@ SEXP write_XStringSet_to_fasta(SEXP x, SEXP filexp_list, SEXP width, SEXP lkup)
x_length = _get_length_from_XStringSet_holder(&X);
filexp = VECTOR_ELT(filexp_list, 0);
width0 = INTEGER(width)[0];
if (width0 >= IOBUF_SIZE)
error("'width' must be <= %d", IOBUF_SIZE - 1);
nmax_write_bytes = width0 >= IOBUF_SIZE ? IOBUF_SIZE - 1 : width0;
iobuf[width0] = 0;
if (lkup == R_NilValue) {
lkup0 = NULL;
Expand All @@ -657,21 +657,33 @@ SEXP write_XStringSet_to_fasta(SEXP x, SEXP filexp_list, SEXP width, SEXP lkup)
}
filexp_puts(filexp, "\n");
X_elt = _get_elt_from_XStringSet_holder(&X, i);
for (j1 = 0; j1 < X_elt.length; j1 += width0) {
j2 = j1 + width0;
j1 = 0;
nwrote_bytes = 0;
while(j1 < X_elt.length){
j2 = j1 + nmax_write_bytes;
if (j2 > X_elt.length)
j2 = X_elt.length;
dest_nbytes = j2 - j1;
if(nwrote_bytes + dest_nbytes > width0){
// align write to size width0
dest_nbytes = width0 - nwrote_bytes;
j2 = j1 + dest_nbytes;
}
j2--;
Ocopy_bytes_from_i1i2_with_lkup(j1, j2,
iobuf, dest_nbytes,
X_elt.ptr, X_elt.length,
lkup0, lkup_len);
iobuf[dest_nbytes] = 0;
nwrote_bytes += dest_nbytes;
j1 = j2+1;
filexp_puts(filexp, iobuf);
filexp_puts(filexp, "\n");
if(j1 == X_elt.length || nwrote_bytes == width0){
// add newline if at end of line (width0) or end of sequence
filexp_puts(filexp, "\n");
nwrote_bytes = 0;
}
}
}
return R_NilValue;
}

32 changes: 19 additions & 13 deletions src/read_fastq_files.c
Original file line number Diff line number Diff line change
Expand Up @@ -615,9 +615,25 @@ static void write_FASTQ_id(SEXP filexp, const char *markup, const char *id)
filexp_puts(filexp, "\n");
}

static void write_FASTQ_seq(SEXP filexp, const char *buf)
static void write_FASTQ_seq(SEXP filexp, const Chars_holder X,
const int *lkup0, int lkup_len)
{
filexp_puts(filexp, buf);
// write sequences in chunks in case sequence is longer than I/O buffer
int i1, i2, bytes_to_write, bytes_remaining;
bytes_remaining = X.length;
i1 = 0;
while(bytes_remaining){
bytes_to_write = bytes_remaining >= IOBUF_SIZE ? IOBUF_SIZE-1 : bytes_remaining;
i2 = i1+bytes_to_write-1;
Ocopy_bytes_from_i1i2_with_lkup(i1, i2,
iobuf, bytes_to_write,
X.ptr, X.length,
lkup0, lkup_len);
iobuf[bytes_to_write] = 0;
filexp_puts(filexp, iobuf);
i1 = i2 + 1;
bytes_remaining -= bytes_to_write;
}
filexp_puts(filexp, "\n");
}

Expand Down Expand Up @@ -677,18 +693,8 @@ SEXP write_XStringSet_to_fastq(SEXP x, SEXP filexp_list,
for (i = 0; i < x_length; i++) {
id = get_FASTQ_rec_id(x_names, q_names, i);
X_elt = _get_elt_from_XStringSet_holder(&X, i);
if (X_elt.length >= IOBUF_SIZE)
error("XStringSet object (or derivative) to "
"write 'x' cannot contain strings\n longer "
"than %d ('x[[%d]]' has %d characters)",
IOBUF_SIZE - 1, i + 1, X_elt.length);
Ocopy_bytes_from_i1i2_with_lkup(0, X_elt.length - 1,
iobuf, X_elt.length,
X_elt.ptr, X_elt.length,
lkup0, lkup_len);
iobuf[X_elt.length] = 0;
write_FASTQ_id(filexp, FASTQ_line1_markup, id);
write_FASTQ_seq(filexp, iobuf);
write_FASTQ_seq(filexp, X_elt, lkup0, lkup_len);
write_FASTQ_id(filexp, FASTQ_line3_markup, id);
if (qualities != R_NilValue) {
write_FASTQ_qual(filexp, X_elt.length, &Q, i);
Expand Down
40 changes: 40 additions & 0 deletions tests/testthat/test-XStringSet-io.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,3 +175,43 @@ test_that("XStringSet read/write is correct", {
unlink(tmpfilefq)
})

test_that("Writing XStringSets larger than IOBUF_SIZE functions correctly", {
## IOBUF_SIZE is 200,003
tf <- tempfile()
set.seed(456L)
x1 <- paste(sample(DNA_BASES, 400020, replace=TRUE), collapse='')
x2 <- paste(sample(DNA_BASES, 200127, replace=TRUE), collapse='')
xss <- DNAStringSet(c(x1, x2))
names(xss) <- c("seq1", "seq2")
exp_output_fasta <- paste0('>seq1', x1, '>seq2', x2)
for(w in c(5, 80, 213, 200003, width(xss)[1], width(xss)[2], -1)){
writeXStringSet(xss, tf, width = w)
opt <- readLines(tf)
opt <- paste(opt, collapse='')
expect_equal(opt, exp_output_fasta)
read_seqs <- readDNAStringSet(tf)
expect_true(all(as.character(read_seqs) == as.character(xss)))
}
})

test_that("Writing QualityScaledXStringSets larger than IOBUF_SIZE functions correctly", {
## IOBUF_SIZE is 200,003
tf <- tempfile()
set.seed(456L)
x1 <- paste(sample(DNA_BASES, 200127, replace=TRUE), collapse='')
x2 <- paste(sample(DNA_BASES, 400020, replace=TRUE), collapse='')
q1 <- paste(sample(as.character(1:9), 200127, replace=TRUE), collapse='')
q2 <- paste(sample(as.character(1:9), 400020, replace=TRUE), collapse='')
xss <- DNAStringSet(c(x1, x2))
quals <- PhredQuality(c(q1, q2))
names(xss) <- c("seq1", "seq2")
names(quals) <- c("seq1", "seq2")
exp_output_fastq <- paste0("@seq1", x1, "+seq1", q1, "@seq2", x2, "+seq2", q2)
qss <- QualityScaledDNAStringSet(xss, quals)
writeQualityScaledXStringSet(qss, tf)
opt <- paste(readLines(tf), collapse='')
expect_equal(opt, exp_output_fastq)
read_seqs <- readQualityScaledDNAStringSet(tf)
expect_true(all(as.character(read_seqs) == as.character(xss)))
expect_true(all(as.character(quality(read_seqs)) == as.character(quals)))
})
Loading