Skip to content

Commit 21008ca

Browse files
committed
Remove offset field from GenericIOBuffer
This field was added in d7b9ac8 when the buffer's underlying data was changed to Memory. However, I think it's not a good tradeoff. The benefit of the offset field is that, when a buffer is constructed from a Vector which has previously been pushfirst!'ed to, no data needs to be copied into the vector. However, this is probably rare. The downside is that it makes the implementation more complex, the struct larger, and adds extra overhead to all operations on the IO. In this commit, we instead copy the data internally in the buffer if constructed from a Vector with a non-zero memory offset.
1 parent 090489d commit 21008ca

File tree

2 files changed

+57
-83
lines changed

2 files changed

+57
-83
lines changed

base/iobuffer.jl

+53-79
Original file line numberDiff line numberDiff line change
@@ -5,28 +5,26 @@
55
# Here, u represents used bytes (already read), X represents bytes still to read,
66
# - represents bytes uninitialized data but which can be written to later.
77
# . are bytes which can neither be read nor written - but the IOBuffer has control
8-
# of the full array, and may move the offset as it wishes to overwrite any . bytes.
8+
# of the full array.
99

10-
# ...uuuuuuuuuuuuuXXXXXXXXXXXXX------.......
11-
# | | | | | | |
12-
# | offset | ptr size | |
13-
# 1 mark | lastindex(data)
14-
# maxsize
10+
# uuuuuuuuuuuuuXXXXXXXXXXXXX------.......
11+
# | | | | | |
12+
# | | ptr size | |
13+
# 1 mark | lastindex(data)
14+
# maxsize
1515

1616
# AFTER COMPACTION
17-
# Mark, ptr and size decreases by (mark - offset - 1)
17+
# Mark, ptr and size decreases by (mark - 1)
1818

19-
# ...uuuuuXXXXXXXXXXXXX--------------.......
20-
# | || | | | |
21-
# | offset| size | lastindex(data)
22-
# 1 | ptr maxsize
23-
# mark ( == offset + 1)
19+
# uuuuuXXXXXXXXXXXXX--------------.......
20+
# | | | | |
21+
# | ptr size | lastindex(data)
22+
# mark maxsize
2423

2524
# * The underlying array is always 1-indexed
26-
# * The IOBuffer has full control of the underlying array. That means it may move
27-
# the offset, clobbering data in 1:offset and in size:lastindex(data) at will.
25+
# * The IOBuffer has full control of the underlying array.
2826
# * Data in maxsize+1:lastindex(data) is also never touched.
29-
# * Data in offset+1:mark-1, if mark > 0 can be deleted, shifting the whole thing to the left
27+
# * Data in 1::mark-1, if mark > 0 can be deleted, shifting the whole thing to the left
3028
# to make room for more data, without replacing or resizing data
3129

3230
mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO
@@ -39,15 +37,15 @@ mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO
3937
readable::Bool
4038
writable::Bool
4139

42-
# If not seekable, implementation is free to destroy (compact) data in offset+1:mark-1
40+
# If not seekable, implementation is free to destroy (compact) data in 1:mark-1
4341
seekable::Bool
4442

4543
# If true, write new data to the index size+1 instead of the index ptr.
4644
append::Bool
4745

4846
# Last index of `data` that has been written to. Data in size+1:end has not yet been used,
4947
# and may contain arbitrary values.
50-
# This value is always in offset : min(maxsize, lastindex(data))
48+
# This value is always in 0 : min(maxsize, lastindex(data))
5149
size::Int
5250

5351
# Size can never be larger than this value. This is useful for two use cases:
@@ -59,25 +57,19 @@ mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO
5957

6058
# Data is read/written from/to ptr, except in situations where append is true, in which case
6159
# data is still read from ptr, but written to size+1.
62-
# This value is alwaus in offset+1 : size+1
60+
# This value is alwaus in 1 : size+1
6361
ptr::Int
6462

65-
# When constructed from a Vector `v`, `data` is its underlying memory, but the memory may contain
66-
# data before v[1]. In that case, the offset is the number of leading bytes in the memory that
67-
# the IO may never touch.
68-
# This value is always in 0:lastindex(data)
69-
offset::Int
70-
7163
# Data before the marked location for non-seekable buffers can be compacted.
7264
# If this is -1, the mark is not set.
7365
# The purpose of the mark is to reset the stream to a given position using reset.
74-
# This value is always == -1, or in offset+1:size
66+
# This value is always == -1, or in 1:size
7567
mark::Int
7668

7769
function GenericIOBuffer{T}(data::T, readable::Bool, writable::Bool, seekable::Bool, append::Bool,
7870
maxsize::Integer) where T<:AbstractVector{UInt8}
7971
require_one_based_indexing(data)
80-
return new(data, false, readable, writable, seekable, append, length(data), maxsize, 1, 0, -1)
72+
return new(data, false, readable, writable, seekable, append, length(data), maxsize, 1, -1)
8173
end
8274
end
8375

@@ -88,16 +80,19 @@ function GenericIOBuffer(data::T, readable::Bool, writable::Bool, seekable::Bool
8880
GenericIOBuffer{T}(data, readable, writable, seekable, append, maxsize)
8981
end
9082

91-
# For this method, we use the underlying Memory of the vector. Therefore, we need to set the offset,
83+
# For this method, we use the underlying Memory of the vector. Therefore, we need to set the,
9284
# ptr and size accordingly, so the buffer only uses the part of the memory that the vector does.
9385
function GenericIOBuffer(data::Vector{UInt8}, readable::Bool, writable::Bool, seekable::Bool, append::Bool,
9486
maxsize::Integer)
9587
ref = data.ref
96-
buf = GenericIOBuffer(ref.mem, readable, writable, seekable, append, maxsize)
88+
mem = ref.mem
89+
len = length(data)
9790
offset = memoryrefoffset(ref) - 1
98-
buf.ptr += offset
99-
buf.size = length(data) + offset
100-
buf.offset = offset
91+
if !iszero(offset)
92+
unsafe_copyto!(mem, 1, mem, offset+1, len)
93+
end
94+
buf = GenericIOBuffer(mem, readable, writable, seekable, append, maxsize)
95+
buf.size = len
10196
return buf
10297
end
10398

@@ -181,7 +176,7 @@ function IOBuffer(
181176
flags = open_flags(read=read, write=write, append=append, truncate=truncate)
182177
buf = GenericIOBuffer(data, flags.read, flags.write, true, flags.append, Int(maxsize))
183178
if flags.truncate
184-
buf.size = buf.offset
179+
buf.size = 0
185180
end
186181
return buf
187182
end
@@ -245,7 +240,6 @@ function copy(b::GenericIOBuffer)
245240
ret.size = b.size
246241
ret.ptr = b.ptr
247242
ret.mark = b.mark
248-
ret.offset = b.offset
249243
return ret
250244
end
251245

@@ -254,9 +248,9 @@ show(io::IO, b::GenericIOBuffer) = print(io, "IOBuffer(data=UInt8[...], ",
254248
"writable=", b.writable, ", ",
255249
"seekable=", b.seekable, ", ",
256250
"append=", b.append, ", ",
257-
"size=", b.size - b.offset, ", ",
251+
"size=", b.size, ", ",
258252
"maxsize=", b.maxsize == typemax(Int) ? "Inf" : b.maxsize, ", ",
259-
"ptr=", b.ptr - b.offset, ", ",
253+
"ptr=", b.ptr, ", ",
260254
"mark=", b.mark, ")")
261255

262256
@noinline function _throw_not_readable()
@@ -352,13 +346,13 @@ isreadable(io::GenericIOBuffer) = io.readable
352346
iswritable(io::GenericIOBuffer) = io.writable
353347

354348
# Number of bytes that can be read from the buffer, if you seek to the start first.
355-
filesize(io::GenericIOBuffer) = (io.seekable ? io.size - io.offset : bytesavailable(io))
349+
filesize(io::GenericIOBuffer) = (io.seekable ? io.size : bytesavailable(io))
356350

357351
# Number of bytes that can be read from the buffer.
358352
bytesavailable(io::GenericIOBuffer) = io.size - io.ptr + 1
359353

360354
# Position is zero-indexed, but ptr is one-indexed, hence the -1
361-
position(io::GenericIOBuffer) = io.ptr - io.offset - 1
355+
position(io::GenericIOBuffer) = io.ptr - 1
362356

363357
function skip(io::GenericIOBuffer, n::Integer)
364358
skip(io, clamp(n, Int))
@@ -393,7 +387,7 @@ function seek(io::GenericIOBuffer, n::Int)
393387
# of an GenericIOBuffer), so that would need to be fixed in order to throw an error here
394388
#(n < 0 || n > io.size - io.offset) && throw(ArgumentError("Attempted to seek outside IOBuffer boundaries."))
395389
#io.ptr = n + io.offset + 1
396-
io.ptr = clamp(n, 0, io.size - io.offset) + io.offset + 1
390+
io.ptr = clamp(n, 0, io.size) + 1
397391
return io
398392
end
399393

@@ -407,29 +401,18 @@ end
407401
# this calls `similar`+copy
408402
function _resize!(io::GenericIOBuffer, sz::Int)
409403
a = io.data
410-
offset = io.offset
411404
if applicable(resize!, a, sz)
412-
if offset != 0
413-
size = io.size
414-
size > offset && copyto!(a, 1, a, offset + 1, min(sz, size - offset))
415-
io.ptr -= offset
416-
io.size -= offset
417-
io.offset = 0
418-
end
419405
resize!(a, sz)
420406
else
421407
size = io.size
422408
# Make a new data buffer, only if there is not room in existing buffer
423409
if size >= sz && sz != 0
424410
b = a
425411
else
426-
b = _similar_data(io, sz == 0 ? 0 : max(overallocation(size - io.offset), sz))
412+
b = _similar_data(io, sz == 0 ? 0 : max(overallocation(size), sz))
427413
end
428-
size > offset && copyto!(b, 1, a, offset + 1, min(sz, size - offset))
414+
size > 0 && copyto!(b, 1, a, 1, min(sz, size))
429415
io.data = b
430-
io.ptr -= offset
431-
io.size -= offset
432-
io.offset = 0
433416
end
434417
return io
435418
end
@@ -443,20 +426,18 @@ function truncate(io::GenericIOBuffer, n::Integer)
443426
if io.reinit
444427
io.data = _similar_data(io, n)
445428
io.reinit = false
446-
elseif n > length(io.data) + io.offset
429+
elseif n > length(io.data)
447430
_resize!(io, n)
448431
end
449432
ismarked(io) && io.mark > n && unmark(io)
450-
n += io.offset
451433
io.data[io.size+1:n] .= 0
452434
io.size = n
453435
io.ptr = min(io.ptr, n+1)
454436
return io
455437
end
456438

457439
# Internal method. Delete used data in the buffer, shifting the rest to the left.
458-
# Does not delete data at or after the mark, nor data before the offset, nor data
459-
# after ptr.
440+
# Does not delete data at or after the mark, nor data after ptr.
460441
function compact(io::GenericIOBuffer)
461442
# For a seekable buffer, the user could always seek back to the used data.
462443
# Therefore, it is invalid to compact a seekable buffer
@@ -467,7 +448,7 @@ function compact(io::GenericIOBuffer)
467448
local ptr::Int, bytes_to_move::Int
468449
if ismarked(io) && io.mark < position(io)
469450
io.mark == 0 && return
470-
ptr = io.mark + io.offset
451+
ptr = io.mark
471452
bytes_to_move = bytesavailable(io) + (io.ptr - ptr)
472453
else
473454
ptr = io.ptr
@@ -476,7 +457,6 @@ function compact(io::GenericIOBuffer)
476457
copyto!(io.data, 1, io.data, ptr, bytes_to_move)
477458
io.size -= ptr - 1
478459
io.ptr -= ptr - 1
479-
io.offset = 0
480460
return
481461
end
482462

@@ -487,13 +467,12 @@ end
487467
io.reinit = false
488468
end
489469
if !io.seekable
490-
if !ismarked(io) && io.ptr > io.offset+1 && io.size <= io.ptr - 1
470+
if !ismarked(io) && io.ptr > 1 && io.size <= io.ptr - 1
491471
io.ptr = 1
492472
io.size = 0
493-
io.offset = 0
494473
else
495-
datastart = (ismarked(io) ? io.mark : io.ptr - io.offset)
496-
if (io.size-io.offset+nshort > io.maxsize) ||
474+
datastart = (ismarked(io) ? io.mark : io.ptr)
475+
if (io.size+nshort > io.maxsize) ||
497476
(datastart > 4096 && datastart > io.size - io.ptr) ||
498477
(datastart > 262144)
499478
# apply somewhat arbitrary heuristics to decide when to destroy
@@ -507,11 +486,11 @@ end
507486

508487
@inline ensureroom(io::GenericIOBuffer, nshort::Int) = ensureroom(io, UInt(nshort))
509488
@inline function ensureroom(io::GenericIOBuffer, nshort::UInt)
510-
if !io.writable || (!io.seekable && io.ptr > io.offset+1) || io.reinit
489+
if !io.writable || (!io.seekable && io.ptr > 1) || io.reinit
511490
ensureroom_slowpath(io, nshort)
512491
end
513-
n = min((nshort % Int) + (io.append ? io.size : io.ptr-1) - io.offset, io.maxsize)
514-
l = length(io.data) + io.offset
492+
n = min((nshort % Int) + (io.append ? io.size : io.ptr-1), io.maxsize)
493+
l = length(io.data)
515494
if n > l
516495
_resize!(io, Int(n))
517496
end
@@ -530,7 +509,6 @@ end
530509
io.writable = false
531510
io.seekable = false
532511
io.size = 0
533-
io.offset = 0
534512
io.maxsize = 0
535513
io.ptr = 1
536514
io.mark = -1
@@ -563,11 +541,11 @@ function take!(io::GenericIOBuffer)
563541
if io.seekable
564542
# If the buffer is seekable, then the previously consumed bytes from ptr+1:size
565543
# must still be output, as they are not truly gone.
566-
# Hence, we output all bytes from offset+1:io.size
567-
nbytes = io.size - io.offset
568-
data = copyto!(StringVector(nbytes), 1, io.data, io.offset + 1, nbytes)
544+
# Hence, we output all bytes from 1:io.size
545+
nbytes = io.size
546+
data = copyto!(StringVector(nbytes), 1, io.data, 1, nbytes)
569547
else
570-
# Else, if not seekable, bytes from offset+1:ptr-1 are truly gone and should not
548+
# Else, if not seekable, bytes from 1:ptr-1 are truly gone and should not
571549
# be output. Hence, we output `bytesavailable`, which is ptr:size
572550
nbytes = bytesavailable(io)
573551
data = read!(io, StringVector(nbytes))
@@ -577,7 +555,6 @@ function take!(io::GenericIOBuffer)
577555
if io.writable
578556
io.ptr = 1
579557
io.size = 0
580-
io.offset = 0
581558
end
582559
return data
583560
end
@@ -592,9 +569,9 @@ function take!(io::IOBuffer)
592569
if nbytes == 0 || io.reinit
593570
data = StringVector(0)
594571
elseif io.writable
595-
data = wrap(Array, memoryref(io.data, io.offset + 1), nbytes)
572+
data = wrap(Array, memoryref(io.data, 1), nbytes)
596573
else
597-
data = copyto!(StringVector(nbytes), 1, io.data, io.offset + 1, nbytes)
574+
data = copyto!(StringVector(nbytes), 1, io.data, 1, nbytes)
598575
end
599576
else
600577
nbytes = bytesavailable(io)
@@ -610,7 +587,6 @@ function take!(io::IOBuffer)
610587
io.reinit = true
611588
io.ptr = 1
612589
io.size = 0
613-
io.offset = 0
614590
end
615591
return data
616592
end
@@ -628,10 +604,10 @@ It might save an allocation compared to `take!` (if the compiler elides the
628604
Array allocation), as well as omits some checks.
629605
"""
630606
_unsafe_take!(io::IOBuffer) =
631-
wrap(Array, io.size == io.offset ?
607+
wrap(Array, io.size == 0 ?
632608
memoryref(Memory{UInt8}()) :
633-
memoryref(io.data, io.offset + 1),
634-
io.size - io.offset)
609+
memoryref(io.data, 1),
610+
io.size)
635611

636612
function write(to::IO, from::GenericIOBuffer)
637613
written::Int = bytesavailable(from)
@@ -733,9 +709,7 @@ function copyline(out::GenericIOBuffer, s::IO; keep::Bool=false)
733709
copyuntil(out, s, 0x0a, keep=true)
734710
line = out.data
735711
i = out.size # XXX: this is only correct for appended data. if the data was inserted, only ptr should change
736-
# TODO: This computation seems to be invalid. The buffer size may be smaller than out.size
737-
# due to the presence of out.offset.
738-
if keep || i == out.offset || line[i] != 0x0a
712+
if keep || iszero(i) || line[i] != 0x0a
739713
return out
740714
elseif i < 2 || line[i-1] != 0x0d
741715
i -= 1

base/stream.jl

+4-4
Original file line numberDiff line numberDiff line change
@@ -617,7 +617,7 @@ end
617617
function alloc_request(buffer::IOBuffer, recommended_size::UInt)
618618
ensureroom(buffer, Int(recommended_size))
619619
ptr = buffer.append ? buffer.size + 1 : buffer.ptr
620-
nb = min(length(buffer.data)-buffer.offset, buffer.maxsize) + buffer.offset - ptr + 1
620+
nb = min(length(buffer.data), buffer.maxsize) - ptr + 1
621621
return (Ptr{Cvoid}(pointer(buffer.data, ptr)), nb)
622622
end
623623

@@ -944,7 +944,7 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int)
944944
else
945945
initsize = length(a)
946946
newbuf = PipeBuffer(a, maxsize=nb)
947-
newbuf.size = newbuf.offset # reset the write pointer to the beginning
947+
newbuf.size = 0 # reset the write pointer to the beginning
948948
nread = try
949949
s.buffer = newbuf
950950
write(newbuf, sbuf)
@@ -992,7 +992,7 @@ function unsafe_read(s::LibuvStream, p::Ptr{UInt8}, nb::UInt)
992992
unsafe_read(sbuf, p, nb)
993993
else
994994
newbuf = PipeBuffer(unsafe_wrap(Array, p, nb), maxsize=Int(nb))
995-
newbuf.size = newbuf.offset # reset the write pointer to the beginning
995+
newbuf.size = 0 # reset the write pointer to the beginning
996996
try
997997
s.buffer = newbuf
998998
write(newbuf, sbuf)
@@ -1601,7 +1601,7 @@ function readbytes!(s::BufferStream, a::Vector{UInt8}, nb::Int)
16011601
else
16021602
initsize = length(a)
16031603
newbuf = PipeBuffer(a, maxsize=nb)
1604-
newbuf.size = newbuf.offset # reset the write pointer to the beginning
1604+
newbuf.size = 0 # reset the write pointer to the beginning
16051605
nread = try
16061606
s.buffer = newbuf
16071607
write(newbuf, sbuf)

0 commit comments

Comments
 (0)