|
20 | 20 | # uuuuuXXXXXXXXXXXXX---------------------
|
21 | 21 | # || | | | |
|
22 | 22 | # |1 ptr size | maxsize (≥ lastindex)
|
23 |
| -# lastindex(data) |
| 23 | +# | lastindex(data) |
24 | 24 | # offset (set to zero)
|
25 | 25 |
|
26 | 26 | # * The underlying array is always 1-indexed
|
27 | 27 | # * The IOBuffer has full control (ownership) of the underlying array, only when
|
28 | 28 | # buffer.write == true.
|
29 |
| -# * Data in 1:mark can be deleted, shifting the whole thing to the left |
30 |
| -# to make room for more data, without replacing or resizing data |
| 29 | +# * Data before the mark can be deleted, shifting the whole thing to the left |
| 30 | +# to make room for more data, without replacing or resizing data. |
| 31 | +# This can be done only if the buffer is not seekable. |
31 | 32 |
|
32 | 33 | # Internal trait object used to access unsafe constructors.
|
33 | 34 | struct UnsafeMethod end
|
@@ -59,20 +60,23 @@ mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO
|
59 | 60 | # This value is always in 0 : lastindex(data)
|
60 | 61 | size::Int
|
61 | 62 |
|
| 63 | + # When the buffer is resized, or a new buffer allocated, this is the maximum size of the buffer. |
| 64 | + # A new GenericIOBuffer may be constructed with existing data larger than `maxsize`. |
| 65 | + # When that happens, the buffer will not write to data in maxsize + 1 : lastindex(data). |
62 | 66 | # This value is always in 0:typemax(Int).
|
63 |
| - # We always have length(data) <= maxsize |
64 | 67 | maxsize::Int
|
65 | 68 |
|
66 | 69 | # Data is read/written from/to ptr, except in situations where append is true, in which case
|
67 | 70 | # data is still read from ptr, but written to size+1.
|
68 |
| - # This value is always in 1 : size+1 |
| 71 | + # This value is always in offset + 1 : size+1 |
69 | 72 | ptr::Int
|
70 | 73 |
|
71 | 74 | # This is used when seeking. seek(io, 0) results in ptr == offset.
|
72 | 75 | # The offset is needed because, if a buffer is instantiated from a Vector with a non-zero
|
73 | 76 | # memory offset, the start of the vector, and thus the start of data, does not correspond
|
74 | 77 | # to the start of its underlying memory.
|
75 | 78 | # Once the offset is set to zero, it will never be set to nonzero.
|
| 79 | + # This is always in 0:lastindex(data) |
76 | 80 | offset::Int
|
77 | 81 |
|
78 | 82 | # mark is the position (as given by `position`, i.e. io.ptr - io.offset - 1)
|
@@ -269,16 +273,43 @@ PipeBuffer(maxsize::Integer) = (x = PipeBuffer(StringMemory(maxsize), maxsize =
|
269 | 273 | _similar_data(b::GenericIOBuffer, len::Int) = similar(b.data, len)
|
270 | 274 | _similar_data(b::IOBuffer, len::Int) = StringMemory(len)
|
271 | 275 |
|
272 |
| -# TODO: Only copy the used data, not the whole buffer. |
| 276 | +# Note: Copying may change the value of the position (and mark) for un-seekable streams. |
| 277 | +# However, these values are not stable anyway due to compaction. |
| 278 | + |
273 | 279 | function copy(b::GenericIOBuffer)
|
274 |
| - ret = typeof(b)(b.reinit ? _similar_data(b, 0) : b.writable ? |
275 |
| - copyto!(_similar_data(b, length(b.data)), b.data) : b.data, |
276 |
| - b.readable, b.writable, b.seekable, b.append, b.maxsize) |
277 |
| - ret.size = b.size |
278 |
| - ret.ptr = b.ptr |
279 |
| - ret.mark = b.mark |
280 |
| - ret.offset = b.offset |
281 |
| - return ret |
| 280 | + if b.reinit |
| 281 | + # If buffer is used up, allocate a new size-zero buffer |
| 282 | + # Reinit implies writable, and that ptr, size, offset and mark are already the default values |
| 283 | + return typeof(b)(_similar_data(b, 0), b.readable, b.writable, b.seekable, b.append, b.maxsize) |
| 284 | + elseif b.writable |
| 285 | + # Else, we just copy the reachable bytes. If buffer is seekable, all bytes |
| 286 | + # after offset are reachable, since they can be seeked to |
| 287 | + used_span = if b.seekable |
| 288 | + b.offset + 1 : b.size |
| 289 | + else |
| 290 | + # Even non-seekable streams can be seeked using `reset`. Therefore, we need to |
| 291 | + # copy all data from mark if it's set and below ptr. |
| 292 | + (b.mark > -1 ? min(b.ptr, b.mark) : b.ptr) : b.size |
| 293 | + end |
| 294 | + len = length(used_span) |
| 295 | + data = copyto!(_similar_data(b, len), view(b.data, used_span)) |
| 296 | + ret = typeof(b)(data, b.readable, b.writable, b.seekable, b.append, b.maxsize) |
| 297 | + ret.size = len |
| 298 | + ret.offset = 0 |
| 299 | + ret.ptr = b.ptr - first(used_span) + 1 |
| 300 | + ret.mark = b.mark < 0 ? -1 : (b.mark - first(used_span) + 1) |
| 301 | + return ret |
| 302 | + else |
| 303 | + # When the buffer is only readable, the original and the copy can share the same data, so we just make |
| 304 | + # a shallow copy of the IOBuffer struct. |
| 305 | + # Use unsafe method because we want to allow b.maxsize to be larger than data, in case that |
| 306 | + # is the case for `b`. |
| 307 | + ret = typeof(b)(unsafe_method, b.data, b.readable, b.writable, b.seekable, b.append, b.maxsize) |
| 308 | + ret.offset = b.offset |
| 309 | + ret.ptr = b.ptr |
| 310 | + ret.mark = b.mark |
| 311 | + return ret |
| 312 | + end |
282 | 313 | end
|
283 | 314 |
|
284 | 315 | show(io::IO, b::GenericIOBuffer) = print(io, "IOBuffer(data=UInt8[...], ",
|
|
0 commit comments