4
4
5
5
# Here, u represents used bytes (already read), X represents bytes still to read,
6
6
# - represents bytes of uninitialized data, which can be written to later.
7
+ # . represents bytes before offset, which the buffer will not touch, until
8
+ # a write operation happens.
7
9
8
- # uuuuuuuuuuuuuXXXXXXXXXXXXX------------
9
- # | | | | | |
10
- # | | ptr size | maxsize (≥ lastindex)
11
- # 1 mark (zero-indexed) lastindex(data)
10
+ # ..... uuuuuuuuuuuuuXXXXXXXXXXXXX------------
11
+ # | | | | | | |
12
+ # | offset | ptr size | maxsize (≥ lastindex)
13
+ # 1 mark (zero-indexed) lastindex(data)
12
14
13
15
# AFTER COMPACTION
14
- # Mark, ptr and size decreases by `mark`
16
+ # Mark, ptr and size decrease by `mark`. Offset is zeroed.
15
17
16
18
# uuuuuXXXXXXXXXXXXX---------------------
17
19
# || | | | |
18
20
# |1 ptr size | maxsize (≥ lastindex)
19
21
# mark (zero-indexed) lastindex(data)
22
+ # offset (set to zero)
20
23
21
24
# * The underlying array is always 1-indexed
22
- # * The IOBuffer has full control (ownership) of the underlying array.
25
+ # * The IOBuffer has full control (ownership) of the underlying array only when
26
+ # buffer.writable == true.
23
27
# * Data in 1:mark can be deleted, shifting the whole thing to the left
24
28
# to make room for more data, without replacing or resizing data
25
29
@@ -33,13 +37,16 @@ mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO
33
37
34
38
# The user can take control of `data` out of this struct. When that happens, instead of eagerly allocating
35
39
# a new array, we set `.reinit` to true, and then allocate a new one when needed.
40
+ # If reinit is true, the buffer is writable, and offset and size are zero. See `take!`
36
41
reinit:: Bool
37
42
readable:: Bool
38
43
writable:: Bool
39
44
40
45
# If not seekable, implementation is free to destroy (compact) data in 1:mark-1.
41
46
# If it IS seekable, the user may always recover any data in 1:size by seeking,
42
- # so no data can be destroyed
47
+ # so no data can be destroyed.
48
+ # Non-seekable IOBuffers can only be constructed with `PipeBuffer`, which is always writable,
49
+ # readable and in append mode.
43
50
seekable:: Bool
44
51
45
52
# If true, write new data to the index size+1 instead of the index ptr.
@@ -60,10 +67,18 @@ mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO
60
67
# This value is always in 1 : size+1
61
68
ptr:: Int
62
69
63
- # Data at the marked location or before for non-seekable buffers can be deleted.
64
- # The mark is zero-indexed. If it is -1, the mark is not set.
65
- # The purpose of the mark is to reset the stream to a given position using reset.
66
- # This value is always in -1 : size-1
70
+ # This is used when seeking. seek(io, 0) results in ptr == offset + 1.
71
+ # The offset is needed because, if a buffer is instantiated from a Vector with a non-zero
72
+ # memory offset, the start of the vector, and thus the start of data, does not correspond
73
+ # to the start of its underlying memory.
74
+ # Once the offset is set to zero, it will never be set to nonzero.
75
+ offset:: Int
76
+
77
+ # mark is the position (as given by `position`, i.e. io.ptr - io.offset - 1)
78
+ # which can be seeked back using `reset`, even for non-seekable buffers.
79
+ # For non-seekable buffers that can be compacted, data before the mark can be
80
+ # destroyed.
81
+ # This value is always in -1 : size-offset-1
67
82
mark:: Int
68
83
69
84
# Unsafe constructor which does not do any checking
@@ -77,7 +92,7 @@ mutable struct GenericIOBuffer{T<:AbstractVector{UInt8}} <: IO
77
92
maxsize:: Int ,
78
93
) where T<: AbstractVector{UInt8}
79
94
len = Int (length (data)):: Int
80
- return new (data, false , readable, writable, seekable, append, len, maxsize, 1 , - 1 )
95
+ return new (data, false , readable, writable, seekable, append, len, maxsize, 1 , 0 , - 1 )
81
96
end
82
97
end
83
98
@@ -113,11 +128,10 @@ function GenericIOBuffer(data::Vector{UInt8}, readable::Bool, writable::Bool, se
113
128
mem = ref. mem
114
129
len = length (data)
115
130
offset = memoryrefoffset (ref) - 1
116
- if ! iszero (offset)
117
- unsafe_copyto! (mem, 1 , mem, offset+ 1 , len)
118
- end
119
131
buf = GenericIOBuffer (mem, readable, writable, seekable, append, maxsize)
120
- buf. size = len
132
+ buf. size = len + offset
133
+ buf. ptr = offset + 1
134
+ buf. offset = offset
121
135
return buf
122
136
end
123
137
@@ -199,7 +213,7 @@ function IOBuffer(
199
213
flags = open_flags (read= read, write= write, append= append, truncate= truncate)
200
214
buf = GenericIOBuffer (data, flags. read, flags. write, true , flags. append, maxsize)
201
215
if flags. truncate
202
- buf. size = 0
216
+ buf. size = buf . offset
203
217
end
204
218
return buf
205
219
end
@@ -225,9 +239,11 @@ function IOBuffer(;
225
239
flags = open_flags (read= read, write= write, append= append, truncate= truncate)
226
240
# A common usecase of IOBuffer is to incrementally construct strings. By using StringMemory
227
241
# as the default storage, we can turn the result into a string without copying.
228
- buf = GenericIOBuffer {Memory{UInt8}} (unsafe_method, StringMemory (size), flags. read, flags. write, true , flags. append, mz)
242
+ # TODO : Do we need to zero this here?
243
+ data = fill! (StringMemory (size), 0 )
244
+ buf = GenericIOBuffer {Memory{UInt8}} (unsafe_method, data, flags. read, flags. write, true , flags. append, mz)
229
245
if flags. truncate
230
- buf. size = 0
246
+ buf. size = buf . offset
231
247
end
232
248
return buf
233
249
end
@@ -258,6 +274,7 @@ function copy(b::GenericIOBuffer)
258
274
ret. size = b. size
259
275
ret. ptr = b. ptr
260
276
ret. mark = b. mark
277
+ ret. offset = b. offset
261
278
return ret
262
279
end
263
280
@@ -266,9 +283,9 @@ show(io::IO, b::GenericIOBuffer) = print(io, "IOBuffer(data=UInt8[...], ",
266
283
" writable=" , b. writable, " , " ,
267
284
" seekable=" , b. seekable, " , " ,
268
285
" append=" , b. append, " , " ,
269
- " size=" , b. size, " , " ,
286
+ " size=" , b. size - b . offset , " , " ,
270
287
" maxsize=" , b. maxsize == typemax (Int) ? " Inf" : b. maxsize, " , " ,
271
- " ptr=" , b. ptr, " , " ,
288
+ " ptr=" , b. ptr - b . offset , " , " ,
272
289
" mark=" , b. mark, " )" )
273
290
274
291
@noinline function _throw_not_readable ()
@@ -402,15 +419,15 @@ isreadable(io::GenericIOBuffer) = io.readable
402
419
iswritable (io:: GenericIOBuffer ) = io. writable
403
420
404
421
# Number of bytes that can be read from the buffer, if you seek to the start first.
405
- filesize (io:: GenericIOBuffer ) = (io. seekable ? io. size : bytesavailable (io))
422
+ filesize (io:: GenericIOBuffer ) = (io. seekable ? io. size - io . offset : bytesavailable (io))
406
423
407
424
# Number of bytes that can be read from the buffer.
408
425
bytesavailable (io:: GenericIOBuffer ) = io. size - io. ptr + 1
409
426
410
427
# Position is zero-indexed, but ptr is one-indexed, hence the -1
411
428
# TODO : Document that position for an unseekable stream is invalid, or
412
429
# make it error
413
- position (io:: GenericIOBuffer ) = io. ptr - 1
430
+ position (io:: GenericIOBuffer ) = io. ptr - io . offset - 1
414
431
415
432
function skip (io:: GenericIOBuffer , n:: Integer )
416
433
skip (io, clamp (n, Int))
@@ -426,8 +443,8 @@ function skip(io::GenericIOBuffer, n::Int)
426
443
seek (io, seekto) # Does error checking
427
444
else
428
445
# Don't use seek in order to allow a non-seekable IO to still skip bytes.
429
- n_max = io . size + 1 - io . ptr
430
- io. ptr + = min (n, n_max )
446
+ # Handle overflow
447
+ io. ptr = min (io . size + 1 , clamp ( widen (io . ptr) + widen (n), Int) )
431
448
io
432
449
end
433
450
end
@@ -445,7 +462,7 @@ function seek(io::GenericIOBuffer, n::Int)
445
462
# of a GenericIOBuffer), so that would need to be fixed in order to throw an error here
446
463
# (n < 0 || n > io.size - io.offset) && throw(ArgumentError("Attempted to seek outside IOBuffer boundaries."))
447
464
# io.ptr = n + io.offset + 1
448
- io. ptr = clamp (n, 0 , io. size) + 1
465
+ io. ptr = clamp (n, 0 , io. size - io . offset) + io . offset + 1
449
466
return io
450
467
end
451
468
457
474
458
475
# Resize data to exactly size `sz`. Either resize the underlying data,
459
476
# or allocate a new one and copy.
477
+ # This should only be called after the offset is zero - any operation which calls
478
+ # _resize! should reset the offset before doing so.
460
479
function _resize! (io:: GenericIOBuffer , new_size:: Int )
461
480
old_data = io. data
481
+ @assert iszero (io. offset)
462
482
if applicable (resize!, old_data, new_size)
463
483
resize! (old_data, new_size)
464
484
else
@@ -475,22 +495,30 @@ function _resize!(io::GenericIOBuffer, new_size::Int)
475
495
return io
476
496
end
477
497
478
- function truncate (io:: GenericIOBuffer , n:: Integer )
498
+ # TODO : These errors cannot be converted to LazyString, but it's wasteful to interpolate them here.
499
+ function truncate (io:: GenericIOBuffer , n:: Integer )
479
500
io. writable || throw (ArgumentError (" truncate failed, IOBuffer is not writeable" ))
501
+ # Non-seekable buffers can only be constructed with `PipeBuffer`, which is explicitly
502
+ # documented to not be truncatable.
480
503
io. seekable || throw (ArgumentError (" truncate failed, IOBuffer is not seekable" ))
481
504
n < 0 && throw (ArgumentError (" truncate failed, n bytes must be ≥ 0, got $n " ))
482
505
n > io. maxsize && throw (ArgumentError (" truncate failed, $(n) bytes is exceeds IOBuffer maxsize $(io. maxsize) " ))
483
- n = Int (n)
506
+ n = Int (n):: Int
484
507
if io. reinit
508
+ @assert iszero (io. offset)
485
509
io. data = _similar_data (io, n)
486
510
io. reinit = false
487
- elseif n > length (io. data)
488
- _resize! (io, n)
489
- end
490
- ismarked (io) && io. mark > n && unmark (io)
491
- io. data[io. size+ 1 : n] .= 0
492
- io. size = n
493
- io. ptr = min (io. ptr, n+ 1 )
511
+ elseif n > length (io. data) - io. offset
512
+ # We zero the offset here because that allows us to minimize the resizing,
513
+ # saving memory.
514
+ zero_offset! (io)
515
+ n > length (io. data) && _resize! (io, n)
516
+ end
517
+ # Since mark is zero-indexed, we must also clear it if it is equal to n
518
+ ismarked (io) && io. mark >= n && (io. mark = - 1 )
519
+ io. data[io. size+ 1 : n+ io. offset] .= 0
520
+ io. size = n + io. offset
521
+ io. ptr = min (io. ptr, n+ io. offset+ 1 )
494
522
return io
495
523
end
496
524
@@ -520,12 +548,18 @@ end
520
548
io. writable || throw (ArgumentError (" ensureroom failed, IOBuffer is not writeable" ))
521
549
io. data = _similar_data (io, min (io. maxsize, nshort % Int))
522
550
io. reinit = false
551
+ io. offset = 0
523
552
return io
524
553
end
525
554
526
555
@noinline function ensureroom_slowpath (io:: GenericIOBuffer , nshort:: UInt )
527
- # If the buffer is seekable, the user can seek to before ptr, and so we
528
- # cannot compact the data.
556
+ # Begin by zeroing out offset and check if that gives us room enough
557
+ nshort -= zero_offset! (io) % UInt
558
+ iszero (nshort) && return io
559
+
560
+ # Else, try to compact the data. To do this, the buffer must not be seekable.
561
+ # If it's seekable, the user can recover used data by seeking before ptr,
562
+ # and so we can't delete it.
529
563
if (! io. seekable && io. ptr > 1 )
530
564
ptr = io. ptr
531
565
mark = io. mark
556
590
return io
557
591
end
558
592
593
+ function zero_offset! (io:: GenericIOBuffer ):: Int
594
+ @assert io. writable
595
+ offset = io. offset
596
+ iszero (offset) && return 0
597
+ size = io. size
598
+ if size != offset
599
+ data = io. data
600
+ unsafe_copyto! (data, 1 , data, offset + 1 , size - offset)
601
+ end
602
+ io. offset = 0
603
+ io. ptr -= offset
604
+ io. size -= offset
605
+ return offset
606
+ end
607
+
559
608
eof (io:: GenericIOBuffer ) = (io. ptr - 1 >= io. size)
560
609
561
610
function closewrite (io:: GenericIOBuffer )
571
620
io. maxsize = 0
572
621
io. ptr = 1
573
622
io. mark = - 1
623
+ io. offset = 0
574
624
if io. writable && ! io. reinit
575
625
io. data = _resize! (io, 0 )
576
626
end
@@ -601,8 +651,8 @@ function take!(io::GenericIOBuffer)
601
651
# If the buffer is seekable, then the previously consumed bytes from ptr+1:size
602
652
# must still be output, as they are not truly gone.
603
653
# Hence, we output all bytes from io.offset+1:io.size
604
- nbytes = io. size
605
- data = copyto! (StringVector (nbytes), 1 , io. data, 1 , nbytes)
654
+ nbytes = io. size - io . offset
655
+ data = copyto! (StringVector (nbytes), 1 , io. data, io . offset + 1 , nbytes)
606
656
else
607
657
# Else, if not seekable, bytes from 1:ptr-1 are truly gone and should not
608
658
# be output. Hence, we output `bytesavailable`, which is ptr:size
@@ -613,6 +663,7 @@ function take!(io::GenericIOBuffer)
613
663
io. reinit = true
614
664
io. ptr = 1
615
665
io. size = 0
666
+ io. offset = 0
616
667
end
617
668
return data
618
669
end
@@ -627,9 +678,9 @@ function take!(io::IOBuffer)
627
678
if nbytes == 0 || io. reinit
628
679
data = StringVector (0 )
629
680
elseif io. writable
630
- data = wrap (Array, memoryref (io. data, 1 ), nbytes)
681
+ data = wrap (Array, memoryref (io. data, io . offset + 1 ), nbytes)
631
682
else
632
- data = copyto! (StringVector (nbytes), 1 , io. data, 1 , nbytes)
683
+ data = copyto! (StringVector (nbytes), 1 , io. data, io . offset + 1 , nbytes)
633
684
end
634
685
else
635
686
nbytes = bytesavailable (io)
@@ -645,6 +696,7 @@ function take!(io::IOBuffer)
645
696
io. reinit = true
646
697
io. ptr = 1
647
698
io. size = 0
699
+ io. offset = 0
648
700
end
649
701
return data
650
702
end
@@ -662,10 +714,10 @@ It might save an allocation compared to `take!` (if the compiler elides the
662
714
Array allocation), as well as omits some checks.
663
715
"""
664
716
_unsafe_take! (io:: IOBuffer ) =
665
- wrap (Array, io. size == 0 ?
717
+ wrap (Array, io. size == io . offset ?
666
718
memoryref (Memory {UInt8} ()) :
667
- memoryref (io. data, 1 ),
668
- io. size)
719
+ memoryref (io. data, io . offset + 1 ),
720
+ io. size - io . offset )
669
721
670
722
function write (to:: IO , from:: GenericIOBuffer )
671
723
available = bytesavailable (from)
@@ -820,7 +872,7 @@ function copyline(out::GenericIOBuffer, s::IO; keep::Bool=false)
820
872
copyuntil (out, s, 0x0a , keep= true )
821
873
line = out. data
822
874
i = out. size
823
- if keep || iszero (i) || line[i] != 0x0a
875
+ if keep || i == out . offset || line[i] != 0x0a
824
876
return out
825
877
elseif i < 2 || line[i- 1 ] != 0x0d
826
878
i -= 1
0 commit comments