Skip to content

Commit f117a50

Browse files
authored
Introduce AnnotatedIOBuffer (#51807)
This allows for styled content to be constructed incrementally, without resorting to repeated concatenation. It operates very similarly to IOContext, just with a special `write` method and specifically wrapping an IOBuffer.
2 parents c16472b + 2b9839b commit f117a50

File tree

3 files changed

+194
-7
lines changed

3 files changed

+194
-7
lines changed

NEWS.md

+14-5
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,20 @@ New language features
1111
* The new macro `Base.Cartesian.@ncallkw` is analogous to `Base.Cartesian.@ncall`,
1212
but allows to add keyword arguments to the function call ([#51501]).
1313
* Support for Unicode 15.1 ([#51799]).
14-
* A new `AbstractString` type, `AnnotatedString`, is introduced that allows for
15-
regional annotations to be attached to an underlying string. This type is
16-
particularly useful for holding styling information, and is used extensively
17-
in the new `StyledStrings` standard library. There is also a new `AnnotatedChar`
18-
type, that is the equivalent new `AbstractChar` type.
14+
* Three new types have been added around the idea of text with "annotations" (`Pair{Symbol, Any}`
15+
entries, e.g. `:lang => "en"` or `:face => :magenta`). These annotations
16+
are preserved across operations (e.g. string concatenation with `*`) when
17+
possible.
18+
* `AnnotatedString` is a new `AbstractString` type. It wraps an underlying
19+
string and allows for annotations to be attached to regions of the string.
20+
This type is used extensively in the new `StyledStrings` standard library to
21+
hold styling information.
22+
* `AnnotatedChar` is a new `AbstractChar` type. It wraps another char and
23+
holds a list of annotations that apply to it.
24+
* `AnnotatedIOBuffer` is a new `IO` type that mimics an `IOBuffer`, but has
25+
specialised `read`/`write` methods for annotated content. This can be
26+
thought of both as a "string builder" of sorts and also as glue between
27+
annotated and unannotated content.
1928
* `Manifest.toml` files can now be renamed in the format `Manifest-v{major}.{minor}.toml`
2029
to be preferentially picked up by the given julia version. i.e. in the same folder,
2130
a `Manifest-v1.11.toml` would be used by v1.11 and `Manifest.toml` by every other julia

base/strings/annotated.jl

+126-2
Original file line numberDiff line numberDiff line change
@@ -323,14 +323,15 @@ To remove existing `label` annotations, use a value of `nothing`.
323323
"""
324324
function annotate!(s::AnnotatedString, range::UnitRange{Int}, @nospecialize(labelval::Pair{Symbol, <:Any}))
325325
label, val = labelval
326-
indices = searchsorted(s.annotations, (range,), by=first)
327326
if val === nothing
327+
indices = searchsorted(s.annotations, (range,), by=first)
328328
labelindex = filter(i -> first(s.annotations[i][2]) === label, indices)
329329
for index in Iterators.reverse(labelindex)
330330
deleteat!(s.annotations, index)
331331
end
332332
else
333-
splice!(s.annotations, indices, [(range, Pair{Symbol, Any}(label, val))])
333+
sortedindex = searchsortedlast(s.annotations, (range,), by=first) + 1
334+
insert!(s.annotations, sortedindex, (range, Pair{Symbol, Any}(label, val)))
334335
end
335336
s
336337
end
@@ -386,3 +387,126 @@ annotations(s::SubString{<:AnnotatedString}, pos::UnitRange{<:Integer}) =
386387
Get all annotations of `chr`.
387388
"""
388389
annotations(c::AnnotatedChar) = c.annotations
390+
391+
## AnnotatedIOBuffer
392+
393+
"""
    AnnotatedIOBuffer([io::IOBuffer])

An `IOBuffer` bundled with a list of annotations. Each annotation is a
`(region, label => value)` tuple, where `region` is a `UnitRange{Int}` of
1-based indices into the buffer's content.

When constructed without an `io`, a fresh empty `IOBuffer` is used. In both
cases the annotation list starts out empty.
"""
struct AnnotatedIOBuffer <: AbstractPipe
    io::IOBuffer
    annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}
end

function AnnotatedIOBuffer(io::IOBuffer)
    noannotations = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[]
    return AnnotatedIOBuffer(io, noannotations)
end

AnnotatedIOBuffer() = AnnotatedIOBuffer(IOBuffer())
400+
401+
# Compact summary of the form `AnnotatedIOBuffer(N bytes, M annotations)`,
# using the singular noun when the respective count is exactly one.
function show(io::IO, aio::AnnotatedIOBuffer)
    nbytes = aio.io.size
    nannots = length(aio.annotations)
    show(io, AnnotatedIOBuffer)
    print(io, '(', nbytes, " byte", nbytes == 1 ? "" : "s", ", ",
          nannots, " annotation", nannots == 1 ? "" : "s", ")")
end
406+
407+
# Both ends of the pipe are the wrapped `IOBuffer`.
function pipe_reader(io::AnnotatedIOBuffer)
    io.io
end

function pipe_writer(io::AnnotatedIOBuffer)
    io.io
end
409+
410+
# Useful `IOBuffer` methods that we don't get from `AbstractPipe`.
#
# The repositioning methods (`seek`, `seekend`, `skip`) act on the wrapped
# `IOBuffer` but return the `AnnotatedIOBuffer` itself, so that a chained call
# such as `write(seekend(aio), annotatedstring)` still goes through the
# annotation-aware methods rather than the bare `IOBuffer` ones.
position(io::AnnotatedIOBuffer) = position(io.io)
seek(io::AnnotatedIOBuffer, n::Integer) = (seek(io.io, n); io)
seekend(io::AnnotatedIOBuffer) = (seekend(io.io); io)
skip(io::AnnotatedIOBuffer, n::Integer) = (skip(io.io, n); io)
# Copies both the wrapped buffer and the annotation vector (the annotation
# entries themselves are shared with the original).
copy(io::AnnotatedIOBuffer) = AnnotatedIOBuffer(copy(io.io), copy(io.annotations))
416+
417+
"""
    annotations(io::AnnotatedIOBuffer)

Get all annotations of `io`. The returned vector is the one held by `io`
itself, not a copy.
"""
function annotations(io::AnnotatedIOBuffer)
    io.annotations
end
418+
419+
# Write annotated content at the current position of `io`, returning whatever
# the underlying `IOBuffer` write returns. When the write does not start at the
# end of the buffer, annotations covering the overwritten region are cleared
# first; the annotations of `astr` are then recorded relative to the write
# position.
function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:AnnotatedString}})
    content = AnnotatedString(astr)
    start = position(io.io)
    if !eof(io)
        overwritten = (start + 1):(start + ncodeunits(content))
        _clear_annotations_in_region!(io.annotations, overwritten)
    end
    # This must happen before the bytes are written, since
    # `_insert_annotations!` consults the current position and `eof` state.
    _insert_annotations!(io, content.annotations)
    write(io.io, String(content))
end
426+
427+
# An annotated character is written as the equivalent `AnnotatedString`.
function write(io::AnnotatedIOBuffer, c::AnnotatedChar)
    write(io, AnnotatedString(c))
end

# Unannotated content is handed straight to the wrapped `IOBuffer`; the
# annotation list is not touched by these methods.
function write(io::AnnotatedIOBuffer, x::AbstractString)
    write(io.io, x)
end

# NOTE(review): the `String`/`SubString{String}` and `UInt8` methods repeat the
# forwarding above for specific types, presumably to avoid method ambiguities
# with generic `IO` methods; confirm before removing.
function write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String})
    write(io.io, s)
end

function write(io::AnnotatedIOBuffer, b::UInt8)
    write(io.io, b)
end
431+
432+
# Copy the remaining content of `src` (from its current position onwards) into
# `dest` at `dest`'s current position, carrying the relevant annotations along.
# Returns the number of bytes written.
#
# Positions are 0-based byte offsets while annotation regions are 1-based byte
# indices, so the bytes written to `dest` occupy `destpos+1:destpos+nb`, and the
# bytes read from `src` start at index `srcpos+1`.
function write(dest::AnnotatedIOBuffer, src::AnnotatedIOBuffer)
    destpos = position(dest)
    # Must be checked before writing: afterwards `dest` may well be at its end.
    isappending = eof(dest)
    srcpos = position(src)
    nb = write(dest.io, src.io)
    if !isappending && nb > 0
        # Only the bytes actually overwritten lose their annotations. Starting
        # the span at `destpos` would also trim an annotation ending on the
        # byte just before the write.
        _clear_annotations_in_region!(dest.annotations, destpos+1:destpos+nb)
    end
    # Keep every source annotation that covers at least one copied byte,
    # clipping those that begin before the read position (the same rule as
    # `read(io, AnnotatedString)`). The typed comprehension keeps the element
    # type exact even when no annotation qualifies.
    srcannots = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[
        (max(1 + srcpos, first(region)):last(region), annot)
        for (region, annot) in src.annotations if last(region) > srcpos]
    _insert_annotations!(dest, srcannots, destpos - srcpos)
    nb
end
443+
444+
"""
    _clear_annotations_in_region!(annotations, span::UnitRange{Int})

Remove from `annotations` everything that applies to `span`, in place, and
return `annotations`.

- Annotations whose region lies entirely within `span` are deleted.
- Annotations that overlap one edge of `span` are trimmed to the part outside it.
- Annotations that extend beyond both edges of `span` are split in two: the
  part before `span` stays in place, and the part after `span` is inserted at
  its sorted position.

An empty `span` leaves `annotations` untouched.
"""
function _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, span::UnitRange{Int})
    isempty(span) && return annotations
    # Drop annotations fully covered by `span`.
    filter!(((region, _),) -> first(region) < first(span) || last(region) > last(span), annotations)
    extras = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[]
    for i in eachindex(annotations)
        region, annot = annotations[i]
        # After the filter above, an annotation overlaps `span` exactly when
        # its region contains one of the endpoints of `span`.
        if first(region) <= first(span) <= last(region) || first(region) <= last(span) <= last(region)
            annotations[i] = (if first(region) < first(span)
                                  first(region):first(span)-1
                              else
                                  last(span)+1:last(region)
                              end, annot)
            # If `span` sits strictly inside `region`, only the leading overhang
            # has been kept so far; remember the trailing overhang too.
            if first(region) < first(span) && last(span) < last(region)
                push!(extras, (last(span)+1:last(region), annot))
            end
        end
    end
    # Insert the trailing overhangs only once the scan is complete: inserting
    # while iterating would shift the indices still to be visited and would
    # re-insert earlier extras on every subsequent iteration.
    for entry in extras
        sortedindex = searchsortedlast(annotations, (first(entry),), by=first) + 1
        insert!(annotations, sortedindex, entry)
    end
    annotations
end
469+
470+
# Record `annotations` in `io`, with every region shifted by `offset` (by
# default the current position of `io`). When `io` is at its end the shifted
# entries are simply appended; otherwise each one is inserted at its sorted
# position among the existing annotations.
function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, offset::Int = position(io))
    # Inserting annotations does not move the buffer, so this is checked once.
    atend = eof(io)
    for (region, annot) in annotations
        shifted = (first(region) + offset):(last(region) + offset)
        if atend
            push!(io.annotations, (shifted, annot))
        else
            slot = searchsortedlast(io.annotations, (shifted,), by=first) + 1
            insert!(io.annotations, slot, (shifted, annot))
        end
    end
end
484+
485+
# Read the remainder of `io` as an `AnnotatedString{T}`. Annotation regions are
# re-expressed relative to the position the read starts from: annotations that
# end at or before that position are left out, and those that begin before it
# are clipped to start at index 1.
function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{T}}) where {T <: AbstractString}
    start = position(io)
    annots = if start == 0
        # Reading from the very beginning: regions need no adjustment.
        copy(io.annotations)
    else
        [(max(1, first(region) - start):last(region)-start, val)
         for (region, val) in io.annotations if last(region) > start]
    end
    AnnotatedString(read(io.io, T), annots)
end

# Without a concrete string type, read as `AnnotatedString{String}`.
function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{AbstractString}})
    read(io, AnnotatedString{String})
end

function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString})
    read(io, AnnotatedString{String})
end
496+
497+
# Read a single character of type `T` from `io`, attaching every annotation
# whose region contains the index at which the character starts.
function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{T}}) where {T <: AbstractChar}
    # Positions are 0-based, annotation regions are 1-based.
    index = position(io) + 1
    char = read(io.io, T)
    annots = [annot for (range, annot) in io.annotations if index in range]
    AnnotatedChar(char, annots)
end

# Without a concrete character type, read as `AnnotatedChar{Char}`.
function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{AbstractChar}})
    read(io, AnnotatedChar{Char})
end

function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar})
    read(io, AnnotatedChar{Char})
end
505+
506+
# Truncate the wrapped buffer to `size` and bring the annotations in line with
# it: annotations starting beyond `size` are removed, and the remaining ones
# are clipped so that they end no later than `size`. Returns `io`.
function truncate(io::AnnotatedIOBuffer, size::Integer)
    truncate(io.io, size)
    filter!(io.annotations) do (range, _)
        first(range) <= size
    end
    map!(io.annotations, io.annotations) do (range, val)
        (first(range):min(size, last(range)), val)
    end
    io
end

test/strings/annotated.jl

+54
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,57 @@ end
107107
@test reverse(str1) == Base.AnnotatedString("tset", [(1:4, :label => 5)])
108108
@test reverse(str2) == Base.AnnotatedString("esac", [(2:3, :label => "oomph")])
109109
end
110+
111+
# Exercises `Base.AnnotatedIOBuffer`: appending annotated content, reading it
# back from various offsets, truncation, overwriting in the middle of the
# buffer, and writing one annotated buffer into another.
# The checks are order-dependent: each one relies on the buffer contents,
# annotations and position left behind by the checks before it.
@testset "AnnotatedIOBuffer" begin
112+
aio = Base.AnnotatedIOBuffer()
113+
# Append-only writing
114+
@test write(aio, Base.AnnotatedString("hello", [(1:5, :tag => 1)])) == 5
115+
# An unannotated character adds a byte but no annotation
@test write(aio, ' ') == 1
116+
@test write(aio, Base.AnnotatedString("world", [(1:5, :tag => 2)])) == 5
117+
@test Base.annotations(aio) == [(1:5, :tag => 1), (7:11, :tag => 2)]
118+
# Reading
119+
@test read(seekstart(deepcopy(aio.io)), String) == "hello world"
120+
@test read(seekstart(deepcopy(aio)), String) == "hello world"
121+
# Reading from a non-zero position re-expresses the annotation regions
# relative to where the read starts, clipping or dropping them as needed
@test read(seek(aio, 0), Base.AnnotatedString) == Base.AnnotatedString("hello world", [(1:5, :tag => 1), (7:11, :tag => 2)])
122+
@test read(seek(aio, 1), Base.AnnotatedString) == Base.AnnotatedString("ello world", [(1:4, :tag => 1), (6:10, :tag => 2)])
123+
@test read(seek(aio, 4), Base.AnnotatedString) == Base.AnnotatedString("o world", [(1:1, :tag => 1), (3:7, :tag => 2)])
124+
@test read(seek(aio, 5), Base.AnnotatedString) == Base.AnnotatedString(" world", [(2:6, :tag => 2)])
125+
# Truncation (on copies, so `aio` itself is unaffected) drops annotations
# starting beyond the new size and clips those that cross it
@test read(seekstart(truncate(deepcopy(aio), 5)), Base.AnnotatedString) == Base.AnnotatedString("hello", [(1:5, :tag => 1)])
126+
@test read(seekstart(truncate(deepcopy(aio), 6)), Base.AnnotatedString) == Base.AnnotatedString("hello ", [(1:5, :tag => 1)])
127+
@test read(seekstart(truncate(deepcopy(aio), 7)), Base.AnnotatedString) == Base.AnnotatedString("hello w", [(1:5, :tag => 1), (7:7, :tag => 2)])
128+
@test read(seek(aio, 0), Base.AnnotatedChar) == Base.AnnotatedChar('h', [:tag => 1])
129+
@test read(seek(aio, 5), Base.AnnotatedChar) == Base.AnnotatedChar(' ', Pair{Symbol, Any}[])
130+
@test read(seek(aio, 6), Base.AnnotatedChar) == Base.AnnotatedChar('w', [:tag => 2])
131+
# Check method compatibility with IOBuffer
132+
# The character read just above started at offset 6 and consumed one byte
@test position(aio) == 7
133+
@test seek(aio, 4) === aio
134+
@test skip(aio, 2) === aio
135+
@test Base.annotations(copy(aio)) == Base.annotations(aio)
136+
@test take!(copy(aio).io) == take!(copy(aio.io))
137+
# Writing into the middle of the buffer
138+
@test write(seek(aio, 6), "alice") == 5 # Replace 'world' with 'alice'
139+
@test read(seekstart(aio), String) == "hello alice"
140+
@test Base.annotations(aio) == [(1:5, :tag => 1), (7:11, :tag => 2)] # Should be unchanged
141+
@test write(seek(aio, 0), Base.AnnotatedString("hey-o", [(1:5, :hey => 'o')])) == 5
142+
@test read(seekstart(aio), String) == "hey-o alice"
143+
@test Base.annotations(aio) == [(1:5, :hey => 'o'), (7:11, :tag => 2)] # First annotation should have been entirely replaced
144+
@test write(seek(aio, 7), Base.AnnotatedString("bbi", [(1:3, :hey => 'a')])) == 3 # a[lic => bbi]e ('alice' => 'abbie')
145+
@test read(seekstart(aio), String) == "hey-o abbie"
146+
@test Base.annotations(aio) == [(1:5, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
147+
@test write(seek(aio, 0), Base.AnnotatedString("ab")) == 2 # Check first annotation's region is adjusted correctly
148+
@test read(seekstart(aio), String) == "aby-o abbie"
149+
@test Base.annotations(aio) == [(3:5, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
150+
@test write(seek(aio, 3), Base.AnnotatedString("ss")) == 2
151+
@test read(seekstart(aio), String) == "abyss abbie"
152+
@test Base.annotations(aio) == [(3:3, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)]
153+
# Writing one buffer to another
154+
newaio = Base.AnnotatedIOBuffer()
155+
@test write(newaio, seekstart(aio)) == 11
156+
@test read(seekstart(newaio), String) == "abyss abbie"
157+
@test Base.annotations(newaio) == Base.annotations(aio)
158+
# Overwriting the tail of `newaio` with the identical tail of `aio` should
# leave its annotations as they were
@test write(seek(newaio, 5), seek(aio, 5)) == 6
159+
@test Base.annotations(newaio) == Base.annotations(aio)
160+
# `newaio` is now positioned at its end, so this write appends
@test write(newaio, seek(aio, 5)) == 6
161+
@test read(seekstart(newaio), String) == "abyss abbie abbie"
162+
@test Base.annotations(newaio) == vcat(Base.annotations(aio), [(13:13, :tag => 2), (14:16, :hey => 'a'), (17:17, :tag => 2)])
163+
end

0 commit comments

Comments
 (0)