Skip to content

Commit a5e0eab

Browse files
authored
Move eachregion(::AnnotatedString) implementation to Base (#57912)
Excising part of #56194 on the way to reviving that PR.
1 parent 3c88fa5 commit a5e0eab

File tree

4 files changed

+333
-175
lines changed

4 files changed

+333
-175
lines changed

base/strings/annotated.jl

+83-175
Original file line numberDiff line numberDiff line change
@@ -460,201 +460,109 @@ function annotated_chartransform(f::Function, str::AnnotatedString, state=nothin
460460
AnnotatedString(String(take!(outstr)), annots)
461461
end
462462

463-
## AnnotatedIOBuffer
464-
465-
struct AnnotatedIOBuffer <: AbstractPipe
466-
io::IOBuffer
467-
annotations::Vector{RegionAnnotation}
468-
end
469-
470-
AnnotatedIOBuffer(io::IOBuffer) = AnnotatedIOBuffer(io, Vector{RegionAnnotation}())
471-
AnnotatedIOBuffer() = AnnotatedIOBuffer(IOBuffer())
472-
473-
function show(io::IO, aio::AnnotatedIOBuffer)
474-
show(io, AnnotatedIOBuffer)
475-
size = filesize(aio.io)
476-
print(io, '(', size, " byte", ifelse(size == 1, "", "s"), ", ",
477-
length(aio.annotations), " annotation", ifelse(length(aio.annotations) == 1, "", "s"), ")")
463+
# Iterator state produced by `eachregion`. `regions[i]` is an index range into
# `str`, and `annotations[i]` holds the annotations paired with that range; the
# two vectors are indexed together when iterating.
struct RegionIterator{S<:AbstractString}
    str::S                                    # string the regions index into
    regions::Vector{UnitRange{Int}}           # one range per region
    annotations::Vector{Vector{Annotation}}   # annotations for each region
end
479468

480-
pipe_reader(io::AnnotatedIOBuffer) = io.io
481-
pipe_writer(io::AnnotatedIOBuffer) = io.io
482-
483-
# Useful `IOBuffer` methods that we don't get from `AbstractPipe`
484-
position(io::AnnotatedIOBuffer) = position(io.io)
485-
seek(io::AnnotatedIOBuffer, n::Integer) = (seek(io.io, n); io)
486-
seekend(io::AnnotatedIOBuffer) = (seekend(io.io); io)
487-
skip(io::AnnotatedIOBuffer, n::Integer) = (skip(io.io, n); io)
488-
copy(io::AnnotatedIOBuffer) = AnnotatedIOBuffer(copy(io.io), copy(io.annotations))
489-
490-
annotations(io::AnnotatedIOBuffer) = io.annotations
491-
492-
annotate!(io::AnnotatedIOBuffer, range::UnitRange{Int}, label::Symbol, @nospecialize(val::Any)) =
493-
(_annotate!(io.annotations, range, label, val); io)
494-
495-
function write(io::AnnotatedIOBuffer, astr::Union{AnnotatedString, SubString{<:AnnotatedString}})
496-
astr = AnnotatedString(astr)
497-
offset = position(io.io)
498-
eof(io) || _clear_annotations_in_region!(io.annotations, offset+1:offset+ncodeunits(astr))
499-
_insert_annotations!(io, astr.annotations)
500-
write(io.io, String(astr))
501-
end
469+
# One element is produced per stored region.
function Base.length(si::RegionIterator)
    return length(si.regions)
end
502470

503-
write(io::AnnotatedIOBuffer, c::AnnotatedChar) =
504-
write(io, AnnotatedString(string(c), [(region=1:ncodeunits(c), a...) for a in c.annotations]))
505-
write(io::AnnotatedIOBuffer, x::AbstractString) = write(io.io, x)
506-
write(io::AnnotatedIOBuffer, s::Union{SubString{String}, String}) = write(io.io, s)
507-
write(io::AnnotatedIOBuffer, b::UInt8) = write(io.io, b)
508-
509-
function write(dest::AnnotatedIOBuffer, src::AnnotatedIOBuffer)
510-
destpos = position(dest)
511-
isappending = eof(dest)
512-
srcpos = position(src)
513-
nb = write(dest.io, src.io)
514-
isappending || _clear_annotations_in_region!(dest.annotations, destpos:destpos+nb)
515-
srcannots = [setindex(annot, max(1 + srcpos, first(annot.region)):last(annot.region), :region)
516-
for annot in src.annotations if first(annot.region) >= srcpos]
517-
_insert_annotations!(dest, srcannots, destpos - srcpos)
518-
nb
471+
# Yield `((substring, annotations), nextstate)` for region `i`, or `nothing`
# once `i` runs past the last region.
Base.@propagate_inbounds function Base.iterate(si::RegionIterator, i::Integer=1)
    i > length(si.regions) && return nothing
    @inbounds begin
        substr = SubString(si.str, si.regions[i])
        annots = si.annotations[i]
    end
    return ((substr, annots), i + 1)
end
520476

521-
# So that read/writes with `IOContext` (and any similar `AbstractPipe` wrappers)
522-
# work as expected.
523-
function write(io::AbstractPipe, s::Union{AnnotatedString, SubString{<:AnnotatedString}})
524-
if pipe_writer(io) isa AnnotatedIOBuffer
525-
write(pipe_writer(io), s)
526-
else
527-
invoke(write, Tuple{IO, typeof(s)}, io, s)
528-
end::Int
529-
end
530-
# Can't be part of the `Union` above because it introduces method ambiguities
531-
function write(io::AbstractPipe, c::AnnotatedChar)
532-
if pipe_writer(io) isa AnnotatedIOBuffer
533-
write(pipe_writer(io), c)
534-
else
535-
invoke(write, Tuple{IO, typeof(c)}, io, c)
536-
end::Int
537-
end
477+
# Each element pairs a substring of the wrapped string with its annotations.
function Base.eltype(::RegionIterator{S}) where {S<:AbstractString}
    return Tuple{SubString{S}, Vector{Annotation}}
end
538479

539480
"""
540-
_clear_annotations_in_region!(annotations::Vector{$RegionAnnotation}, span::UnitRange{Int})
481+
eachregion(s::AnnotatedString{S})
482+
eachregion(s::SubString{AnnotatedString{S}})
541483
542-
Erase the presence of `annotations` within a certain `span`.
484+
Identify the contiguous substrings of `s` with constant annotations, and return
485+
an iterator which provides each substring and the applicable annotations as a
486+
`Tuple{SubString{S}, Vector{$Annotation}}`.
543487
544-
This operates by removing all elements of `annotations` that are entirely
545-
contained in `span`, truncating ranges that partially overlap, and splitting
546-
annotations that subsume `span` to just exist either side of `span`.
488+
# Examples
489+
490+
```jldoctest; setup=:(using Base: AnnotatedString, eachregion)
491+
julia> collect(eachregion(AnnotatedString(
492+
"hey there", [(1:3, :face, :bold),
493+
(5:9, :face, :italic)])))
494+
3-element Vector{Tuple{SubString{String}, Vector{$Annotation}}}:
495+
("hey", [$Annotation((:face, :bold))])
496+
(" ", [])
497+
("there", [$Annotation((:face, :italic))])
498+
```
547499
"""
548-
function _clear_annotations_in_region!(annotations::Vector{RegionAnnotation}, span::UnitRange{Int})
549-
# Clear out any overlapping pre-existing annotations.
550-
filter!(ann -> first(ann.region) < first(span) || last(ann.region) > last(span), annotations)
551-
extras = Tuple{Int, RegionAnnotation}[]
552-
for i in eachindex(annotations)
553-
annot = annotations[i]
554-
region = annot.region
555-
# Test for partial overlap
556-
if first(region) <= first(span) <= last(region) || first(region) <= last(span) <= last(region)
557-
annotations[i] =
558-
setindex(annot,
559-
if first(region) < first(span)
560-
first(region):first(span)-1
561-
else
562-
last(span)+1:last(region)
563-
end,
564-
:region)
565-
# If `span` fits exactly within `region`, then we've only copied over
566-
# the beginning overhang, but also need to conserve the end overhang.
567-
if first(region) < first(span) && last(span) < last(region)
568-
push!(extras, (i, setindex(annot, last(span)+1:last(region), :region)))
569-
end
500+
function eachregion(s::AnnotatedString, subregion::UnitRange{Int}=firstindex(s):lastindex(s))
    # Nothing to iterate over when either the string or the requested range is
    # empty. The parentheses matter: `&&` binds tighter than `||`, so without
    # them an empty `s` would skip this early return and fall through.
    (isempty(s) || isempty(subregion)) &&
        return RegionIterator(s.string, UnitRange{Int}[], Vector{Annotation}[])
    events = annotation_events(s, subregion)
    # No annotation touches `subregion`: it forms a single unannotated region.
    isempty(events) && return RegionIterator(s.string, [subregion], [Annotation[]])
    # Label/value pairs in the same order as `annotations(s)`, so they can be
    # looked up with `event.index`.
    annotvals = Annotation[
        (; label, value) for (; label, value) in annotations(s)]
    regions = Vector{UnitRange{Int}}()
    annots = Vector{Vector{Annotation}}()
    pos = first(events).pos
    # Unannotated text between the start of `subregion` and the first event.
    if pos > first(subregion)
        push!(regions, thisind(s, first(subregion)):prevind(s, pos))
        push!(annots, Annotation[])
    end
    # Indices of the currently active annotations, kept sorted so the emitted
    # annotations follow their order in `annotations(s)`.
    activelist = Int[]
    for event in events
        # Moving to a new position closes the region that started at `pos`.
        if event.pos != pos
            push!(regions, pos:prevind(s, event.pos))
            push!(annots, annotvals[activelist])
            pos = event.pos
        end
        if event.active
            insert!(activelist, searchsortedfirst(activelist, event.index), event.index)
        else
            deleteat!(activelist, searchsortedfirst(activelist, event.index))
        end
    end
    # Unannotated text between the last event and the end of `subregion`.
    if last(events).pos < nextind(s, last(subregion))
        push!(regions, last(events).pos:thisind(s, last(subregion)))
        push!(annots, Annotation[])
    end
    RegionIterator(s.string, regions, annots)
end
578533

579-
"""
580-
_insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{$RegionAnnotation}, offset::Int = position(io))
534+
# `SubString` method: translate `pos` (indices into `s`) to indices of the
# parent `AnnotatedString` by adding `s.offset`, then defer to the method for
# `AnnotatedString`.
function eachregion(s::SubString{<:AnnotatedString}, pos::UnitRange{Int}=firstindex(s):lastindex(s))
    if isempty(s)
        # Wrap the plain underlying string (`s.string.string`), exactly as the
        # non-empty branch ends up doing, so both branches return the same
        # `RegionIterator{S}` type and agree with the documented element type.
        RegionIterator(s.string.string, UnitRange{Int}[], Vector{Annotation}[])
    else
        eachregion(s.string, first(pos)+s.offset:last(pos)+s.offset)
    end
end
581541

582-
Register new `annotations` in `io`, applying an `offset` to their regions.
542+
"""
543+
annotation_events(string::AbstractString, annots::Vector{$RegionAnnotation}, subregion::UnitRange{Int})
544+
annotation_events(string::AnnotatedString, subregion::UnitRange{Int})
583545
584-
This largely consists of simply shifting the regions of `annotations` by `offset`
585-
and pushing them onto `io`'s annotations. However, when it is possible to merge
586-
the new annotations with recent annotations in accordance with the semantics
587-
outlined in [`AnnotatedString`](@ref), we do so. More specifically, when there
588-
is a run of the most recent annotations that are also present as the first
589-
`annotations`, with the same value and adjacent regions, the new annotations are
590-
merged into the existing recent annotations by simply extending their range.
546+
Find all annotation "change events" that occur within a `subregion` of `annots`,
547+
with respect to `string`. When `string` is an `AnnotatedString`, `annots` is taken from `annotations(string)`.
591548
592-
This is implemented so that one can say write an `AnnotatedString` to an
593-
`AnnotatedIOBuffer` one character at a time without needlessly producing a
594-
new annotation for each character.
549+
Each change event is given in the form of a `@NamedTuple{pos::Int, active::Bool,
550+
index::Int}` where `pos` is the position of the event, `active` is a boolean
551+
indicating whether the annotation is being activated or deactivated, and `index`
552+
is the index of the annotation in question.
595553
"""
596-
function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{RegionAnnotation}, offset::Int = position(io))
597-
run = 0
598-
if !isempty(io.annotations) && last(last(io.annotations).region) == offset
599-
for i in reverse(axes(annotations, 1))
600-
annot = annotations[i]
601-
first(annot.region) == 1 || continue
602-
i <= length(io.annotations) || continue
603-
if annot.label == last(io.annotations).label && annot.value == last(io.annotations).value
604-
valid_run = true
605-
for runlen in 1:i
606-
new = annotations[begin+runlen-1]
607-
old = io.annotations[end-i+runlen]
608-
if last(old.region) != offset || first(new.region) != 1 || old.label != new.label || old.value != new.value
609-
valid_run = false
610-
break
611-
end
612-
end
613-
if valid_run
614-
run = i
615-
break
616-
end
617-
end
554+
# Collect an activation and a deactivation event for every annotation that
# overlaps `subregion`, then return the events ordered by position.
function annotation_events(s::AbstractString, annots::Vector{RegionAnnotation}, subregion::UnitRange{Int})
    # Each event records: position, whether the annotation turns on, and the
    # index of the annotation within `annots`.
    events = NamedTuple{(:pos, :active, :index), Tuple{Int, Bool, Int}}[]
    for (index, annot) in enumerate(annots)
        region = annot.region
        isempty(intersect(subregion, region)) && continue
        # Clamp the annotation's extent to `subregion`.
        start = max(first(subregion), first(region))
        stop = min(last(subregion), last(region))
        start <= stop || continue # Currently can't handle empty regions
        # Activate at the character containing `start`; deactivate at the
        # character following `stop`.
        push!(events, (pos=thisind(s, start), active=true, index=index))
        push!(events, (pos=nextind(s, stop), active=false, index=index))
    end
    # `events` is local, so sorting it in place yields the same result as
    # sorting a copy.
    return sort!(events, by=event -> event.pos)
end
632566

633-
function read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{T}}) where {T <: AbstractString}
634-
if (start = position(io)) == 0
635-
AnnotatedString(read(io.io, T), copy(io.annotations))
636-
else
637-
annots = [setindex(annot, UnitRange{Int}(max(1, first(annot.region) - start), last(annot.region)-start), :region)
638-
for annot in io.annotations if last(annot.region) > start]
639-
AnnotatedString(read(io.io, T), annots)
640-
end
641-
end
642-
read(io::AnnotatedIOBuffer, ::Type{AnnotatedString{AbstractString}}) = read(io, AnnotatedString{String})
643-
read(io::AnnotatedIOBuffer, ::Type{AnnotatedString}) = read(io, AnnotatedString{String})
644-
645-
function read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{T}}) where {T <: AbstractChar}
646-
pos = position(io)
647-
char = read(io.io, T)
648-
annots = [NamedTuple{(:label, :value)}(annot) for annot in io.annotations if pos+1 in annot.region]
649-
AnnotatedChar(char, annots)
650-
end
651-
read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar{AbstractChar}}) = read(io, AnnotatedChar{Char})
652-
read(io::AnnotatedIOBuffer, ::Type{AnnotatedChar}) = read(io, AnnotatedChar{Char})
653-
654-
function truncate(io::AnnotatedIOBuffer, size::Integer)
655-
truncate(io.io, size)
656-
filter!(ann -> first(ann.region) <= size, io.annotations)
657-
map!(ann -> setindex(ann, first(ann.region):min(size, last(ann.region)), :region),
658-
io.annotations, io.annotations)
659-
io
660-
end
567+
# Convenience method: take the underlying string and the annotation list from
# the `AnnotatedString` itself.
function annotation_events(s::AnnotatedString, subregion::UnitRange{Int})
    return annotation_events(s.string, annotations(s), subregion)
end

0 commit comments

Comments
 (0)