Skip to content

Commit bdc3cef

Browse files
committed
Add eachregion(::AnnotatedString) implementation to Base
Base needs this functionality so that it can iterate its own `AnnotatedString`s.
1 parent 5aed3c2 commit bdc3cef

File tree

2 files changed

+155
-0
lines changed

2 files changed

+155
-0
lines changed

base/strings/annotated.jl

+107
Original file line numberDiff line numberDiff line change
@@ -459,3 +459,110 @@ function annotated_chartransform(f::Function, str::AnnotatedString, state=nothin
459459
end
460460
AnnotatedString(String(take!(outstr)), annots)
461461
end
462+
463+
# Iterator over regions of a string, pairing each region with its annotations.
#
# Fields:
# - `str`: the string that the entries of `regions` index into.
# - `regions`: one index range into `str` per region.
# - `annotations`: for each entry of `regions`, the `(label, value)` pairs that
#   are yielded alongside it. It is indexed with the same index as `regions`,
#   so the two vectors are expected to have the same length.
struct RegionIterator{S <: AbstractString}
    str::S
    regions::Vector{UnitRange{Int}}
    annotations::Vector{Vector{@NamedTuple{label::Symbol, value::Any}}}
end

Base.length(si::RegionIterator) = length(si.regions)

# The iteration state `i` is the index of the next region to yield. Each item is
# a `(SubString, annotations)` tuple; `nothing` is returned once `i` runs past
# the last region.
Base.@propagate_inbounds function Base.iterate(si::RegionIterator, i::Integer=1)
    i <= length(si.regions) || return nothing
    return @inbounds ((SubString(si.str, si.regions[i]), si.annotations[i]), i + 1)
end

# Defined on the iterator *type*, as the iteration interface expects, so that
# `eltype(typeof(itr))` is accurate; `eltype(itr)` reaches this method through
# the generic `eltype(x) = eltype(typeof(x))` fallback.
Base.eltype(::Type{RegionIterator{S}}) where {S <: AbstractString} =
    Tuple{SubString{S}, Vector{@NamedTuple{label::Symbol, value::Any}}}
479+
480+
"""
481+
eachregion(s::AnnotatedString{S})
482+
eachregion(s::SubString{AnnotatedString{S}})
483+
484+
Identify the contiguous substrings of `s` with constant annotations, and return
485+
an iterator which provides each substring and the applicable annotations as a
486+
`Tuple{SubString{S}, Vector{@NamedTuple{label::Symbol, value::Any}}}`.
487+
488+
# Examples
489+
490+
```jldoctest
491+
julia> collect(StyledStrings.eachregion(AnnotatedString(
492+
"hey there", [(1:3, :face, StyledStrings.FaceRef(:bold)),
493+
(5:9, :face, StyledStrings.FaceRef(:italic))])))
494+
3-element Vector{Tuple{SubString{String}, Vector{@NamedTuple{label::Symbol, value}}}}:
495+
("hey", [@NamedTuple{label::Symbol, value}((:face, StyledStrings.FaceRef(:bold)))])
496+
(" ", [])
497+
("there", [@NamedTuple{label::Symbol, value}((:face, StyledStrings.FaceRef(:italic)))])
498+
```
499+
"""
500+
# Split `subregion` of `s` (by default the whole string) into consecutive index
# ranges over which the set of active annotations does not change, and return a
# `RegionIterator` over the underlying string, those ranges, and the
# `(label, value)` pairs active in each of them.
function eachregion(s::AnnotatedString, subregion::UnitRange{Int}=firstindex(s):lastindex(s))
    # `&&` binds more tightly than `||`, so the two emptiness checks must be
    # grouped explicitly: an empty string *or* an empty subregion has no regions.
    (isempty(s) || isempty(subregion)) &&
        return RegionIterator(s.string, UnitRange{Int}[], Vector{@NamedTuple{label::Symbol, value::Any}}[])
    events = annotation_events(s, subregion)
    # No annotation overlaps `subregion`: it forms a single unannotated region.
    isempty(events) && return RegionIterator(s.string, [subregion], [@NamedTuple{label::Symbol, value::Any}[]])
    # The `(label, value)` part of every annotation, in annotation order, so
    # that `event.index` can be used to look them up.
    annotvals = @NamedTuple{label::Symbol, value::Any}[
        (; label, value) for (; label, value) in annotations(s)]
    regions = Vector{UnitRange{Int}}()
    annots = Vector{Vector{@NamedTuple{label::Symbol, value::Any}}}()
    pos = first(events).pos
    # Unannotated stretch between the start of `subregion` and the first event.
    if pos > first(subregion)
        push!(regions, thisind(s, first(subregion)):prevind(s, pos))
        push!(annots, @NamedTuple{label::Symbol, value::Any}[])
    end
    # Indices of the annotations active at `pos`, kept sorted so that each
    # region lists its annotations in their original order.
    activelist = Int[]
    for event in events
        # The position advanced: close the region that ends just before it.
        if event.pos != pos
            push!(regions, pos:prevind(s, event.pos))
            push!(annots, annotvals[activelist])
            pos = event.pos
        end
        if event.active
            insert!(activelist, searchsortedfirst(activelist, event.index), event.index)
        else
            deleteat!(activelist, searchsortedfirst(activelist, event.index))
        end
    end
    # Unannotated stretch between the last event and the end of `subregion`.
    if last(events).pos < nextind(s, last(subregion))
        push!(regions, last(events).pos:thisind(s, last(subregion)))
        push!(annots, @NamedTuple{label::Symbol, value::Any}[])
    end
    return RegionIterator(s.string, regions, annots)
end
533+
534+
# `eachregion` for a substring of an `AnnotatedString`: shift `pos` (indices
# relative to the substring) by the substring's offset and delegate to the
# method for the parent `AnnotatedString`.
function eachregion(s::SubString{<:AnnotatedString}, pos::UnitRange{Int}=firstindex(s):lastindex(s))
    annstr = s.string
    if isempty(s)
        # Wrap the underlying plain string (not the `AnnotatedString`) so that
        # the iterator type matches what the non-empty branch returns.
        return RegionIterator(annstr.string, UnitRange{Int}[], Vector{@NamedTuple{label::Symbol, value::Any}}[])
    end
    return eachregion(annstr, first(pos)+s.offset:last(pos)+s.offset)
end
541+
542+
"""
543+
annotation_events(string::AbstractString, annots::Vector{@NamedTuple{region::UnitRange{Int}, label::Symbol, value::Any}}, subregion::UnitRange{Int})
544+
annotation_events(string::AnnotatedString, subregion::UnitRange{Int})
545+
546+
Find all annotation "change events" that occur within a `subregion` of `annots`,
547+
with respect to `string`. When `string` is styled, `annots` is inferred.
548+
549+
Each change event is given in the form of a `@NamedTuple{pos::Int, active::Bool,
550+
index::Int}` where `pos` is the position of the event, `active` is a boolean
551+
indicating whether the annotation is being activated or deactivated, and `index`
552+
is the index of the annotation in question.
553+
"""
554+
function annotation_events(s::AbstractString, annots::Vector{@NamedTuple{region::UnitRange{Int}, label::Symbol, value::Any}}, subregion::UnitRange{Int})
    # Each entry is (position, activated?, index of the annotation in `annots`).
    changes = @NamedTuple{pos::Int, active::Bool, index::Int}[]
    for (idx, annot) in enumerate(annots)
        # Clamp the annotation to `subregion`; skip it when nothing remains
        # (empty regions are not currently handled).
        lo = max(first(subregion), first(annot.region))
        hi = min(last(subregion), last(annot.region))
        lo <= hi || continue
        # Activation at the start of the clamped range, deactivation at the
        # index just after its end, both snapped to valid indices of `s`.
        push!(changes, (pos=thisind(s, lo), active=true, index=idx))
        push!(changes, (pos=nextind(s, hi), active=false, index=idx))
    end
    return sort!(changes, by=change -> change.pos)
end
566+
567+
# Convenience method: use the annotations carried by the `AnnotatedString`
# itself, measured against its underlying string.
function annotation_events(s::AnnotatedString, subregion::UnitRange{Int})
    return annotation_events(s.string, annotations(s), subregion)
end

test/strings/annotated.jl

+48
Original file line numberDiff line numberDiff line change
@@ -258,3 +258,51 @@ end
258258
write(aio, Base.AnnotatedString("hello", [(1:5, :tag, 1)]))
259259
@test sprint(show, aio) == "Base.AnnotatedIOBuffer(5 bytes, 1 annotation)"
260260
end
261+
262+
@testset "Eachregion" begin
    # Collect the output of `eachregion` as `(substring, [(label, value), ...])`
    # pairs, turning each annotation into a plain tuple for easy comparison.
    function annregions(str::String, annots::Vector{<:Tuple{UnitRange{Int}, Symbol, <:Any}})
        annstr = AnnotatedString(str, annots)
        return [(region, Tuple.(active)) for (region, active) in eachregion(annstr)]
    end
    # Shorthand for the `(label, value)` pairs expected in the results.
    bold, italic = (:face, :bold), (:face, :italic)
    red, yellow = (:face, :red), (:face, :yellow)
    green, blue = (:face, :green), (:face, :blue)
    # Regions that do/don't extend to the left/right edges
    @test annregions(" abc ", [(2:4, :face, :bold)]) ==
        [(" ", []), ("abc", [bold]), (" ", [])]
    @test annregions(" x ", [(2:2, :face, :bold)]) ==
        [(" ", []), ("x", [bold]), (" ", [])]
    @test annregions(" x", [(2:2, :face, :bold)]) ==
        [(" ", []), ("x", [bold])]
    @test annregions("x ", [(1:1, :face, :bold)]) ==
        [("x", [bold]), (" ", [])]
    @test annregions("x", [(1:1, :face, :bold)]) ==
        [("x", [bold])]
    # Overlapping/nested regions
    @test annregions(" abc ", [(2:4, :face, :bold), (3:3, :face, :italic)]) ==
        [(" ", []),
         ("a", [bold]),
         ("b", [bold, italic]),
         ("c", [bold]),
         (" ", [])]
    @test annregions("abc-xyz", [(1:7, :face, :bold), (1:3, :face, :green), (4:4, :face, :yellow), (4:7, :face, :italic)]) ==
        [("abc", [bold, green]),
         ("-", [bold, yellow, italic]),
         ("xyz", [bold, italic])]
    # Preserving annotation order
    @test annregions("abcd", [(1:3, :face, :red), (2:2, :face, :yellow), (2:3, :face, :green), (2:4, :face, :blue)]) ==
        [("a", [red]),
         ("b", [red, yellow, green, blue]),
         ("c", [red, green, blue]),
         ("d", [blue])]
    @test annregions("abcd", [(2:4, :face, :blue), (1:3, :face, :red), (2:3, :face, :green), (2:2, :face, :yellow)]) ==
        [("a", [red]),
         ("b", [blue, red, green, yellow]),
         ("c", [blue, red, green]),
         ("d", [blue])]
    # Region starting after a character spanning multiple code units.
    @test annregions("𝟏x", [(1:4, :face, :red)]) ==
        [("𝟏", [red]),
         ("x", [])]
end

0 commit comments

Comments
 (0)