Skip to content

Commit 2d4b684

Browse files
committed
implement comments
1 parent f073830 commit 2d4b684

File tree

3 files changed

+45
-34
lines changed

3 files changed

+45
-34
lines changed

base/strings/search.jl

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,7 @@ function _search_bloom_mask(c)
188188
end
189189

190190
_nthbyte(s::String, i) = codeunit(s, i)
191-
_nthbyte(a::Union{Vector{UInt8},Vector{Int8}}, i) = a[i]
192-
_nthbyte(t::AbstractVector, index) = t[firstindex(t) + (index-1)]
191+
_nthbyte(t::AbstractVector, index) = t[index + (firstindex(t)-1)]
193192

194193
function _searchindex(s::String, t::String, i::Integer)
195194
# Check for fast case of a single byte
@@ -199,29 +198,29 @@ end
199198

200199
function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}},
201200
t::AbstractVector{<:Union{Int8,UInt8}},
202-
i::Integer)
201+
_i::Integer)
203202
n = length(t)
204203
m = length(s)
205-
f_s = firstindex(s)
206-
i < f_s && throw(BoundsError(s, i))
204+
i = Int(_i) - (firstindex(s) - 1)
205+
i < 1 && throw(BoundsError(s, _i))
207206

208207
if n == 0
209-
return f_s <= i <= m+1 ? max(f_s, i) : 0
208+
return 1 <= i <= m+1 ? max(1, i) : 0
210209
elseif m == 0
211210
return 0
212211
elseif n == 1
213212
return something(findnext(isequal(_nthbyte(t,1)), s, i), 0)
214213
end
215214

216215
w = m - n
217-
if w < 0 || i - f_s > w
216+
if w < 0 || i - 1 > w
218217
return 0
219218
end
220219

221220
bloom_mask = UInt64(0)
222-
skip = n - f_s
221+
skip = n - 1
223222
tlast = _nthbyte(t,n)
224-
for j in eachindex(t)
223+
for j in 1:n
225224
bloom_mask |= _search_bloom_mask(_nthbyte(t,j))
226225
if _nthbyte(t,j) == tlast && j < n
227226
skip = n - j - 1
@@ -242,7 +241,8 @@ function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}},
242241

243242
# match found
244243
if j == n - 1
245-
return i+f_s
244+
# restore the index offset in case `s` is an OffsetArray
245+
return i+firstindex(s)
246246
end
247247

248248
# no match, try to rule out the next character
@@ -333,17 +333,20 @@ Find the next occurrence of the sequence `pattern` in vector `A` starting at pos
333333
334334
# Examples
335335
```jldoctest
336-
julia> findnext([0x52, 0x62], [0x52, 0x62, 0x72], 5) === nothing
336+
julia> findnext([0x52, 0x62], [0x52, 0x62, 0x72], 3) === nothing
337337
true
338338
339339
julia> findnext([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3)
340340
4:5
341341
```
342342
"""
343-
findnext(pattern::AbstractVector{<:Union{Int8,UInt8}},
344-
A::AbstractVector{<:Union{Int8,UInt8}},
345-
start::Integer) =
343+
function findnext(pattern::AbstractVector{<:Union{Int8,UInt8}},
344+
A::AbstractVector{<:Union{Int8,UInt8}},
345+
start::Integer)
346+
(start == (lastindex(A)+1)) && return nothing
347+
(start > (lastindex(A)+1)) && throw(BoundsError(A, start))
346348
_search(A, pattern, start)
349+
end
347350

348351
"""
349352
findlast(pattern::AbstractString, string::AbstractString)
@@ -376,9 +379,10 @@ julia> findlast([0x52, 0x62], [0x52, 0x62, 0x52, 0x62])
376379
3:4
377380
```
378381
"""
379-
findlast(pattern::AbstractVector{<:Union{Int8,UInt8}},
380-
A::AbstractVector{<:Union{Int8,UInt8}}) =
382+
function findlast(pattern::AbstractVector{<:Union{Int8,UInt8}},
383+
A::AbstractVector{<:Union{Int8,UInt8}})
381384
findprev(pattern, A, lastindex(A))
385+
end
382386
"""
383387
findlast(ch::AbstractChar, string::AbstractString)
384388
@@ -452,29 +456,29 @@ function _rsearchindex(s::String, t::String, i::Integer)
452456
end
453457
end
454458

455-
function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, k::Integer)
459+
function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, _k::Integer)
456460
n = length(t)
457461
m = length(s)
458-
f_s = firstindex(s)
459-
k < f_s && throw(BoundsError(s, k))
462+
k = Int(_k) - (firstindex(s) - 1)
463+
k < 1 && throw(BoundsError(s, _k))
460464

461465
if n == 0
462-
return 0 <= k <= m ? max(f_s, k) : 0
466+
return 0 <= k <= m ? max(k, 1) : 0
463467
elseif m == 0
464468
return 0
465469
elseif n == 1
466470
return something(findprev(isequal(_nthbyte(t,1)), s, k), 0)
467471
end
468472

469473
w = m - n
470-
if w < 0 || k <= f_s
474+
if w < 0 || k <= 0
471475
return 0
472476
end
473477

474478
bloom_mask = UInt64(0)
475479
skip = n - 1
476480
tfirst = _nthbyte(t,1)
477-
for j in reverse(eachindex(t))
481+
for j in n:-1:1
478482
bloom_mask |= _search_bloom_mask(_nthbyte(t,j))
479483
if _nthbyte(t,j) == tfirst && j > 1
480484
skip = j - 2
@@ -495,7 +499,7 @@ function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector
495499

496500
# match found
497501
if j == n
498-
return i + f_s - 1
502+
return i - 1 + firstindex(s)
499503
end
500504

501505
# no match, try to rule out the next character
@@ -587,10 +591,13 @@ julia> findprev([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3)
587591
2:3
588592
```
589593
"""
590-
findprev(pattern::AbstractVector{<:Union{Int8,UInt8}},
591-
A::AbstractVector{<:Union{Int8,UInt8}},
592-
start::Integer) =
594+
function findprev(pattern::AbstractVector{<:Union{Int8,UInt8}},
595+
A::AbstractVector{<:Union{Int8,UInt8}},
596+
start::Integer)
597+
(start == (lastindex(A)+1)) && return nothing
598+
(start > (lastindex(A)+1)) && throw(BoundsError(A, start))
593599
_rsearch(A, pattern, start)
600+
end
594601
"""
595602
occursin(needle::Union{AbstractString,Regex,AbstractChar}, haystack::AbstractString)
596603

test/offsetarray.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -630,24 +630,26 @@ end
630630
OA = OffsetArray(VT[0x40,0x52,0x62,0x52,0x62], 1)
631631
for PT in [Int8, UInt8]
632632
pattern = PT[0x52, 0x62]
633+
l_OA = lastindex(OA)
633634
@test findfirst(pattern, OA) === 3:4
634635
@test findnext(pattern, OA, 2) === 3:4
635636
@test findnext(pattern, OA, 4) === 5:6
636637
@test findnext(pattern, OA, 6) === nothing
637638
@test findnext(pattern, OA, 7) === nothing
638639
@test findnext(pattern, OA, 2) === 3:4
639640
@test findnext(pattern, OA, 4) === 5:6
640-
@test findnext(pattern, OA, 6) === nothing
641-
@test findnext(pattern, OA, 99) === nothing
641+
# a start index one past the last index is allowed
642+
@test findnext(pattern, OA, l_OA+1) === nothing
643+
@test_throws BoundsError findnext(pattern, OA, l_OA+2)
642644
@test_throws BoundsError findnext(pattern, OA, 1)
643645

644646
@test findlast(pattern, OA) === 5:6
645647
@test findprev(pattern, OA, 2) === nothing
646648
@test findprev(pattern, OA, 4) === 3:4
647649
@test findprev(pattern, OA, 6) === 5:6
648-
@test findprev(pattern, OA, 99) === findlast(pattern, OA)
650+
@test findnext(pattern, OA, l_OA+1) === nothing
651+
@test_throws BoundsError findnext(pattern, OA, l_OA+2)
649652
@test_throws BoundsError findprev(pattern, OA, 1)
650653
end
651654
end
652655
end
653-

test/strings/search.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -405,16 +405,18 @@ end
405405
@test findfirst(pattern, A) === 2:3
406406
@test findnext(pattern, A, 2) === 2:3
407407
@test findnext(pattern, A, 3) === 4:5
408-
@test findnext(pattern, A, 5) === nothing
409-
@test findnext(pattern, A, 99) === nothing
408+
# a start index one past the last index is allowed
409+
@test findnext(pattern, A, length(A)+1) === nothing
410410
@test_throws BoundsError findnext(pattern, A, -3)
411+
@test_throws BoundsError findnext(pattern, A, length(A)+2)
411412

412413
@test findlast(pattern, A) === 4:5
413414
@test findprev(pattern, A, 3) === 2:3
414415
@test findprev(pattern, A, 5) === 4:5
415416
@test findprev(pattern, A, 2) === nothing
416-
@test findprev(pattern, A, 99) === findlast(pattern, A)
417-
@test_throws BoundsError findprev(pattern, A, -2)
417+
@test findprev(pattern, A, length(A)+1) === nothing
418+
@test_throws BoundsError findprev(pattern, A, -3)
419+
@test_throws BoundsError findprev(pattern, A, length(A)+2)
418420
end
419421
end
420422

0 commit comments

Comments
 (0)