Skip to content

Commit cef655a

Browse files
authored
fix #31521, make regexes thread-safe (#32381)
1 parent f6049d6 commit cef655a

File tree

4 files changed

+114
-59
lines changed

4 files changed

+114
-59
lines changed

base/client.jl

+4
Original file line number | Diff line number | Diff line change
@@ -460,6 +460,10 @@ MainInclude.include
460460
function _start()
461461
empty!(ARGS)
462462
append!(ARGS, Core.ARGS)
463+
if ccall(:jl_generating_output, Cint, ()) != 0 && JLOptions().incremental == 0
464+
# clear old invalid pointers
465+
PCRE.__init__()
466+
end
463467
try
464468
exec_options(JLOptions())
465469
catch

base/pcre.jl

+51-22
Original file line number | Diff line number | Diff line change
@@ -10,24 +10,36 @@ include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "pcre_h.jl")) # incl
1010

1111
const PCRE_LIB = "libpcre2-8"
1212

13-
const JIT_STACK = RefValue{Ptr{Cvoid}}(C_NULL)
14-
const MATCH_CONTEXT = RefValue{Ptr{Cvoid}}(C_NULL)
13+
# Create a PCRE2 match context with its own JIT stack assigned and return the raw
# context pointer (`Ptr{Cvoid}`).
#
# The JIT stack starts at 32 KiB and may grow up to 1 MiB.  Neither the context nor
# the JIT stack is freed by this function.
function create_match_context()
    JIT_STACK_START_SIZE = 32768
    JIT_STACK_MAX_SIZE = 1048576
    # `pcre2_jit_stack_create` declares both sizes as `size_t`; declaring them as
    # `Cint` in the ccall signature would pass arguments of the wrong width.
    jit_stack = ccall((:pcre2_jit_stack_create_8, PCRE_LIB), Ptr{Cvoid},
                      (Csize_t, Csize_t, Ptr{Cvoid}),
                      JIT_STACK_START_SIZE, JIT_STACK_MAX_SIZE, C_NULL)
    ctx = ccall((:pcre2_match_context_create_8, PCRE_LIB),
                Ptr{Cvoid}, (Ptr{Cvoid},), C_NULL)
    # A NULL callback with non-NULL data makes PCRE2 use `jit_stack` directly.
    ccall((:pcre2_jit_stack_assign_8, PCRE_LIB), Cvoid,
          (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}), ctx, C_NULL, jit_stack)
    return ctx
end
1525

16-
function __init__()
17-
try
18-
JIT_STACK_START_SIZE = 32768
19-
JIT_STACK_MAX_SIZE = 1048576
20-
JIT_STACK[] = ccall((:pcre2_jit_stack_create_8, PCRE_LIB), Ptr{Cvoid},
21-
(Cint, Cint, Ptr{Cvoid}),
22-
JIT_STACK_START_SIZE, JIT_STACK_MAX_SIZE, C_NULL)
23-
MATCH_CONTEXT[] = ccall((:pcre2_match_context_create_8, PCRE_LIB),
24-
Ptr{Cvoid}, (Ptr{Cvoid},), C_NULL)
25-
ccall((:pcre2_jit_stack_assign_8, PCRE_LIB), Cvoid,
26-
(Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}), MATCH_CONTEXT[], C_NULL, JIT_STACK[])
27-
catch ex
28-
Base.showerror_nostdio(ex,
29-
"WARNING: Error during initialization of module PCRE")
26+
# Table of PCRE match-context pointers indexed by `_tid()`.  `C_NULL` marks a slot
# whose context has not been created yet; `__init__` resizes the table to `_nth()`.
const THREAD_MATCH_CONTEXTS = Ptr{Cvoid}[C_NULL]
27+
28+
# Index of the calling thread into `THREAD_MATCH_CONTEXTS`: the runtime's
# `jl_threadid()` value plus one, widened to `Int`.
function _tid()
    raw_id = ccall(:jl_threadid, Int16, ())
    return Int(raw_id + 1)
end
29+
# Read the runtime's `jl_n_threads` C global (as a `Cint`) and return it as an `Int`.
function _nth()
    counter = cglobal(:jl_n_threads, Cint)
    return Int(unsafe_load(counter))
end
30+
31+
# Return the match context stored in `THREAD_MATCH_CONTEXTS` for the calling thread
# (slot `_tid()`).  A `C_NULL` slot is filled with a fresh context from
# `create_match_context()` first, so contexts are created lazily.
function get_local_match_context()
    slot = _tid()
    cached = @inbounds THREAD_MATCH_CONTEXTS[slot]
    cached == C_NULL || return cached
    fresh = create_match_context()
    @inbounds THREAD_MATCH_CONTEXTS[slot] = fresh
    return fresh
end
39+
40+
# Size the per-thread context table to `_nth()` entries and reset every slot to
# `C_NULL`; `get_local_match_context` then recreates contexts on demand.
function __init__()
    nslots = _nth()
    resize!(THREAD_MATCH_CONTEXTS, nslots)
    return fill!(THREAD_MATCH_CONTEXTS, C_NULL)
end
3244

3345
# supported options for different use cases
@@ -87,12 +99,16 @@ function info(regex::Ptr{Cvoid}, what::Integer, ::Type{T}) where T
8799
buf[]
88100
end
89101

90-
function get_ovec(match_data)
91-
ptr = ccall((:pcre2_get_ovector_pointer_8, PCRE_LIB), Ptr{Csize_t},
92-
(Ptr{Cvoid},), match_data)
102+
# Number of entries in the ovector of `match_data`: twice the count reported by
# `pcre2_get_ovector_count_8`.
function ovec_length(match_data)
    pair_count = ccall((:pcre2_get_ovector_count_8, PCRE_LIB), UInt32,
                       (Ptr{Cvoid},), match_data)
    return 2 * pair_count
end
107+
108+
# Return the `Ptr{Csize_t}` that `pcre2_get_ovector_pointer_8` reports for
# `match_data`.
function ovec_ptr(match_data)
    return ccall((:pcre2_get_ovector_pointer_8, PCRE_LIB), Ptr{Csize_t},
                 (Ptr{Cvoid},), match_data)
end
97113

98114
function compile(pattern::AbstractString, options::Integer)
@@ -132,15 +148,28 @@ function err_message(errno)
132148
GC.@preserve buffer unsafe_string(pointer(buffer))
133149
end
134150

135-
function exec(re,subject,offset,options,match_data)
151+
# Run `pcre2_match_8` for compiled pattern `re` against `subject`, starting at byte
# `offset`, writing results into `match_data` and using the calling thread's match
# context.  Returns `true` when the return code is non-negative; throws via `error`
# for return codes below -2.
function exec(re, subject, offset, options, match_data)
    rc = ccall((:pcre2_match_8, PCRE_LIB), Cint,
               (Ptr{Cvoid}, Ptr{UInt8}, Csize_t, Csize_t, Cuint, Ptr{Cvoid}, Ptr{Cvoid}),
               re, subject, sizeof(subject), offset, options, match_data,
               get_local_match_context())
    # rc == -1 means no match, -2 means partial match.
    if rc < -2
        error("PCRE.exec error: $(err_message(rc))")
    end
    return rc >= 0
end
143159

160+
# Match `re` against `subject` using a temporary match-data block and return
# whether it matched.
#
# The match data is released in a `finally` clause so that it is not leaked when
# `exec` throws (it raises an error for PCRE return codes below -2).
function exec_r(re, subject, offset, options)
    match_data = create_match_data(re)
    try
        return exec(re, subject, offset, options, match_data)
    finally
        free_match_data(match_data)
    end
end
166+
167+
# Match `re` against `subject` using a freshly created match-data block and return
# `(matched, match_data)`.  On a normal return the caller owns `match_data` and must
# release it with `free_match_data`.
#
# If `exec` throws, the caller never receives the pointer, so the block is freed
# here before the exception is propagated.
function exec_r_data(re, subject, offset, options)
    match_data = create_match_data(re)
    ans = try
        exec(re, subject, offset, options, match_data)
    catch
        free_match_data(match_data)
        rethrow()
    end
    return ans, match_data
end
172+
144173
function create_match_data(re)
145174
ccall((:pcre2_match_data_create_from_pattern_8, PCRE_LIB),
146175
Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}), re, C_NULL)

base/regex.jl

+54-37
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,6 @@ mutable struct Regex
2222
compile_options::UInt32
2323
match_options::UInt32
2424
regex::Ptr{Cvoid}
25-
extra::Ptr{Cvoid}
26-
ovec::Vector{Csize_t}
27-
match_data::Ptr{Cvoid}
2825

2926
function Regex(pattern::AbstractString, compile_options::Integer,
3027
match_options::Integer)
@@ -37,11 +34,9 @@ mutable struct Regex
3734
if (match_options & ~PCRE.EXECUTE_MASK) !=0
3835
throw(ArgumentError("invalid regex match options: $match_options"))
3936
end
40-
re = compile(new(pattern, compile_options, match_options, C_NULL,
41-
C_NULL, Csize_t[], C_NULL))
37+
re = compile(new(pattern, compile_options, match_options, C_NULL))
4238
finalizer(re) do re
4339
re.regex == C_NULL || PCRE.free_re(re.regex)
44-
re.match_data == C_NULL || PCRE.free_match_data(re.match_data)
4540
end
4641
re
4742
end
@@ -68,8 +63,6 @@ function compile(regex::Regex)
6863
if regex.regex == C_NULL
6964
regex.regex = PCRE.compile(regex.pattern, regex.compile_options)
7065
PCRE.jit_compile(regex.regex)
71-
regex.match_data = PCRE.create_match_data(regex.regex)
72-
regex.ovec = PCRE.get_ovec(regex.match_data)
7366
end
7467
regex
7568
end
@@ -164,14 +157,12 @@ getindex(m::RegexMatch, name::AbstractString) = m[Symbol(name)]
164157

165158
# Test whether `r` matches somewhere in `s`, starting the search at byte `offset`.
# The subject is converted to `String` before being handed to PCRE.
function occursin(r::Regex, s::AbstractString; offset::Integer=0)
    compile(r)
    subject = String(s)
    return PCRE.exec_r(r.regex, subject, offset, r.match_options)
end
170162

171163
# `SubString` method: the subject is passed to PCRE as-is, without converting it
# to `String` first.
function occursin(r::Regex, s::SubString; offset::Integer=0)
    compile(r)
    opts = r.match_options
    return PCRE.exec_r(r.regex, s, offset, opts)
end
176167

177168
"""
@@ -198,14 +189,12 @@ true
198189
"""
199190
function startswith(s::AbstractString, r::Regex)
    # Adding PCRE.ANCHORED to the regex's own match options makes this a prefix
    # test; the subject is converted to `String` first.
    compile(r)
    subject = String(s)
    return PCRE.exec_r(r.regex, subject, 0, r.match_options | PCRE.ANCHORED)
end
204194

205195
# `SubString` method: same anchored match, with the subject passed to PCRE without
# conversion.
function startswith(s::SubString, r::Regex)
    compile(r)
    opts = r.match_options | PCRE.ANCHORED
    return PCRE.exec_r(r.regex, s, 0, opts)
end
210199

211200
"""
@@ -232,14 +221,12 @@ true
232221
"""
233222
function endswith(s::AbstractString, r::Regex)
    # Adding PCRE.ENDANCHORED to the regex's own match options makes this a suffix
    # test; the subject is converted to `String` first.
    compile(r)
    subject = String(s)
    return PCRE.exec_r(r.regex, subject, 0, r.match_options | PCRE.ENDANCHORED)
end
238226

239227
# `SubString` method: same end-anchored match, with the subject passed to PCRE
# without conversion.
function endswith(s::SubString, r::Regex)
    compile(r)
    opts = r.match_options | PCRE.ENDANCHORED
    return PCRE.exec_r(r.regex, s, 0, opts)
end
244231

245232
"""
@@ -274,36 +261,52 @@ function match end
274261
# Match `re` against `str` starting at index `idx`, with `add_opts` OR-ed into the
# regex's own match options.  Returns a `RegexMatch`, or `nothing` when there is no
# match.
#
# A private match-data block is allocated per call.  It is released in `finally`,
# so it is not leaked if `PCRE.exec` or the construction of the result throws.
function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer,
               add_opts::UInt32=UInt32(0))
    compile(re)
    opts = re.match_options | add_opts
    data = PCRE.create_match_data(re.regex)
    try
        PCRE.exec(re.regex, str, idx-1, opts, data) || return nothing
        # The ovector holds (start, end) offset pairs: pair 0 is the whole match,
        # pairs 1..n are the capture groups.
        n = div(PCRE.ovec_length(data), 2) - 1
        p = PCRE.ovec_ptr(data)
        mat = SubString(str, unsafe_load(p, 1)+1, prevind(str, unsafe_load(p, 2)+1))
        cap = Union{Nothing,SubString{String}}[
            unsafe_load(p, 2i+1) == PCRE.UNSET ? nothing :
                SubString(str, unsafe_load(p, 2i+1)+1,
                          prevind(str, unsafe_load(p, 2i+2)+1)) for i=1:n]
        off = Int[ unsafe_load(p, 2i+1)+1 for i=1:n ]
        # Every value has been copied out of the ovector by the time `data` is freed.
        return RegexMatch(mat, cap, unsafe_load(p, 1)+1, off, re)
    finally
        PCRE.free_match_data(data)
    end
end
289280

290281
# Two-argument form: start matching at the first index of `s`.
function match(r::Regex, s::AbstractString)
    start = firstindex(s)
    return match(r, s, start)
end
291282
# Fallback for subject types that have no more specific method: always throws.
function match(r::Regex, s::AbstractString, i::Integer)
    msg = "regex matching is only available for the String type; use String(s) to convert"
    throw(ArgumentError(msg))
end
294285

286+
# Public entry point: passes `C_NULL` as the match data, which makes `_findnext_re`
# allocate and free its own block for this call.
function findnext(re::Regex, str::Union{String,SubString}, idx::Integer)
    return _findnext_re(re, str, idx, C_NULL)
end
287+
295288
# TODO: return only start index and update deprecation
296-
function findnext(re::Regex, str::Union{String,SubString}, idx::Integer)
289+
# Search `str` for `re` starting at index `idx` and return the index range of the
# match, or `nothing` if there is none.  Throws `BoundsError` when `idx` is beyond
# `nextind(str, lastindex(str))`.
#
# `match_data` may be a caller-owned match-data block to reuse, or `C_NULL` to have
# one allocated for this call only.  A locally allocated block is released in
# `finally`, so it is not leaked if `PCRE.exec` or `prevind` throws.  A
# caller-supplied block is never freed here.
function _findnext_re(re::Regex, str::Union{String,SubString}, idx::Integer,
                      match_data::Ptr{Cvoid})
    if idx > nextind(str,lastindex(str))
        throw(BoundsError())
    end
    opts = re.match_options
    compile(re)
    alloc = match_data == C_NULL
    data = alloc ? PCRE.create_match_data(re.regex) : match_data
    try
        PCRE.exec(re.regex, str, idx-1, opts, data) || return nothing
        p = PCRE.ovec_ptr(data)
        return (Int(unsafe_load(p,1))+1):prevind(str,Int(unsafe_load(p,2))+1)
    finally
        alloc && PCRE.free_match_data(data)
    end
end
308311
findnext(r::Regex, s::AbstractString, idx::Integer) = throw(ArgumentError(
309312
"regex search is only available for the String type; use String(s) to convert"
@@ -384,9 +387,23 @@ julia> replace(msg, r"#(.+)# from (?<from>\\w+)" => s"FROM: \\g<from>; MESSAGE:
384387
"""
385388
macro s_str(string)
    # Built at macro-expansion time from the literal's raw text.
    SubstitutionString(string)
end
386389

390+
# replacement
391+
392+
# Pairs a `Regex` with a match-data block created for it.  The constructor compiles
# the regex and then allocates the block with `PCRE.create_match_data`; the struct
# registers no finalizer, and the block is released through `_free_pat_replacer`.
struct RegexAndMatchData
    re::Regex
    match_data::Ptr{Cvoid}
    function RegexAndMatchData(re::Regex)
        compile(re)
        return new(re, PCRE.create_match_data(re.regex))
    end
end
397+
398+
# Search with the wrapped regex, reusing the wrapper's match-data block.
function findnext(pat::RegexAndMatchData, str, i)
    return _findnext_re(pat.re, str, i, pat.match_data)
end
399+
400+
# A `Regex` pattern is wrapped together with a match-data block.
function _pat_replacer(r::Regex)
    return RegexAndMatchData(r)
end
401+
402+
# Release the match-data block held by `r`.
function _free_pat_replacer(r::RegexAndMatchData)
    return PCRE.free_match_data(r.match_data)
end
403+
387404
# Raise an `ErrorException` reporting `repl` as a malformed replacement string.
function replace_err(repl)
    return error("Bad replacement string: $repl")
end
388405

389-
function _write_capture(io, re, group)
406+
function _write_capture(io, re::RegexAndMatchData, group)
390407
len = PCRE.substring_length_bynumber(re.match_data, group)
391408
ensureroom(io, len+1)
392409
PCRE.substring_copy_bynumber(re.match_data, group,
@@ -395,7 +412,7 @@ function _write_capture(io, re, group)
395412
io.size = max(io.size, io.ptr - 1)
396413
end
397414

398-
function _replace(io, repl_s::SubstitutionString, str, r, re)
415+
function _replace(io, repl_s::SubstitutionString, str, r, re::RegexAndMatchData)
399416
SUB_CHAR = '\\'
400417
GROUP_CHAR = 'g'
401418
LBRACKET = '<'
@@ -439,8 +456,8 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
439456
if all(isdigit, groupname)
440457
_write_capture(io, re, parse(Int, groupname))
441458
else
442-
group = PCRE.substring_number_from_name(re.regex, groupname)
443-
group < 0 && replace_err("Group $groupname not found in regex $re")
459+
group = PCRE.substring_number_from_name(re.re.regex, groupname)
460+
group < 0 && replace_err("Group $groupname not found in regex $(re.re)")
444461
_write_capture(io, re, group)
445462
end
446463
i = nextind(repl, i)

base/strings/util.jl

+5
Original file line number | Diff line number | Diff line change
@@ -426,13 +426,17 @@ replace(str::String, pat_repl::Pair{<:Union{Tuple{Vararg{<:AbstractChar}},
426426
count::Integer=typemax(Int)) =
427427
replace(str, in(first(pat_repl)) => last(pat_repl), count=count)
428428

429+
# Generic fallback: return the pattern unchanged.
function _pat_replacer(x)
    return x
end
430+
# Generic fallback: does nothing and returns `nothing`.
function _free_pat_replacer(x)
    return nothing
end
431+
429432
function replace(str::String, pat_repl::Pair; count::Integer=typemax(Int))
430433
pattern, repl = pat_repl
431434
count == 0 && return str
432435
count < 0 && throw(DomainError(count, "`count` must be non-negative."))
433436
n = 1
434437
e = lastindex(str)
435438
i = a = firstindex(str)
439+
pattern = _pat_replacer(pattern)
436440
r = something(findnext(pattern,str,i), 0)
437441
j, k = first(r), last(r)
438442
out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str)))
@@ -453,6 +457,7 @@ function replace(str::String, pat_repl::Pair; count::Integer=typemax(Int))
453457
j, k = first(r), last(r)
454458
n += 1
455459
end
460+
_free_pat_replacer(pattern)
456461
write(out, SubString(str,i))
457462
String(take!(out))
458463
end

0 commit comments

Comments
 (0)