@@ -22,9 +22,6 @@ mutable struct Regex
22
22
compile_options:: UInt32
23
23
match_options:: UInt32
24
24
regex:: Ptr{Cvoid}
25
- extra:: Ptr{Cvoid}
26
- ovec:: Vector{Csize_t}
27
- match_data:: Ptr{Cvoid}
28
25
29
26
function Regex (pattern:: AbstractString , compile_options:: Integer ,
30
27
match_options:: Integer )
@@ -37,11 +34,9 @@ mutable struct Regex
37
34
if (match_options & ~ PCRE. EXECUTE_MASK) != 0
38
35
throw (ArgumentError (" invalid regex match options: $match_options " ))
39
36
end
40
- re = compile (new (pattern, compile_options, match_options, C_NULL ,
41
- C_NULL , Csize_t[], C_NULL ))
37
+ re = compile (new (pattern, compile_options, match_options, C_NULL ))
42
38
finalizer (re) do re
43
39
re. regex == C_NULL || PCRE. free_re (re. regex)
44
- re. match_data == C_NULL || PCRE. free_match_data (re. match_data)
45
40
end
46
41
re
47
42
end
@@ -68,8 +63,6 @@ function compile(regex::Regex)
68
63
if regex. regex == C_NULL
69
64
regex. regex = PCRE. compile (regex. pattern, regex. compile_options)
70
65
PCRE. jit_compile (regex. regex)
71
- regex. match_data = PCRE. create_match_data (regex. regex)
72
- regex. ovec = PCRE. get_ovec (regex. match_data)
73
66
end
74
67
regex
75
68
end
@@ -164,14 +157,12 @@ getindex(m::RegexMatch, name::AbstractString) = m[Symbol(name)]
164
157
165
158
# Return whether the regex `r` matches the string `s`.
#
# `s` is converted with `String(s)` before being handed to PCRE, and `offset`
# is forwarded unchanged to `PCRE.exec_r`. No match data is stored on the
# `Regex`; `PCRE.exec_r` is called without one (presumably it manages a
# temporary block itself -- confirm in the PCRE module).
function occursin(r::Regex, s::AbstractString; offset::Integer=0)
    compile(r)  # make sure the native pattern handle exists before matching
    return PCRE.exec_r(r.regex, String(s), offset, r.match_options)
end
170
162
171
163
# Return whether the regex `r` matches the substring `s`.
#
# Unlike the generic `AbstractString` method, the `SubString` is passed to
# PCRE as-is, without a `String(s)` conversion. `offset` is forwarded
# unchanged to `PCRE.exec_r`.
function occursin(r::Regex, s::SubString; offset::Integer=0)
    compile(r)  # make sure the native pattern handle exists before matching
    return PCRE.exec_r(r.regex, s, offset, r.match_options)
end
176
167
177
168
"""
@@ -198,14 +189,12 @@ true
198
189
"""
199
190
function startswith(s::AbstractString, r::Regex)
    # Make sure the native pattern handle exists before matching.
    compile(r)
    # `PCRE.ANCHORED` is OR-ed into the regex's own match options and the
    # search starts at offset 0; `s` is converted with `String(s)` first.
    anchored_opts = r.match_options | PCRE.ANCHORED
    return PCRE.exec_r(r.regex, String(s), 0, anchored_opts)
end
204
194
205
195
# `SubString` method of `startswith(s, r::Regex)`: identical to the generic
# method except that `s` is passed to PCRE directly, without a `String(s)`
# conversion.
function startswith(s::SubString, r::Regex)
    compile(r)  # make sure the native pattern handle exists before matching
    anchored_opts = r.match_options | PCRE.ANCHORED
    return PCRE.exec_r(r.regex, s, 0, anchored_opts)
end
210
199
211
200
"""
@@ -232,14 +221,12 @@ true
232
221
"""
233
222
function endswith(s::AbstractString, r::Regex)
    # Make sure the native pattern handle exists before matching.
    compile(r)
    # `PCRE.ENDANCHORED` is OR-ed into the regex's own match options and the
    # search starts at offset 0; `s` is converted with `String(s)` first.
    end_anchored_opts = r.match_options | PCRE.ENDANCHORED
    return PCRE.exec_r(r.regex, String(s), 0, end_anchored_opts)
end
238
226
239
227
# `SubString` method of `endswith(s, r::Regex)`: identical to the generic
# method except that `s` is passed to PCRE directly, without a `String(s)`
# conversion.
function endswith(s::SubString, r::Regex)
    compile(r)  # make sure the native pattern handle exists before matching
    end_anchored_opts = r.match_options | PCRE.ENDANCHORED
    return PCRE.exec_r(r.regex, s, 0, end_anchored_opts)
end
244
231
245
232
"""
@@ -274,36 +261,52 @@ function match end
274
261
# Match `re` against `str`, starting the search at index `idx` (handed to PCRE
# as the offset `idx-1`). `add_opts` is OR-ed into the regex's own match
# options. Returns a `RegexMatch`, or `nothing` when there is no match.
#
# The match data block comes from `PCRE.exec_r_data` on every call instead of
# being cached on the `Regex`. It is released in a `finally` clause so that it
# is freed on every exit path, including an exception raised while the
# substrings or the `RegexMatch` are being constructed.
function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer,
               add_opts::UInt32=UInt32(0))
    compile(re)
    opts = re.match_options | add_opts
    matched, data = PCRE.exec_r_data(re.regex, str, idx-1, opts)
    try
        matched || return nothing
        # The output vector holds (start, end) offset pairs: the first pair is
        # the whole match, the following `n` pairs are the capture groups.
        n = div(PCRE.ovec_length(data), 2) - 1
        p = PCRE.ovec_ptr(data)
        mat = SubString(str, unsafe_load(p, 1)+1, prevind(str, unsafe_load(p, 2)+1))
        # Groups whose start offset equals `PCRE.UNSET` are reported as `nothing`.
        cap = Union{Nothing,SubString{String}}[unsafe_load(p,2i+1) == PCRE.UNSET ? nothing :
                                               SubString(str, unsafe_load(p,2i+1)+1,
                                                         prevind(str, unsafe_load(p,2i+2)+1)) for i=1:n]
        off = Int[ unsafe_load(p,2i+1)+1 for i=1:n ]
        return RegexMatch(mat, cap, unsafe_load(p,1)+1, off, re)
    finally
        PCRE.free_match_data(data)
    end
end
289
280
290
281
# Two-argument form: start matching at the first index of `s`.
match(r::Regex, s::AbstractString) = match(r, s, firstindex(s))

# Fallback for string types that have no more specific `match` method:
# reject them with an `ArgumentError` telling the caller to convert first.
function match(r::Regex, s::AbstractString, i::Integer)
    throw(ArgumentError(
        "regex matching is only available for the String type; use String(s) to convert"
    ))
end
294
285
286
# Public entry point: search with no caller-supplied match data. Passing
# `C_NULL` tells `_findnext_re` to obtain (and release) its own block.
findnext(re::Regex, str::Union{String,SubString}, idx::Integer) =
    _findnext_re(re, str, idx, C_NULL)
287
+
295
288
# TODO: return only start index and update deprecation
296
# Find the next match of `re` in `str` at or after index `idx`.
#
# Returns the index range of the match, or `nothing` when there is none.
# Throws `BoundsError` when `idx` is greater than `nextind(str, lastindex(str))`.
#
# `match_data` selects who owns the PCRE match data block:
# - `C_NULL`: a block is obtained from `PCRE.exec_r_data` for this call and
#   freed here before returning;
# - otherwise: the caller's block is used and left for the caller to free.
function _findnext_re(re::Regex, str::Union{String,SubString}, idx::Integer,
                      match_data::Ptr{Cvoid})
    if idx > nextind(str, lastindex(str))
        throw(BoundsError())
    end
    opts = re.match_options
    compile(re)
    alloc = match_data == C_NULL
    if alloc
        matched, data = PCRE.exec_r_data(re.regex, str, idx-1, opts)
    else
        matched = PCRE.exec(re.regex, str, idx-1, opts, match_data)
        data = match_data
    end
    if !matched
        alloc && PCRE.free_match_data(data)
        return nothing
    end
    # The first two output-vector entries are the start and end offsets of the
    # whole match; read them out before the block is released.
    p = PCRE.ovec_ptr(data)
    found = (Int(unsafe_load(p, 1))+1):prevind(str, Int(unsafe_load(p, 2))+1)
    alloc && PCRE.free_match_data(data)
    return found
end
308
311
findnext (r:: Regex , s:: AbstractString , idx:: Integer ) = throw (ArgumentError (
309
312
" regex search is only available for the String type; use String(s) to convert"
@@ -384,9 +387,23 @@ julia> replace(msg, r"#(.+)# from (?<from>\\w+)" => s"FROM: \\g<from>; MESSAGE:
384
387
"""
385
388
macro s_str(string)
    # Wrap the literal in a `SubstitutionString` at macro-expansion time.
    SubstitutionString(string)
end
386
389
390
+ # replacement
391
+
392
# Pairs a compiled `Regex` with a PCRE match data block created for it, so that
# repeated searches can reuse one block.
#
# The constructor compiles `re` and then calls `PCRE.create_match_data`.
# No finalizer is registered on this type: whoever constructs a value is
# responsible for releasing `match_data` (see `_free_pat_replacer`).
struct RegexAndMatchData
    re::Regex
    match_data::Ptr{Cvoid}

    function RegexAndMatchData(re::Regex)
        compile(re)  # `re.regex` must be valid before match data is created from it
        return new(re, PCRE.create_match_data(re.regex))
    end
end
397
+
398
# Search using the match data block carried by `pat` instead of a fresh one.
findnext(pat::RegexAndMatchData, str, i) = _findnext_re(pat.re, str, i, pat.match_data)

# Wrap a `Regex` together with its own match data block.
_pat_replacer(r::Regex) = RegexAndMatchData(r)

# Release the match data block held by `r`.
_free_pat_replacer(r::RegexAndMatchData) = PCRE.free_match_data(r.match_data)
403
+
387
404
# Raise an error reporting that `repl` is not a valid replacement string.
replace_err(repl) = error("Bad replacement string: $repl")
388
405
389
- function _write_capture (io, re, group)
406
+ function _write_capture (io, re:: RegexAndMatchData , group)
390
407
len = PCRE. substring_length_bynumber (re. match_data, group)
391
408
ensureroom (io, len+ 1 )
392
409
PCRE. substring_copy_bynumber (re. match_data, group,
@@ -395,7 +412,7 @@ function _write_capture(io, re, group)
395
412
io. size = max (io. size, io. ptr - 1 )
396
413
end
397
414
398
- function _replace (io, repl_s:: SubstitutionString , str, r, re)
415
+ function _replace (io, repl_s:: SubstitutionString , str, r, re:: RegexAndMatchData )
399
416
SUB_CHAR = ' \\ '
400
417
GROUP_CHAR = ' g'
401
418
LBRACKET = ' <'
@@ -439,8 +456,8 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
439
456
if all (isdigit, groupname)
440
457
_write_capture (io, re, parse (Int, groupname))
441
458
else
442
- group = PCRE. substring_number_from_name (re. regex, groupname)
443
- group < 0 && replace_err (" Group $groupname not found in regex $re " )
459
+ group = PCRE. substring_number_from_name (re. re . regex, groupname)
460
+ group < 0 && replace_err (" Group $groupname not found in regex $(re . re) " )
444
461
_write_capture (io, re, group)
445
462
end
446
463
i = nextind (repl, i)
0 commit comments