@@ -476,9 +476,14 @@ public static function split(
476476 int $ flags = 0 ,
477477 bool $ captureOffset = false ,
478478 bool $ noEmpty = false ,
479+ bool $ utf8Offset = false ,
479480 ): array {
480481 $ flags |= ($ captureOffset ? PREG_SPLIT_OFFSET_CAPTURE : 0 ) | ($ noEmpty ? PREG_SPLIT_NO_EMPTY : 0 ) | PREG_SPLIT_DELIM_CAPTURE ;
481- return self ::pcre ('preg_split ' , [$ pattern , $ subject , -1 , $ flags ]);
482+ $ m = self ::pcre ('preg_split ' , [$ pattern , $ subject , -1 , $ flags ]);
483+ if ($ utf8Offset && ($ flags & PREG_SPLIT_OFFSET_CAPTURE )) {
484+ return self ::bytesToChars ($ subject , [$ m ])[0 ];
485+ }
486+ return $ m ;
482487 }
483488
484489
@@ -493,14 +498,22 @@ public static function match(
493498 int $ offset = 0 ,
494499 bool $ captureOffset = false ,
495500 bool $ unmatchedAsNull = false ,
501+ bool $ utf8Offset = false ,
496502 ): ?array {
497503 $ flags |= ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 );
504+ if ($ utf8Offset ) {
505+ $ offset = strlen (self ::substring ($ subject , 0 , $ offset ));
506+ }
498507 if ($ offset > strlen ($ subject )) {
499508 return null ;
500509 }
501- return self ::pcre ('preg_match ' , [$ pattern , $ subject , &$ m , $ flags , $ offset ])
502- ? $ m
503- : null ;
510+ if (!self ::pcre ('preg_match ' , [$ pattern , $ subject , &$ m , $ flags , $ offset ])) {
511+ return null ;
512+ }
513+ if ($ utf8Offset && ($ flags & PREG_OFFSET_CAPTURE )) {
514+ return self ::bytesToChars ($ subject , [$ m ])[0 ];
515+ }
516+ return $ m ;
504517 }
505518
506519
@@ -516,8 +529,12 @@ public static function matchAll(
516529 bool $ captureOffset = false ,
517530 bool $ unmatchedAsNull = false ,
518531 bool $ patternOrder = false ,
532+ bool $ utf8Offset = false ,
519533 ): array {
520534 $ flags |= ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 ) | ($ patternOrder ? PREG_PATTERN_ORDER : 0 );
535+ if ($ utf8Offset ) {
536+ $ offset = strlen (self ::substring ($ subject , 0 , $ offset ));
537+ }
521538 if ($ offset > strlen ($ subject )) {
522539 return [];
523540 }
@@ -526,6 +543,9 @@ public static function matchAll(
526543 ($ flags & PREG_PATTERN_ORDER ) ? $ flags : ($ flags | PREG_SET_ORDER ),
527544 $ offset ,
528545 ]);
546+ if ($ utf8Offset && ($ flags & PREG_OFFSET_CAPTURE )) {
547+ return self ::bytesToChars ($ subject , $ m );
548+ }
529549 return $ m ;
530550 }
531551
@@ -540,12 +560,16 @@ public static function replace(
540560 int $ limit = -1 ,
541561 bool $ captureOffset = false ,
542562 bool $ unmatchedAsNull = false ,
563+ bool $ utf8Offset = false ,
543564 ): string {
544565 if (is_object ($ replacement ) || is_array ($ replacement )) {
545566 if (!is_callable ($ replacement , false , $ textual )) {
546567 throw new Nette \InvalidStateException ("Callback ' $ textual' is not callable. " );
547568 }
548569 $ flags = ($ captureOffset ? PREG_OFFSET_CAPTURE : 0 ) | ($ unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0 );
570+ if ($ utf8Offset && $ captureOffset ) {
571+ $ replacement = fn ($ m ) => $ replacement (self ::bytesToChars ($ subject , [$ m ])[0 ]);
572+ }
549573 return self ::pcre ('preg_replace_callback ' , [$ pattern , $ replacement , $ subject , $ limit , 0 , $ flags ]);
550574
551575 } elseif (is_array ($ pattern ) && is_string (key ($ pattern ))) {
@@ -557,6 +581,22 @@ public static function replace(
557581 }
558582
559583
/**
 * Rewrites the offset stored at index 1 of every captured entry from a byte
 * offset into a character offset, where characters are counted by self::length().
 *
 * A running (byte, char) cursor is kept so that only the slice between two
 * consecutive offsets has to be measured, instead of re-measuring from the
 * start of the subject for every entry.
 *
 * @param  string  $s       subject the offsets refer to
 * @param  array   $groups  list of match lists; each entry is expected to be [text, byteOffset]
 * @return array   the same structure with index 1 of each entry converted
 */
private static function bytesToChars(string $s, array $groups): array
{
	$lastBytes = $lastChars = 0;
	foreach ($groups as &$matches) {
		foreach ($matches as &$match) {
			$bytes = $match[1];
			if ($bytes < 0) {
				// PCRE reports -1 for a group that did not participate in the match;
				// keep the sentinel instead of turning it into a bogus position
				continue;
			}

			if ($bytes > $lastBytes) {
				$lastChars += self::length(substr($s, $lastBytes, $bytes - $lastBytes));
			} elseif ($bytes < $lastBytes) {
				// offsets are not guaranteed to be monotonic (e.g. PREG_PATTERN_ORDER
				// restarts for every group, look-behind captures), so rewind the cursor
				$lastChars -= self::length(substr($s, $bytes, $lastBytes - $bytes));
			}

			$lastBytes = $bytes;
			$match[1] = $lastChars;
		}
		unset($match); // break the reference before the inner loop is entered again
	}
	unset($matches);

	return $groups;
}
598+
599+
560600 /** @internal */
561601 public static function pcre (string $ func , array $ args )
562602 {
0 commit comments