@@ -403,7 +403,6 @@ these intervals. It follows from the nature of the L on each interval.
403
403
private readonly StringBuilder _categories ;
404
404
private bool _canonical ;
405
405
private bool _negate ;
406
- private RegexCharClass _subtractor ;
407
406
408
407
#if DEBUG
409
408
static RegexCharClass ( )
@@ -433,20 +432,19 @@ public RegexCharClass()
433
432
_categories = new StringBuilder ( ) ;
434
433
}
435
434
436
/// <summary>
/// Builds a character class from already-prepared parts and marks it canonical.
/// </summary>
/// <param name="negate">Whether the class is negated.</param>
/// <param name="ranges">Range list stored directly as the class's ranges (not copied).</param>
/// <param name="categories">Category string builder stored directly (not copied).</param>
private RegexCharClass(bool negate, List<SingleRange> ranges, StringBuilder categories)
{
    _negate = negate;
    _rangelist = ranges;
    _categories = categories;

    // Instances created through this constructor start out flagged as canonical.
    _canonical = true;
}
444
442
445
443
/// <summary>
/// Gets a value indicating whether this class can be merged:
/// <c>true</c> exactly when the class is not negated.
/// </summary>
public bool CanMerge
{
    get { return _negate == false; }
}
452
450
@@ -507,12 +505,6 @@ private void AddSet(string set)
507
505
}
508
506
}
509
507
510
- public void AddSubtraction ( RegexCharClass sub )
511
- {
512
- Debug . Assert ( _subtractor == null , "Can't add two subtractions to a char class. " ) ;
513
- _subtractor = sub ;
514
- }
515
-
516
508
/// <summary>
517
509
/// Adds a single range of characters to the class.
518
510
/// </summary>
@@ -715,20 +707,23 @@ public static char SingletonChar(string set)
715
707
716
708
/// <summary>
/// Tells whether a character class in string form can be merged,
/// which is the case when it is not negated.
/// </summary>
/// <param name="charClass">String representation of the character class.</param>
/// <returns><c>true</c> when <c>IsNegated</c> reports <c>false</c> for <paramref name="charClass"/>.</returns>
public static bool IsMergeable(string charClass)
{
    // Debug builds verify that no subtraction class is encoded in the string.
    Debug.Assert(!IsSubtraction(charClass));

    bool negated = IsNegated(charClass);
    return !negated;
}
720
713
721
714
/// <summary>
/// Tells whether a character class in string form is empty: its category
/// length, flags and set length slots are all zero.
/// </summary>
/// <param name="charClass">String representation of the character class.</param>
/// <returns><c>true</c> when all three header slots are zero; otherwise <c>false</c>.</returns>
public static bool IsEmpty(string charClass)
{
    // Debug builds verify that no subtraction class is encoded in the string.
    Debug.Assert(!IsSubtraction(charClass));

    // Slots are inspected in the same order as a short-circuiting && chain would.
    if (charClass[CATEGORYLENGTH] != 0)
    {
        return false;
    }

    if (charClass[FLAGS] != 0)
    {
        return false;
    }

    return charClass[SETLENGTH] == 0;
}
725
719
726
720
/// <summary>
727
721
/// <c>true</c> if the set contains a single character only
728
722
/// </summary>
729
723
public static bool IsSingleton ( string set )
730
724
{
731
- if ( set [ FLAGS ] == 0 && set [ CATEGORYLENGTH ] == 0 && set [ SETLENGTH ] == 2 && ! IsSubtraction ( set ) &&
725
+ Debug . Assert ( ! IsSubtraction ( set ) ) ;
726
+ if ( set [ FLAGS ] == 0 && set [ CATEGORYLENGTH ] == 0 && set [ SETLENGTH ] == 2 &&
732
727
( set [ SETSTART ] == LastChar || set [ SETSTART ] + 1 == set [ SETSTART + 1 ] ) )
733
728
return true ;
734
729
else
@@ -737,13 +732,17 @@ public static bool IsSingleton(string set)
737
732
738
733
/// <summary>
/// Tells whether the set has its flags slot equal to 1, no categories, and a
/// single range that either starts at <c>LastChar</c> or spans exactly one character.
/// </summary>
/// <param name="set">String representation of the character class.</param>
/// <returns><c>true</c> when all of the conditions above hold; otherwise <c>false</c>.</returns>
public static bool IsSingletonInverse(string set)
{
    // Debug builds verify that no subtraction class is encoded in the string.
    Debug.Assert(!IsSubtraction(set));

    // Header must read: flags == 1, no categories, exactly two range entries.
    if (set[FLAGS] != 1 || set[CATEGORYLENGTH] != 0 || set[SETLENGTH] != 2)
    {
        return false;
    }

    // The range qualifies if it begins at LastChar or its second entry is first + 1.
    char first = set[SETSTART];
    return first == LastChar || first + 1 == set[SETSTART + 1];
}
746
742
743
+ /// <remarks>
744
+ /// Character class subtraction is not supported; this method is used only in debug assertions, as a regression check.
745
+ /// </remarks>
747
746
private static bool IsSubtraction ( string charClass )
748
747
{
749
748
return ( charClass . Length > SETSTART + charClass [ SETLENGTH ] + charClass [ CATEGORYLENGTH ] ) ;
@@ -775,42 +774,32 @@ public static bool IsWordChar(char ch)
775
774
776
775
/// <summary>
/// Determines whether a character belongs to a character class given in its
/// string representation.
/// </summary>
/// <param name="ch">Character to test.</param>
/// <param name="set">
/// String representation of the class. It must end right after its ranges and
/// categories; a trailing subtraction class is not supported.
/// </param>
/// <returns>
/// The raw membership result from <c>CharInClassInternal</c>, inverted when the
/// flags slot of <paramref name="set"/> equals 1.
/// </returns>
public static bool CharInClass(char ch, string set)
{
    int setLength = set[SETLENGTH];
    int categoryLength = set[CATEGORYLENGTH];

    // Nothing may follow the ranges and categories: subtraction is not supported.
    Debug.Assert(
        set.Length == SETSTART + setLength + categoryLength,
        "Character class subtraction is not supported.");

    bool inClass = CharInClassInternal(ch, set, setLength, categoryLength);

    // A flags value of 1 means the raw membership result must be inverted.
    return set[FLAGS] == 1 ? !inClass : inClass;
}
803
792
804
793
/// <summary>
805
794
/// Determines a character's membership in a character class (via the
806
795
/// string representation of the class).
807
796
/// </summary>
808
- private static bool CharInClassInternal ( char ch , string set , int start , int mySetLength , int myCategoryLength )
797
+ private static bool CharInClassInternal ( char ch , string set , int mySetLength , int myCategoryLength )
809
798
{
810
799
int min ;
811
800
int max ;
812
801
int mid ;
813
- min = start + SETSTART ;
802
+ min = SETSTART ;
814
803
max = min + mySetLength ;
815
804
816
805
while ( min != max )
@@ -829,22 +818,22 @@ private static bool CharInClassInternal(char ch, string set, int start, int mySe
829
818
// SETSTART is odd, we can simplify it out of the equation. But if it changes we need to
830
819
// reverse this check.
831
820
Debug . Assert ( ( SETSTART & 0x1 ) == 1 , "If SETSTART is not odd, the calculation below this will be reversed" ) ;
832
- if ( ( min & 0x1 ) == ( start & 0x1 ) )
821
+ if ( ( min & 0x1 ) == 0 ) // Note: originally ((min & 0x1) == ( start & 0x1)), but start is always 0
833
822
return true ;
834
823
else
835
824
{
836
825
if ( myCategoryLength == 0 )
837
826
return false ;
838
827
839
- return CharInCategory ( ch , set , start , mySetLength , myCategoryLength ) ;
828
+ return CharInCategory ( ch , set , mySetLength , myCategoryLength ) ;
840
829
}
841
830
}
842
831
843
- private static bool CharInCategory ( char ch , string set , int start , int mySetLength , int myCategoryLength )
832
+ private static bool CharInCategory ( char ch , string set , int mySetLength , int myCategoryLength )
844
833
{
845
834
UnicodeCategory chcategory = CharUnicodeInfo . GetUnicodeCategory ( ch ) ;
846
835
847
- int i = start + SETSTART + mySetLength ;
836
+ int i = SETSTART + mySetLength ;
848
837
int end = i + myCategoryLength ;
849
838
while ( i < end )
850
839
{
@@ -993,11 +982,9 @@ private static RegexCharClass ParseRecursive(string charClass, int start)
993
982
ranges . Add ( new SingleRange ( first , last ) ) ;
994
983
}
995
984
996
- RegexCharClass sub = null ;
997
- if ( charClass . Length > myEndPosition )
998
- sub = ParseRecursive ( charClass , myEndPosition ) ;
985
+ Debug . Assert ( charClass . Length == myEndPosition ) ;
999
986
1000
- return new RegexCharClass ( charClass [ start + FLAGS ] == 1 , ranges , new StringBuilder ( charClass . Substring ( end , myCategoryLength ) ) , sub ) ;
987
+ return new RegexCharClass ( charClass [ start + FLAGS ] == 1 , ranges , new StringBuilder ( charClass . Substring ( end , myCategoryLength ) ) ) ;
1001
988
}
1002
989
1003
990
/// <summary>
@@ -1046,9 +1033,6 @@ public string ToStringClass()
1046
1033
1047
1034
vsb . Append ( _categories . ToString ( ) ) ;
1048
1035
1049
- if ( _subtractor != null )
1050
- vsb . Append ( _subtractor . ToStringClass ( ) ) ;
1051
-
1052
1036
return vsb . ToString ( ) ;
1053
1037
}
1054
1038
0 commit comments