@@ -74,6 +74,8 @@ internal sealed class RegexCharClass
74
74
// Class string for ECMA digits: a \x00 \x02 \x00 prefix followed by ECMADigitSet.
// NOTE(review): the prefix presumably encodes flags / set length / category length — confirm against the class-string layout.
internal const string ECMADigitClass = "\x00 \x02 \x00 " + ECMADigitSet ;
75
75
// Same as ECMADigitClass except that the first prefix char is \x01 instead of \x00.
// NOTE(review): presumably the leading \x01 marks the class as negated — confirm.
internal const string NotECMADigitClass = "\x01 \x02 \x00 " + ECMADigitSet ;
76
76
77
+ // Ranges for the "Han" table entry, written as ascending start/end char pairs like the
+ // other entries in the property table (e.g. "Greek" = \u0370, \u0400).
+ // Only BMP ranges are listed on purpose: a \Uxxxxxxxx escape above U+FFFF compiles to a
+ // surrogate pair (two chars), so each supplementary code point would be read as two
+ // separate range endpoints, producing unsorted, overlapping ranges over surrogate code
+ // units and making unrelated supplementary characters match as Han.
+ // NOTE(review): supplementary-plane Han (U+16FE0.., U+20000.., U+30000..) cannot be
+ // expressed in this char-pair form; supporting it needs code-point-aware matching.
+ private const string HanClass = "\u2E80 \u2FE0 \u2FF0 \u3040 \u3400 \u4DC0 \u4E00 \uA000 \uF900 \uFB00 " ;
78
+
77
79
// Class string named "Any": prefix \x00 \x01 \x00 followed by a single \x00 char.
// NOTE(review): presumably a one-char set starting at \x00 with no end, i.e. every char — confirm.
internal const string AnyClass = "\x00 \x01 \x00 \x00 " ;
78
80
// Class string named "Empty": only the \x00 \x00 \x00 prefix, with nothing after it.
// NOTE(review): presumably zero set length and zero category length, i.e. matches nothing — confirm.
internal const string EmptyClass = "\x00 \x00 \x00 " ;
79
81
@@ -142,6 +144,8 @@ internal sealed class RegexCharClass
142
144
* XML schema spec (http://www.w3.org/TR/2001/PR-xmlschema-2-20010316/#charcter-classes), Unicode 4.0 spec (www.unicode.org),
143
145
* and Perl 5.6 (see Programming Perl, 3rd edition page 167). Three blocks defined by Perl (and here) may
144
146
* not be in the Unicode: IsHighPrivateUseSurrogates, IsHighSurrogates, and IsLowSurrogates.
147
+ *
148
+ * Additional blocks listed at https://en.wikipedia.org/wiki/Unicode_block .
145
149
*
146
150
**/
147
151
// Has to be sorted by the first column
@@ -159,6 +163,7 @@ internal sealed class RegexCharClass
159
163
new [ ] { "Greek" , "\u0370 \u0400 " } ,
160
164
new [ ] { "Gujarati" , "\u0A80 \u0B00 " } ,
161
165
new [ ] { "Gurmukhi" , "\u0A00 \u0A80 " } ,
166
+ new [ ] { "Han" , HanClass } ,
162
167
new [ ] { "Hanunoo" , "\u1720 \u1740 " } ,
163
168
new [ ] { "Hebrew" , "\u0590 \u0600 " } ,
164
169
new [ ] { "Hiragana" , "\u3040 \u30A0 " } ,
0 commit comments