Skip to content

Commit 103a119

Browse files
committed
"Han" class
fixes #8
1 parent 1e6d05a commit 103a119

File tree

2 files changed

+13
-0
lines changed

2 files changed

+13
-0
lines changed

src/Peachpie.Library.RegularExpressions/RegexCharClass.cs

+5
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ internal sealed class RegexCharClass
7474
// Serialized character class for ECMAScript digits: a three-char prefix followed by ECMADigitSet.
// NOTE(review): the prefix chars look like a header (flag, set length, category length) — confirm against the class-string parser.
internal const string ECMADigitClass = "\x00\x02\x00" + ECMADigitSet;
7575
// Same payload as ECMADigitClass; only the first prefix char differs (\x01 instead of \x00).
// NOTE(review): presumably that first char is the negation flag — confirm.
internal const string NotECMADigitClass = "\x01\x02\x00" + ECMADigitSet;
7676

77+
// Ranges used for \p{Han}, laid out like the other property-table entries
// (e.g. "Greek" = "\u0370\u0400"): consecutive pairs of UTF-16 chars, start then end.
// Only BMP ranges are listed. A \Uxxxxxxxx escape above U+FFFF compiles to a
// surrogate pair (two chars), so supplementary-plane blocks cannot be written as
// single start/end chars here; spelled that way they turn into extra, out-of-order
// pairs of lone surrogates instead of the intended code-point ranges.
// NOTE(review): supplementary Han ideographs (U+20000 and up) need surrogate-aware
// handling elsewhere if they are required to match \p{Han}.
private const string HanClass = "\u2E80\u2FE0\u2FF0\u3040\u3400\u4DC0\u4E00\uA000\uF900\uFB00";
78+
7779
// Serialized character class constant named for "any character".
// NOTE(review): reads as a three-char prefix plus a single range boundary at \x00 — confirm the encoding against the parser.
internal const string AnyClass = "\x00\x01\x00\x00";
7880
// Serialized character class constant named for the empty class: three \x00-valued prefix chars and no payload.
internal const string EmptyClass = "\x00\x00\x00";
7981

@@ -142,6 +144,8 @@ internal sealed class RegexCharClass
142144
* XML schema spec (http://www.w3.org/TR/2001/PR-xmlschema-2-20010316/#charcter-classes), Unicode 4.0 spec (www.unicode.org),
143145
* and Perl 5.6 (see Programming Perl, 3rd edition page 167). Three blocks defined by Perl (and here) may
144146
* not be in the Unicode: IsHighPrivateUseSurrogates, IsHighSurrogates, and IsLowSurrogates.
147+
*
148+
* Additional blocks listed at https://en.wikipedia.org/wiki/Unicode_block .
145149
*
146150
**/
147151
// Has to be sorted by the first column
@@ -159,6 +163,7 @@ internal sealed class RegexCharClass
159163
new[] {"Greek", "\u0370\u0400"},
160164
new[] {"Gujarati", "\u0A80\u0B00"},
161165
new[] {"Gurmukhi", "\u0A00\u0A80"},
166+
new[] {"Han", HanClass },
162167
new[] {"Hanunoo", "\u1720\u1740"},
163168
new[] {"Hebrew", "\u0590\u0600"},
164169
new[] {"Hiragana", "\u3040\u30A0"},

tests/Peachpie.Library.RegularExpressions.Tests/PcreTests.cs

+8
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,14 @@ public void TestArabic()
6464

6565
Assert.True(match(@"/^\p{Arabic}+/u", "إن").Success);
6666
}
67+
68+
// Checks that the \p{Han} property name is accepted in a /u pattern and that the
// one-character subject U+3000 matches ^\p{Han}+$.
// NOTE(review): U+3000 is IDEOGRAPHIC SPACE, which Unicode lists under Script=Common,
// not Han. This assertion therefore pins block-based behaviour; confirm whether PCRE
// parity expects a match here, and consider adding a real ideograph as a subject.
[Fact]
69+
public void TestHan()
70+
{
71+
// Pattern and subject shape taken from the originating report: '/^\p{Han}+$/u', $word
72+
73+
Assert.True(match(@"/^\p{Han}+$/u", "\u3000").Success);
74+
}
6775

6876
[Fact]
6977
public void TestParseException()

0 commit comments

Comments
 (0)