diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 18d53a3f0..82201166c 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2025-01-27, 18:09:08 GMT +# Date: 2025-02-24, 14:36:03 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2095,6 +2095,7 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 187F8..187FF ; 17.0 # [8] TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF 18D09..18D1E ; 17.0 # [22] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; 17.0 # [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 +1B168 ; 17.0 # KATAKANA LETTER SMALL ARCHAIC YE 1CCFA..1CCFC ; 17.0 # [3] SNAKE SYMBOL..NOSE SYMBOL 1CEBA..1CED0 ; 17.0 # [23] FRAGILE SYMBOL..LEUKOTHEA 1CEE0..1CEF0 ; 17.0 # [17] GEOMANTIC FIGURE POPULUS..MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR @@ -2116,6 +2117,6 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 2B73A..2B73E ; 17.0 # [5] CJK UNIFIED IDEOGRAPH-2B73A..CJK UNIFIED IDEOGRAPH-2B73E 323B0..33479 ; 17.0 # [4298] CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 4836 +# Total code points: 4837 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index c5df66f61..8a3f13ae1 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2025-02-14, 00:13:14 GMT +# Date: 2025-02-24, 14:36:18 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1349,7 +1349,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1B132 ; Alphabetic # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; Alphabetic # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 ; Alphabetic # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; Alphabetic # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; Alphabetic # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB ; Alphabetic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; Alphabetic # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; Alphabetic # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK @@ -1471,7 +1471,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Alphabetic # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 147441 +# Total code points: 147442 # ================================================ @@ -6937,7 +6937,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1B132 ; ID_Start # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; ID_Start # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 ; ID_Start # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; ID_Start # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; ID_Start # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB ; ID_Start # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; ID_Start # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; ID_Start # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK @@ -7044,7 +7044,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; ID_Start # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 145935 +# Total code points: 145936 # ================================================ @@ -8335,7 +8335,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1B132 ; ID_Continue # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; ID_Continue # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 ; ID_Continue # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; ID_Continue # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; ID_Continue # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB ; ID_Continue # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; ID_Continue # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; ID_Continue # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK @@ -8484,7 +8484,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..33479 ; ID_Continue # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 149273 +# Total code points: 149274 # ================================================ @@ -9172,7 +9172,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1B132 ; XID_Start # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; XID_Start # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 ; XID_Start # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; XID_Start # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; XID_Start # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB ; XID_Start # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; XID_Start # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; XID_Start # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK @@ -9279,7 +9279,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; XID_Start # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 145912 +# Total code points: 145913 # ================================================ @@ -10571,7 +10571,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1B132 ; XID_Continue # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; XID_Continue # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 ; XID_Continue # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; XID_Continue # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; XID_Continue # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB ; XID_Continue # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; XID_Continue # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; XID_Continue # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK @@ -10720,7 +10720,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..33479 ; XID_Continue # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 149254 +# Total code points: 149255 # ================================================ @@ -12808,7 +12808,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1B132 ; Grapheme_Base # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; Grapheme_Base # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 ; Grapheme_Base # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; Grapheme_Base # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; Grapheme_Base # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB ; Grapheme_Base # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; Grapheme_Base # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; Grapheme_Base # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK @@ -13016,7 +13016,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Grapheme_Base # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 157523 +# Total code points: 157524 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index a5c270b19..4436ba15b 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2025-01-27, 18:09:15 GMT +# Date: 2025-02-24, 14:52:12 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2398,7 +2398,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1B132 ; W # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; W # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 ; W # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; W # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; W # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB ; W # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; N # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; N # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index b7e7fae28..a4583094d 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2025-02-14, 15:13:07 GMT +# Date: 2025-02-24, 14:36:23 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3307,7 +3307,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1B132 ; CJ # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; CJ # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 ; CJ # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; CJ # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; CJ # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB ; ID # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; AL # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; AL # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 139c00537..6e5b3f138 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2025-01-27, 18:09:39 GMT +# Date: 2025-02-24, 14:36:41 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1574,9 +1574,9 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 1B000 ; Katakana # Lo KATAKANA LETTER ARCHAIC E 1B120..1B122 ; Katakana # Lo [3] KATAKANA LETTER ARCHAIC YI..KATAKANA LETTER ARCHAIC WU 1B155 ; Katakana # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; Katakana # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; Katakana # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE -# Total code points: 321 +# Total code points: 322 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 26b78592a..4511cc0f1 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -32215,6 +32215,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1B165;KATAKANA LETTER SMALL WE;Lo;0;L;;;;;N;;;;; 1B166;KATAKANA LETTER SMALL WO;Lo;0;L;;;;;N;;;;; 1B167;KATAKANA LETTER SMALL N;Lo;0;L;;;;;N;;;;; +1B168;KATAKANA LETTER SMALL ARCHAIC YE;Lo;0;L;;;;;N;;;;; 1B170;NUSHU CHARACTER-1B170;Lo;0;L;;;;;N;;;;; 1B171;NUSHU CHARACTER-1B171;Lo;0;L;;;;;N;;;;; 1B172;NUSHU CHARACTER-1B172;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 71770a13d..074b6440e 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2025-01-29 +# Date: 2025-02-24, 14:59:00 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2246,8 +2246,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1B153..1B154 ; U # Cn [2] .. 1B155 ; Tu # Lo KATAKANA LETTER SMALL KO 1B156..1B163 ; U # Cn [14] .. -1B164..1B167 ; Tu # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N -1B168..1B16F ; U # Cn [8] .. +1B164..1B168 ; Tu # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE +1B169..1B16F ; U # Cn [7] .. 1B170..1B2FB ; U # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1B2FC..1B2FF ; U # Cn [4] .. 1BC00..1BC6A ; R # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M diff --git a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html index a00ddfe78..5ce899170 100644 --- a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html +++ b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.html @@ -7,7 +7,7 @@

Line_Break Chart

Unicode Version: 17.0.0

-

Date: 2025-02-14, 17:30:27 GMT

+

Date: 2025-02-24, 14:52:15 GMT

This page illustrates the application of the Line_Break specification. The material here is informative, not normative.

The first chart shows where breaks would appear between different sample characters or strings. The sample characters are chosen mechanically to represent the different properties used by the specification.

Each cell shows the break-status for the position between the character(s) in its row header and the character(s) in its column header. The symbol × indicates a prohibited break, even with intervening spaces; the ÷ symbol indicates a (direct) break; the symbol ∻ indicates a break only in the presence of an intervening space (an indirect break).The cells with × or ∻ are also shaded to make it easier to scan the table. For example, in the cell at the intersection of the row headed by “CR” and the column headed by “LF”, there is a × symbol, indicating that there is no break between CR and LF.

In the row and column headers of the Table, in the Rules, when hovering over characters in the Samples, and in the comments in the associated list of test cases LineBreakTest.txt:

  1. The following sets are used:
      diff --git a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.txt b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.txt index eb5171518..5e6229aa2 100644 --- a/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.txt +++ b/unicodetools/data/ucd/dev/auxiliary/LineBreakTest.txt @@ -1,5 +1,5 @@ # LineBreakTest-17.0.0.txt -# Date: 2025-02-14, 17:30:30 GMT +# Date: 2025-02-24, 14:52:17 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 0e5dc2e11..fab42ae15 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2025-01-27, 18:09:39 GMT +# Date: 2025-02-24, 14:36:42 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2550,7 +2550,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1B132 ; OLetter # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; OLetter # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 ; OLetter # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; OLetter # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; OLetter # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB ; OLetter # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; OLetter # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; OLetter # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK @@ -2622,7 +2622,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; OLetter # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 141520 +# Total code points: 141521 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index e5a5b9937..d966ee65d 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2025-01-27, 18:09:43 GMT +# Date: 2025-02-24, 14:36:43 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -682,9 +682,9 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 1B000 ; Katakana # Lo KATAKANA LETTER ARCHAIC E 1B120..1B122 ; Katakana # Lo [3] KATAKANA LETTER ARCHAIC YI..KATAKANA LETTER ARCHAIC WU 1B155 ; Katakana # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; Katakana # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; Katakana # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE -# Total code points: 331 +# Total code points: 332 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index a4dc4250a..e8813c1d0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2025-01-27, 18:09:10 GMT +# Date: 2025-02-24, 14:36:16 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1126,7 +1126,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1B132 ; L # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; L # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 ; L # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; L # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; L # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB ; L # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; L # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; L # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK @@ -1234,7 +1234,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 810584 code points not listed here. +# The above property value applies to 810583 code points not listed here. # Total code points: 1095402 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 3a10fc1e4..f27f189f9 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2025-01-27, 18:09:10 GMT +# Date: 2025-02-24, 14:36:17 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1871,7 +1871,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1B132 ; 0 # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; 0 # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 ; 0 # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; 0 # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; 0 # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB ; 0 # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; 0 # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; 0 # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK @@ -2095,7 +2095,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 816745 code points not listed here. +# The above property value applies to 816744 code points not listed here. # Total code points: 1113143 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index c3d0bb02a..291407a26 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2025-01-27, 18:09:12 GMT +# Date: 2025-02-24, 14:52:09 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2144,8 +2144,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760566 code points not listed here. -# Total code points: 792267 +# The above property value applies to 760565 code points not listed here. +# Total code points: 792266 # ================================================ @@ -2559,7 +2559,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 1B132 ; W # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; W # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 ; W # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; W # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; W # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB ; W # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1D300..1D356 ; W # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING 1D360..1D376 ; W # No [23] COUNTING ROD UNIT DIGIT ONE..IDEOGRAPHIC TALLY MARK FIVE @@ -2622,7 +2622,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..33479 ; W # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 # The above property value applies to 56179 code points not listed here. -# Total code points: 182768 +# Total code points: 182769 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 22b9a85f6..b0e092391 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2025-01-27, 18:09:13 GMT +# Date: 2025-02-24, 14:36:19 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -590,7 +590,7 @@ FFFE..FFFF ; Cn # [2] .. 1B133..1B14F ; Cn # [29] .. 1B153..1B154 ; Cn # [2] .. 1B156..1B163 ; Cn # [14] .. -1B168..1B16F ; Cn # [8] .. +1B169..1B16F ; Cn # [7] .. 1B2FC..1BBFF ; Cn # [2308] .. 1BC6B..1BC6F ; Cn # [5] .. 1BC7D..1BC7F ; Cn # [3] .. @@ -754,7 +754,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 814697 +# Total code points: 814696 # ================================================ @@ -2670,7 +2670,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1B132 ; Lo # HIRAGANA LETTER SMALL KO 1B150..1B152 ; Lo # [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 ; Lo # KATAKANA LETTER SMALL KO -1B164..1B167 ; Lo # [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; Lo # [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB ; Lo # [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; Lo # [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M 1BC70..1BC7C ; Lo # [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK @@ -2738,7 +2738,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Lo # [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 141081 +# Total code points: 141082 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 010826221..cba2be4c1 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2025-02-14, 17:30:22 GMT +# Date: 2025-02-24, 14:36:20 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -70,8 +70,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757136 code points not listed here. -# Total code points: 894604 +# The above property value applies to 757135 code points not listed here. +# Total code points: 894603 # ================================================ @@ -3815,9 +3815,9 @@ FF70 ; CJ # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK 1B132 ; CJ # Lo HIRAGANA LETTER SMALL KO 1B150..1B152 ; CJ # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO 1B155 ; CJ # Lo KATAKANA LETTER SMALL KO -1B164..1B167 ; CJ # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B164..1B168 ; CJ # Lo [5] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL ARCHAIC YE -# Total code points: 60 +# Total code points: 61 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 5876213a1..f31a4d79e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2025-01-27, 18:09:14 GMT +# Date: 2025-02-24, 14:36:20 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -38415,6 +38415,7 @@ FFFD ; REPLACEMENT CHARACTER 1B165 ; KATAKANA LETTER SMALL WE 1B166 ; KATAKANA LETTER SMALL WO 1B167 ; KATAKANA LETTER SMALL N +1B168 ; KATAKANA LETTER SMALL ARCHAIC YE 1B170..1B2FB ; NUSHU CHARACTER-* 1BC00 ; DUPLOYAN LETTER H 1BC01 ; DUPLOYAN LETTER X @@ -45870,6 +45871,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 159834 +# Total code points: 159835 # EOF diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/194.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/194.txt new file mode 100644 index 000000000..7a27a218c --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/194.txt @@ -0,0 +1,18 @@ +# Kana: 1B168 KATAKANA LETTER SMALL ARCHAIC YE +# https://github.com/unicode-org/utc-release-management/issues/194 + +# Names always differ. +# Age always differs since these tests are comparing additions to pre-existing characters. +Ignoring Name Age: + +# Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. +Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping Idn_2008: + +Ignoring Block: +Propertywise [\x{1B168} \N{KATAKANA LETTER SMALL ARCHAIC YE} + \x{30E3} ャ \N{KATAKANA LETTER SMALL YA}] AreAlike +end Ignoring; + +end Ignoring; + +end Ignoring; \ No newline at end of file