From 242668d23ed04095697056cd347ac174c93e7ca5 Mon Sep 17 00:00:00 2001 From: Roozbeh Pournader Date: Thu, 16 Oct 2025 17:12:48 -0700 Subject: [PATCH] Add confusables for various dandas and double dandas See https://github.com/unicode-org/properties/issues/468 --- .../data/security/dev/confusables.txt | 42 ++++++++---- .../data/security/dev/confusablesSummary.txt | 64 ++++++++++--------- .../dev/data/source/confusables-source.txt | 27 ++++++++ .../dev/data/source/formatted-source.txt | 29 ++++++++- 4 files changed, 118 insertions(+), 44 deletions(-) diff --git a/unicodetools/data/security/dev/confusables.txt b/unicodetools/data/security/dev/confusables.txt index 3a601ea8c..d4a7422a4 100644 --- a/unicodetools/data/security/dev/confusables.txt +++ b/unicodetools/data/security/dev/confusables.txt @@ -1,5 +1,5 @@ # confusables.txt -# Date: 2025-10-11, 02:30:37 GMT +# Date: 2025-10-17, 00:06:13 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -573,14 +573,8 @@ A78F ; 00B7 ; MA # ( ꞏ → · ) LATIN LETTER SINOLOGICAL DOT → MIDDLE DOT # 18C2 ; 00B7 18C0 ; MA # ( ᣂ → ·ᣀ ) CANADIAN SYLLABICS SHWOY → MIDDLE DOT, CANADIAN SYLLABICS SHOY # →ᐧᣀ→ -A830 ; 0964 ; MA #* ( ꠰ → । ) NORTH INDIC FRACTION ONE QUARTER → DEVANAGARI DANDA # - -0965 ; 0964 0964 ; MA #* ( ॥ → ।। ) DEVANAGARI DOUBLE DANDA → DEVANAGARI DANDA, DEVANAGARI DANDA # - 1C3C ; 1C3B 1C3B ; MA #* ( ᰼ → ᰻᰻ ) LEPCHA PUNCTUATION NYET THYOOM TA-ROL → LEPCHA PUNCTUATION TA-ROL, LEPCHA PUNCTUATION TA-ROL # -104B ; 104A 104A ; MA #* ( ။ → ၊၊ ) MYANMAR SIGN SECTION → MYANMAR SIGN LITTLE SECTION, MYANMAR SIGN LITTLE SECTION # - 1AA9 ; 1AA8 1AA8 ; MA #* ( ᪩ → ᪨᪨ ) TAI THAM SIGN KAANKUU → TAI THAM SIGN KAAN, TAI THAM SIGN KAAN # 1AAB ; 1AAA 1AA8 ; MA #* ( ᪫ → ᪪᪨ ) TAI THAM SIGN SATKAANKUU → TAI THAM SIGN SATKAAN, TAI THAM SIGN KAAN # @@ -589,12 +583,6 @@ A830 ; 0964 ; MA #* ( ꠰ → । ) NORTH INDIC FRACTION ONE QUARTER → DEVANAG 10A57 ; 10A56 10A56 ; MA #* ( ‎𐩗‎ → ‎𐩖𐩖‎ ) KHAROSHTHI PUNCTUATION DOUBLE DANDA → KHAROSHTHI PUNCTUATION DANDA, KHAROSHTHI PUNCTUATION DANDA # -1144C ; 1144B 1144B ; MA #* ( 𑑌 → 𑑋𑑋 ) NEWA DOUBLE DANDA → NEWA DANDA, NEWA DANDA # - -11642 ; 11641 11641 ; MA #* ( 𑙂 → 𑙁𑙁 ) MODI DOUBLE DANDA → MODI DANDA, MODI DANDA # - -11C42 ; 11C41 11C41 ; MA #* ( 𑱂 → 𑱁𑱁 ) BHAIKSUKI DOUBLE DANDA → BHAIKSUKI DANDA, BHAIKSUKI DANDA # - 1C7F ; 1C7E 1C7E ; MA #* ( ᱿ → ᱾᱾ ) OL CHIKI PUNCTUATION DOUBLE MUCAAD → OL CHIKI PUNCTUATION MUCAAD, OL CHIKI PUNCTUATION MUCAAD # 055D ; 0027 ; MA #* ( ՝ → ' ) ARMENIAN COMMA → APOSTROPHE # →ˋ→→`→→‘→ @@ -2615,7 +2603,20 @@ A740 ; 004B 0335 ; MA # ( Ꝁ → K̵ ) LATIN CAPITAL LETTER K WITH STROKE → L 0198 ; 004B 0027 ; MA # ( Ƙ → K' ) LATIN CAPITAL LETTER K WITH HOOK → LATIN CAPITAL LETTER K, APOSTROPHE # →Kʽ→ +0964 ; 006C ; MA #* ( । → l ) DEVANAGARI DANDA → LATIN SMALL LETTER L # →|→ +A8CE ; 006C ; MA #* ( ꣎ → l ) SAURASHTRA DANDA → LATIN SMALL LETTER L # →|→ +104A ; 006C ; MA #* ( ၊ → l ) MYANMAR SIGN LITTLE SECTION → LATIN SMALL LETTER L # →|→ +AA5D ; 006C ; MA #* ( ꩝ → l ) CHAM PUNCTUATION DANDA → LATIN SMALL LETTER L # →|→ +11047 ; 006C ; MA #* ( 𑁇 → l ) BRAHMI DANDA → LATIN SMALL LETTER L # →|→ +110C0 ; 006C ; MA #* ( 𑃀 → l ) KAITHI DANDA → LATIN SMALL LETTER L # →|→ +11141 ; 006C ; MA #* ( 𑅁 → l ) CHAKMA DANDA → LATIN SMALL LETTER L # →|→ +111C5 ; 006C ; MA #* ( 𑇅 → l ) SHARADA DANDA → LATIN SMALL LETTER L # →|→ +113D4 ; 006C ; MA #* ( 𑏔 → l ) TULU-TIGALARI DANDA → LATIN SMALL LETTER L # →|→ +1144B ; 006C ; MA #* ( 𑑋 → l ) NEWA DANDA → LATIN SMALL LETTER L # →|→ +11641 ; 006C ; MA #* ( 𑙁 → l ) MODI DANDA → LATIN SMALL LETTER L # →|→ +11C41 ; 006C ; MA #* ( 𑱁 → l ) BHAIKSUKI DANDA → LATIN SMALL LETTER L # →|→ 05C0 ; 006C ; MA #* ( ‎׀‎ → l ) HEBREW PUNCTUATION PASEQ → LATIN SMALL LETTER L # →|→ +115C5 ; 006C ; MA #* ( 𑗅 → l ) SIDDHAM SEPARATOR BAR → LATIN SMALL LETTER L # →|→ 007C ; 006C ; MA #* ( | → l ) VERTICAL LINE → LATIN SMALL LETTER L # 2223 ; 006C ; MA #* ( ∣ → l ) DIVIDES → LATIN SMALL LETTER L # →ǀ→ 23FD ; 006C ; MA #* ( ⏽ → l ) POWER ON SYMBOL → LATIN SMALL LETTER L # →I→ @@ -2633,6 +2634,7 @@ FFE8 ; 006C ; MA #* ( │ → l ) HALFWIDTH FORMS LIGHT VERTICAL → LATIN SMALL 1D7ED ; 006C ; MA # ( 𝟭 → l ) MATHEMATICAL SANS-SERIF BOLD DIGIT ONE → LATIN SMALL LETTER L # →1→ 1D7F7 ; 006C ; MA # ( 𝟷 → l ) MATHEMATICAL MONOSPACE DIGIT ONE → LATIN SMALL LETTER L # →1→ 1FBF1 ; 006C ; MA # ( 🯱 → l ) SEGMENTED DIGIT ONE → LATIN SMALL LETTER L # →1→ +A830 ; 006C ; MA #* ( ꠰ → l ) NORTH INDIC FRACTION ONE QUARTER → LATIN SMALL LETTER L # →।→→|→ 0049 ; 006C ; MA # ( I → l ) LATIN CAPITAL LETTER I → LATIN SMALL LETTER L # FF29 ; 006C ; MA # ( I → l ) FULLWIDTH LATIN CAPITAL LETTER I → LATIN SMALL LETTER L # →Ӏ→ 2160 ; 006C ; MA # ( Ⅰ → l ) ROMAN NUMERAL ONE → LATIN SMALL LETTER L # →Ӏ→ @@ -2694,6 +2696,7 @@ A4F2 ; 006C ; MA # ( ꓲ → l ) LISU LETTER I → LATIN SMALL LETTER L # →I 16F28 ; 006C ; MA # ( 𖼨 → l ) MIAO LETTER GHA → LATIN SMALL LETTER L # →I→ 1028A ; 006C ; MA # ( 𐊊 → l ) LYCIAN LETTER J → LATIN SMALL LETTER L # →I→ 10309 ; 006C ; MA # ( 𐌉 → l ) OLD ITALIC LETTER I → LATIN SMALL LETTER L # →I→ +16D63 ; 006C ; MA # ( 𖵣 → l ) KIRAT RAI VOWEL SIGN AA → LATIN SMALL LETTER L # →|→ 1D22A ; 004C ; MA #* ( 𝈪 → L ) GREEK INSTRUMENTAL NOTATION SYMBOL-23 → LATIN CAPITAL LETTER L # 216C ; 004C ; MA # ( Ⅼ → L ) ROMAN NUMERAL FIFTY → LATIN CAPITAL LETTER L # @@ -2811,6 +2814,17 @@ FE87 ; 006C 0655 ; MA # ( ‎ﺇ‎ → lٕ ) ARABIC LETTER ALEF WITH HAMZA BELO 01C7 ; 004C 004A ; MA # ( LJ → LJ ) LATIN CAPITAL LETTER LJ → LATIN CAPITAL LETTER L, LATIN CAPITAL LETTER J # +0965 ; 006C 006C ; MA #* ( ॥ → ll ) DEVANAGARI DOUBLE DANDA → LATIN SMALL LETTER L, LATIN SMALL LETTER L # →||→ +A8CF ; 006C 006C ; MA #* ( ꣏ → ll ) SAURASHTRA DOUBLE DANDA → LATIN SMALL LETTER L, LATIN SMALL LETTER L # →||→ +104B ; 006C 006C ; MA #* ( ။ → ll ) MYANMAR SIGN SECTION → LATIN SMALL LETTER L, LATIN SMALL LETTER L # →||→ +11048 ; 006C 006C ; MA #* ( 𑁈 → ll ) BRAHMI DOUBLE DANDA → LATIN SMALL LETTER L, LATIN SMALL LETTER L # →||→ +110C1 ; 006C 006C ; MA #* ( 𑃁 → ll ) KAITHI DOUBLE DANDA → LATIN SMALL LETTER L, LATIN SMALL LETTER L # →||→ +11142 ; 006C 006C ; MA #* ( 𑅂 → ll ) CHAKMA DOUBLE DANDA → LATIN SMALL LETTER L, LATIN SMALL LETTER L # →||→ +111C6 ; 006C 006C ; MA #* ( 𑇆 → ll ) SHARADA DOUBLE DANDA → LATIN SMALL LETTER L, LATIN SMALL LETTER L # →||→ +113D5 ; 006C 006C ; MA #* ( 𑏕 → ll ) TULU-TIGALARI DOUBLE DANDA → LATIN SMALL LETTER L, LATIN SMALL LETTER L # →||→ +1144C ; 006C 006C ; MA #* ( 𑑌 → ll ) NEWA DOUBLE DANDA → LATIN SMALL LETTER L, LATIN SMALL LETTER L # →||→ +11642 ; 006C 006C ; MA #* ( 𑙂 → ll ) MODI DOUBLE DANDA → LATIN SMALL LETTER L, LATIN SMALL LETTER L # →||→ +11C42 ; 006C 006C ; MA #* ( 𑱂 → ll ) BHAIKSUKI DOUBLE DANDA → LATIN SMALL LETTER L, LATIN SMALL LETTER L # →||→ 2016 ; 006C 006C ; MA #* ( ‖ → ll ) DOUBLE VERTICAL LINE → LATIN SMALL LETTER L, LATIN SMALL LETTER L # →∥→→||→ 2225 ; 006C 006C ; MA #* ( ∥ → ll ) PARALLEL TO → LATIN SMALL LETTER L, LATIN SMALL LETTER L # →||→ 2161 ; 006C 006C ; MA # ( Ⅱ → ll ) ROMAN NUMERAL TWO → LATIN SMALL LETTER L, LATIN SMALL LETTER L # →II→ @@ -9964,5 +9978,5 @@ FACE ; 9F9C ; MA # ( 龜 → 龜 ) CJK COMPATIBILITY IDEOGRAPH-FACE → CJK UNIF 2FD5 ; 9FA0 ; MA #* ( ⿕ → 龠 ) KANGXI RADICAL FLUTE → CJK UNIFIED IDEOGRAPH-9FA0 # -# total: 6562 +# total: 6582 diff --git a/unicodetools/data/security/dev/confusablesSummary.txt b/unicodetools/data/security/dev/confusablesSummary.txt index 2f032c129..675a54008 100644 --- a/unicodetools/data/security/dev/confusablesSummary.txt +++ b/unicodetools/data/security/dev/confusablesSummary.txt @@ -1,5 +1,5 @@ # confusablesSummary.txt -# Date: 2025-10-11, 02:30:37 GMT +# Date: 2025-10-17, 00:06:13 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1057,9 +1057,12 @@ ← (‎ 𑷠点 ‎) 11DE0 70B9 TOLONG SIKI DIGIT ZERO, CJK UNIFIED IDEOGRAPH-70B9 ← (‎ ㍘ ‎) 3358 IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO -# l 𑷚 𑷡 𖺪 I 1 | Ɩ ǀ ӏ ו ן ا ١ ۱ Ι І Ӏ ߊ ᛁ Ⲓ ⵏ ꓲ 𐊊 𐌉 𖼨 ׀ ∣ 𐌠 𞣇 ⏽ 🯱 𜳱 𜳞 Ⅰ ⅼ I l ℐ ℑ ℓ 𞸀 𞺀 ﺍ ﺎ 𝐈 𝐥 𝐼 𝑙 𝑰 𝒍 𝓁 𝓘 𝓵 𝔩 𝕀 𝕝 𝕴 𝖑 𝖨 𝗅 𝗜 𝗹 𝘐 𝘭 𝙄 𝙡 𝙸 𝚕 𝚰 𝛪 𝜤 𝝞 𝞘 𝟏 𝟙 𝟣 𝟭 𝟷 │ +# l । 𖵣 𑏔 𑷚 𑷡 𖺪 I 1 | Ɩ ǀ ӏ ו ן ا ١ ۱ Ι І Ӏ ߊ ᛁ Ⲓ ⵏ ꓲ 𐊊 𐌉 𖼨 ׀ ၊ ∣ ꠰ ꣎ ꩝ 𐌠 𑁇 𑃀 𑅁 𑇅 𑗅 𑙁 𞣇 ⏽ 𑑋 𑱁 🯱 𜳱 𜳞 Ⅰ ⅼ I l ℐ ℑ ℓ 𞸀 𞺀 ﺍ ﺎ 𝐈 𝐥 𝐼 𝑙 𝑰 𝒍 𝓁 𝓘 𝓵 𝔩 𝕀 𝕝 𝕴 𝖑 𝖨 𝗅 𝗜 𝗹 𝘐 𝘭 𝙄 𝙡 𝙸 𝚕 𝚰 𝛪 𝜤 𝝞 𝞘 𝟏 𝟙 𝟣 𝟭 𝟷 │ (‎ 1 ‎) 0031 DIGIT ONE ← (‎ l ‎) 006C LATIN SMALL LETTER L +← (‎ । ‎) 0964 DEVANAGARI DANDA # →|→→l→ +← (‎ 𖵣 ‎) 16D63 KIRAT RAI VOWEL SIGN AA # →|→→l→ +← (‎ 𑏔 ‎) 113D4 TULU-TIGALARI DANDA # →|→→l→ ← (‎ 𑷚 ‎) 11DDA TOLONG SIKI SIGN HECAKA # →|→→l→ ← (‎ 𑷡 ‎) 11DE1 TOLONG SIKI DIGIT ONE # →|→→l→ ← (‎ 𖺪 ‎) 16EAA BERIA ERFE CAPITAL LETTER LAKKO # →I→ @@ -1085,10 +1088,22 @@ ← (‎ 𐌉 ‎) 10309 OLD ITALIC LETTER I # →I→ ← (‎ 𖼨 ‎) 16F28 MIAO LETTER GHA # →I→ ← (‎ ׀ ‎) 05C0 HEBREW PUNCTUATION PASEQ # →|→→l→ +← (‎ ၊ ‎) 104A MYANMAR SIGN LITTLE SECTION # →|→→l→ ← (‎ ∣ ‎) 2223 DIVIDES # →ǀ→→I→ +← (‎ ꠰ ‎) A830 NORTH INDIC FRACTION ONE QUARTER # →।→→|→→l→ +← (‎ ꣎ ‎) A8CE SAURASHTRA DANDA # →|→→l→ +← (‎ ꩝ ‎) AA5D CHAM PUNCTUATION DANDA # →|→→l→ ← (‎ 𐌠 ‎) 10320 OLD ITALIC NUMERAL ONE # →𐌉→→I→ +← (‎ 𑁇 ‎) 11047 BRAHMI DANDA # →|→→l→ +← (‎ 𑃀 ‎) 110C0 KAITHI DANDA # →|→→l→ +← (‎ 𑅁 ‎) 11141 CHAKMA DANDA # →|→→l→ +← (‎ 𑇅 ‎) 111C5 SHARADA DANDA # →|→→l→ +← (‎ 𑗅 ‎) 115C5 SIDDHAM SEPARATOR BAR # →|→→l→ +← (‎ 𑙁 ‎) 11641 MODI DANDA # →|→→l→ ← (‎ 𞣇 ‎) 1E8C7 MENDE KIKAKUI DIGIT ONE # →l→ ← (‎ ⏽ ‎) 23FD POWER ON SYMBOL # →I→ +← (‎ 𑑋 ‎) 1144B NEWA DANDA # →|→→l→ +← (‎ 𑱁 ‎) 11C41 BHAIKSUKI DANDA # →|→→l→ ← (‎ 🯱 ‎) 1FBF1 SEGMENTED DIGIT ONE ← (‎ 𜳱 ‎) 1CCF1 OUTLINED DIGIT ONE ← (‎ 𜳞 ‎) 1CCDE OUTLINED LATIN CAPITAL LETTER I # →I→ @@ -1179,17 +1194,32 @@ ← (‎ l𑷠点 ‎) 006C 11DE0 70B9 LATIN SMALL LETTER L, TOLONG SIKI DIGIT ZERO, CJK UNIFIED IDEOGRAPH-70B9 ← (‎ ㍢ ‎) 3362 IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TEN -# ll 𑷚𑷚 II 11 || וו ǁ װ ‖ ∥ Ⅱ +# ll ।। ၊၊ 𑙁𑙁 𑑋𑑋 𑱁𑱁 II 11 || וו 𑏕 ǁ װ ॥ ။ ‖ ∥ ꣏ 𑁈 𑃁 𑅂 𑇆 𑙂 𑑌 𑱂 Ⅱ (‎ 11 ‎) 0031 0031 DIGIT ONE, DIGIT ONE ← (‎ ll ‎) 006C 006C LATIN SMALL LETTER L, LATIN SMALL LETTER L # →‎וו‎→ -← (‎ 𑷚𑷚 ‎) 11DDA 11DDA TOLONG SIKI SIGN HECAKA, TOLONG SIKI SIGN HECAKA # →||→→ll→→‎וו‎→ +← (‎ ।। ‎) 0964 0964 DEVANAGARI DANDA, DEVANAGARI DANDA # →॥→→||→→ll→→‎וו‎→ +← (‎ ၊၊ ‎) 104A 104A MYANMAR SIGN LITTLE SECTION, MYANMAR SIGN LITTLE SECTION # →။→→||→→ll→→‎וו‎→ +← (‎ 𑙁𑙁 ‎) 11641 11641 MODI DANDA, MODI DANDA # →𑙂→→||→→ll→→‎וו‎→ +← (‎ 𑑋𑑋 ‎) 1144B 1144B NEWA DANDA, NEWA DANDA # →𑑌→→||→→ll→→‎וו‎→ +← (‎ 𑱁𑱁 ‎) 11C41 11C41 BHAIKSUKI DANDA, BHAIKSUKI DANDA # →𑱂→→||→→ll→→‎וו‎→ ← (‎ II ‎) 0049 0049 LATIN CAPITAL LETTER I, LATIN CAPITAL LETTER I # →ll→→‎וו‎→ ← (‎ || ‎) 007C 007C VERTICAL LINE, VERTICAL LINE # →ll→→‎וו‎→ ← (‎ וו ‎) 05D5 05D5 HEBREW LETTER VAV, HEBREW LETTER VAV +← (‎ 𑏕 ‎) 113D5 TULU-TIGALARI DOUBLE DANDA # →||→→ll→→‎וו‎→ ← (‎ ǁ ‎) 01C1 LATIN LETTER LATERAL CLICK # →‖→→∥→→||→→ll→→‎וו‎→ ← (‎ װ ‎) 05F0 HEBREW LIGATURE YIDDISH DOUBLE VAV # →‎וו‎→ +← (‎ ॥ ‎) 0965 DEVANAGARI DOUBLE DANDA # →||→→ll→→‎וו‎→ +← (‎ ။ ‎) 104B MYANMAR SIGN SECTION # →||→→ll→→‎וו‎→ ← (‎ ‖ ‎) 2016 DOUBLE VERTICAL LINE # →∥→→||→→ll→→‎וו‎→ ← (‎ ∥ ‎) 2225 PARALLEL TO # →||→→ll→→‎וו‎→ +← (‎ ꣏ ‎) A8CF SAURASHTRA DOUBLE DANDA # →||→→ll→→‎וו‎→ +← (‎ 𑁈 ‎) 11048 BRAHMI DOUBLE DANDA # →||→→ll→→‎וו‎→ +← (‎ 𑃁 ‎) 110C1 KAITHI DOUBLE DANDA # →||→→ll→→‎וו‎→ +← (‎ 𑅂 ‎) 11142 CHAKMA DOUBLE DANDA # →||→→ll→→‎וו‎→ +← (‎ 𑇆 ‎) 111C6 SHARADA DOUBLE DANDA # →||→→ll→→‎וו‎→ +← (‎ 𑙂 ‎) 11642 MODI DOUBLE DANDA # →||→→ll→→‎וו‎→ +← (‎ 𑑌 ‎) 1144C NEWA DOUBLE DANDA # →||→→ll→→‎וו‎→ +← (‎ 𑱂 ‎) 11C42 BHAIKSUKI DOUBLE DANDA # →||→→ll→→‎וו‎→ ← (‎ Ⅱ ‎) 2161 ROMAN NUMERAL TWO # →II→→ll→→‎וו‎→ # ll. 11. ⒒ @@ -8738,14 +8768,6 @@ ← (‎ ੍ ‎) 0A4D GURMUKHI SIGN VIRAMA ← (‎ ્ ‎) 0ACD GUJARATI SIGN VIRAMA -# । ꠰ - (‎ । ‎) 0964 DEVANAGARI DANDA -← (‎ ꠰ ‎) A830 NORTH INDIC FRACTION ONE QUARTER - -# ।। ॥ - (‎ ।। ‎) 0964 0964 DEVANAGARI DANDA, DEVANAGARI DANDA -← (‎ ॥ ‎) 0965 DEVANAGARI DOUBLE DANDA - # २ ર ૨ (‎ २ ‎) 0968 DEVANAGARI DIGIT TWO ← (‎ ર ‎) 0AB0 GUJARATI LETTER RA # →૨→ @@ -9631,10 +9653,6 @@ (‎ ၁ ‎) 1041 MYANMAR DIGIT ONE ← (‎ ၥ ‎) 1065 MYANMAR LETTER WESTERN PWO KAREN THA -# ၊၊ ။ - (‎ ၊၊ ‎) 104A 104A MYANMAR SIGN LITTLE SECTION, MYANMAR SIGN LITTLE SECTION -← (‎ ။ ‎) 104B MYANMAR SIGN SECTION - # ၽှ ၾ (‎ ၽှ ‎) 107D 103E MYANMAR LETTER SHAN PHA, MYANMAR CONSONANT SIGN MEDIAL HA ← (‎ ၾ ‎) 107E MYANMAR LETTER SHAN FA @@ -17243,10 +17261,6 @@ (‎ 𑐯 ‎) 1142F NEWA LETTER LHA ← (‎ 𑐴𑑂𑐮 ‎) 11434 11442 1142E NEWA LETTER HA, NEWA SIGN VIRAMA, NEWA LETTER LA -# 𑑋𑑋 𑑌 - (‎ 𑑋𑑋 ‎) 1144B 1144B NEWA DANDA, NEWA DANDA -← (‎ 𑑌 ‎) 1144C NEWA DOUBLE DANDA - # 𑖂 𑗘 𑗙 (‎ 𑖂 ‎) 11582 SIDDHAM LETTER I ← (‎ 𑗘 ‎) 115D8 SIDDHAM LETTER THREE-CIRCLE ALTERNATE I @@ -17268,10 +17282,6 @@ (‎ 𑖳 ‎) 115B3 SIDDHAM VOWEL SIGN UU ← (‎ 𑗝 ‎) 115DD SIDDHAM VOWEL SIGN ALTERNATE UU -# 𑙁𑙁 𑙂 - (‎ 𑙁𑙁 ‎) 11641 11641 MODI DANDA, MODI DANDA -← (‎ 𑙂 ‎) 11642 MODI DOUBLE DANDA - # 𑫥𑫥 𑫨 (‎ 𑫥𑫥 ‎) 11AE5 11AE5 PAU CIN HAU RISING TONE LONG, PAU CIN HAU RISING TONE LONG ← (‎ 𑫨 ‎) 11AE8 PAU CIN HAU RISING TONE LONG FINAL @@ -17329,10 +17339,6 @@ ← (‎ 𑫳𑫵 ‎) 11AF3 11AF5 PAU CIN HAU LOW-FALLING TONE LONG, PAU CIN HAU GLOTTAL STOP ← (‎ 𑫸 ‎) 11AF8 PAU CIN HAU GLOTTAL STOP FINAL # →𑫳𑫵→ -# 𑱁𑱁 𑱂 - (‎ 𑱁𑱁 ‎) 11C41 11C41 BHAIKSUKI DANDA, BHAIKSUKI DANDA -← (‎ 𑱂 ‎) 11C42 BHAIKSUKI DOUBLE DANDA - # 𑲪 𑲲 (‎ 𑲪 ‎) 11CAA MARCHEN SUBJOINED LETTER RA ← (‎ 𑲲 ‎) 11CB2 MARCHEN VOWEL SIGN U @@ -17798,5 +17804,5 @@ (‎ 𪘀 ‎) 2A600 CJK UNIFIED IDEOGRAPH-2A600 ← (‎ 𪘀 ‎) 2FA1D CJK COMPATIBILITY IDEOGRAPH-2FA1D -# total : 7606 +# total : 7630 diff --git a/unicodetools/data/security/dev/data/source/confusables-source.txt b/unicodetools/data/security/dev/data/source/confusables-source.txt index 1af4a7155..69573a6b0 100644 --- a/unicodetools/data/security/dev/data/source/confusables-source.txt +++ b/unicodetools/data/security/dev/data/source/confusables-source.txt @@ -5759,3 +5759,30 @@ A7F1 ; 02E2 # ( ꟱ → ˢ ) MODIFIER LETTER CAPITAL S → MODIFIER LETTER SMAL # Confusables data for U+00A1 INVERTED EXCLAMATION MARK (PAG ref #453) 00A1 ; 0069 + +# Confusables data for dandas and double dandas (PAG ref #468) +0964 ; 007C # DEVANAGARI DANDA +104A ; 007C # MYANMAR SIGN LITTLE SECTION +A8CE ; 007C # SAURASHTRA DANDA +11047 ; 007C # BRAHMI DANDA +110C0 ; 007C # KAITHI DANDA +11141 ; 007C # CHAKMA DANDA +111C5 ; 007C # SHARADA DANDA +1144B ; 007C # NEWA DANDA +11641 ; 007C # MODI DANDA +11C41 ; 007C # BHAIKSUKI DANDA +AA5D ; 007C # CHAM PUNCTUATION DANDA +113D4 ; 007C # TULU-TIGALARI DANDA +115C5 ; 007C # SIDDHAM SEPARATOR BAR +16D63 ; 007C # KIRAT RAI VOWEL SIGN AA +0965 ; 007C 007C # DEVANAGARI DOUBLE DANDA +104B ; 007C 007C # MYANMAR SIGN SECTION +A8CF ; 007C 007C # SAURASHTRA DOUBLE DANDA +11048 ; 007C 007C # BRAHMI DOUBLE DANDA +110C1 ; 007C 007C # KAITHI DOUBLE DANDA +11142 ; 007C 007C # CHAKMA DOUBLE DANDA +111C6 ; 007C 007C # SHARADA DOUBLE DANDA +1144C ; 007C 007C # NEWA DOUBLE DANDA +11642 ; 007C 007C # MODI DOUBLE DANDA +11C42 ; 007C 007C # BHAIKSUKI DOUBLE DANDA +113D5 ; 007C 007C # TULU-TIGALARI DOUBLE DANDA diff --git a/unicodetools/data/security/dev/data/source/formatted-source.txt b/unicodetools/data/security/dev/data/source/formatted-source.txt index 3fc6390dc..b7b6606bf 100644 --- a/unicodetools/data/security/dev/data/source/formatted-source.txt +++ b/unicodetools/data/security/dev/data/source/formatted-source.txt @@ -1,5 +1,5 @@ # formatted-source.txt -# Date: 2025-10-11, 02:30:35 GMT +# Date: 2025-10-17, 00:06:11 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -879,9 +879,31 @@ 007C ; 01C0 #* ( | ~ ǀ ) VERTICAL LINE ~ LATIN LETTER DENTAL CLICK 007C ; 05C0 #* ( | ~ ‎׀‎ ) VERTICAL LINE ~ HEBREW PUNCTUATION PASEQ +007C ; 104A #* ( | ~ ၊ ) VERTICAL LINE ~ MYANMAR SIGN LITTLE SECTION +007C ; A8CE #* ( | ~ ꣎ ) VERTICAL LINE ~ SAURASHTRA DANDA +007C ; AA5D #* ( | ~ ꩝ ) VERTICAL LINE ~ CHAM PUNCTUATION DANDA +007C ; 11047 #* ( | ~ 𑁇 ) VERTICAL LINE ~ BRAHMI DANDA +007C ; 110C0 #* ( | ~ 𑃀 ) VERTICAL LINE ~ KAITHI DANDA +007C ; 11141 #* ( | ~ 𑅁 ) VERTICAL LINE ~ CHAKMA DANDA +007C ; 111C5 #* ( | ~ 𑇅 ) VERTICAL LINE ~ SHARADA DANDA +007C ; 115C5 #* ( | ~ 𑗅 ) VERTICAL LINE ~ SIDDHAM SEPARATOR BAR +007C ; 11641 #* ( | ~ 𑙁 ) VERTICAL LINE ~ MODI DANDA +007C ; 1144B #* ( | ~ 𑑋 ) VERTICAL LINE ~ NEWA DANDA +007C ; 11C41 #* ( | ~ 𑱁 ) VERTICAL LINE ~ BHAIKSUKI DANDA 007C ; FFE8 #* ( | ~ │ ) VERTICAL LINE ~ HALFWIDTH FORMS LIGHT VERTICAL +007C 007C ; 113D5 #* ( || ~ 𑏕 ) VERTICAL LINE, VERTICAL LINE ~ TULU-TIGALARI DOUBLE DANDA +007C 007C ; 0965 #* ( || ~ ॥ ) VERTICAL LINE, VERTICAL LINE ~ DEVANAGARI DOUBLE DANDA +007C 007C ; 104B #* ( || ~ ။ ) VERTICAL LINE, VERTICAL LINE ~ MYANMAR SIGN SECTION 007C 007C ; 2225 #* ( || ~ ∥ ) VERTICAL LINE, VERTICAL LINE ~ PARALLEL TO +007C 007C ; A8CF #* ( || ~ ꣏ ) VERTICAL LINE, VERTICAL LINE ~ SAURASHTRA DOUBLE DANDA +007C 007C ; 11048 #* ( || ~ 𑁈 ) VERTICAL LINE, VERTICAL LINE ~ BRAHMI DOUBLE DANDA +007C 007C ; 110C1 #* ( || ~ 𑃁 ) VERTICAL LINE, VERTICAL LINE ~ KAITHI DOUBLE DANDA +007C 007C ; 11142 #* ( || ~ 𑅂 ) VERTICAL LINE, VERTICAL LINE ~ CHAKMA DOUBLE DANDA +007C 007C ; 111C6 #* ( || ~ 𑇆 ) VERTICAL LINE, VERTICAL LINE ~ SHARADA DOUBLE DANDA +007C 007C ; 11642 #* ( || ~ 𑙂 ) VERTICAL LINE, VERTICAL LINE ~ MODI DOUBLE DANDA +007C 007C ; 1144C #* ( || ~ 𑑌 ) VERTICAL LINE, VERTICAL LINE ~ NEWA DOUBLE DANDA +007C 007C ; 11C42 #* ( || ~ 𑱂 ) VERTICAL LINE, VERTICAL LINE ~ BHAIKSUKI DOUBLE DANDA 007E 0308 ; 2368 #* ( ~̈ ~ ⍨ ) TILDE, COMBINING DIAERESIS ~ APL FUNCTIONAL SYMBOL TILDE DIAERESIS @@ -2016,6 +2038,7 @@ 0957 ; 0A42 # ( ॗ ~ ੂ ) DEVANAGARI VOWEL SIGN UUE ~ GURMUKHI VOWEL SIGN UU +0964 ; 007C #* ( । ~ | ) DEVANAGARI DANDA ~ VERTICAL LINE 0964 ; A830 #* ( । ~ ꠰ ) DEVANAGARI DANDA ~ NORTH INDIC FRACTION ONE QUARTER 0964 0964 ; 0965 #* ( ।। ~ ॥ ) DEVANAGARI DANDA, DEVANAGARI DANDA ~ DEVANAGARI DOUBLE DANDA @@ -4530,6 +4553,8 @@ A99D ; A9A3 # ( ꦝ ~ ꦣ ) JAVANESE LETTER DDA ~ JAVANESE LETTER DA MAHAPRANA 10EFA ; 0348 # ( 𐻺 ~ ͈ ) ARABIC DOUBLE VERTICAL BAR BELOW ~ COMBINING DOUBLE VERTICAL LINE BELOW +113D4 ; 007C #* ( 𑏔 ~ | ) TULU-TIGALARI DANDA ~ VERTICAL LINE + 11434 11442 11412 ; 11413 # ( 𑐴𑑂𑐒 ~ 𑐓 ) NEWA LETTER HA, NEWA SIGN VIRAMA, NEWA LETTER NGA ~ NEWA LETTER NGHA 11434 11442 11418 ; 11419 # ( 𑐴𑑂𑐘 ~ 𑐙 ) NEWA LETTER HA, NEWA SIGN VIRAMA, NEWA LETTER NYA ~ NEWA LETTER NYHA @@ -4603,6 +4628,8 @@ A99D ; A9A3 # ( ꦝ ~ ꦣ ) JAVANESE LETTER DDA ~ JAVANESE LETTER DA MAHAPRANA 132F9 ; 2625 # ( 𓋹 ~ ☥ ) EGYPTIAN HIEROGLYPH S034 ~ ANKH +16D63 ; 007C # ( 𖵣 ~ | ) KIRAT RAI VOWEL SIGN AA ~ VERTICAL LINE + 16EA6 ; 041F # ( 𖺦 ~ П ) BERIA ERFE CAPITAL LETTER HIRDEABO ~ CYRILLIC CAPITAL LETTER PE 16EAA ; 0049 # ( 𖺪 ~ I ) BERIA ERFE CAPITAL LETTER LAKKO ~ LATIN CAPITAL LETTER I