Skip to content

Commit 00abeab

Browse files
committed
⚒ change Unicode utils
1 parent 1ea8075 commit 00abeab

File tree

4 files changed

+327
-2656
lines changed

4 files changed

+327
-2656
lines changed

.eslintignore

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
/.temp
22
/node_modules
3-
/src/unicode
43
/index.*

.eslintrc.yml

+9
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,12 @@ rules:
77
"no-redeclare": "off"
88
# https://github.com/typescript-eslint/typescript-eslint/issues/743
99
"@mysticatea/ts/unbound-method": "off"
10+
11+
overrides:
12+
- files: "./src/unicode/ids.ts"
13+
rules:
14+
curly: "off"
15+
no-misleading-character-class: "off"
16+
- files: "./src/unicode/property-data.ts"
17+
rules:
18+
"@mysticatea/ts/camelcase": "off"

scripts/update-unicode-ids.ts

+66-32
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,7 @@ const ID_CONTINUE = /^([0-9a-z]+)(?:\.\.([0-9a-z]+))?[^;]*; ID_Continue /iu
99
const BORDER = 0x7f
1010
const logger = console
1111

12-
enum Mode {
13-
Small,
14-
Former,
15-
Latter,
16-
}
17-
18-
// Main
12+
// Main
1913
;(async () => {
2014
let banner = ""
2115
const idStartSet: Set<string> = new Set()
@@ -50,21 +44,39 @@ enum Mode {
5044
}
5145
})
5246

47+
logger.log("Normalizing data...")
48+
normalizeRanges(idStartSmall)
49+
normalizeRanges(idStartLarge)
50+
normalizeRanges(idContinueSmall)
51+
normalizeRanges(idContinueLarge)
52+
5353
logger.log("Generating code...")
5454
let code = `${banner}
55+
56+
let largeIdStartPattern: RegExp | null = null;
57+
let largeIdContinuePattern: RegExp | null = null;
58+
5559
export function isIdStart(cp: number): boolean {
56-
${makeSmallCondtion(idStartSmall, Mode.Small)}
57-
return isLargeIdStart(cp)
60+
${makeSmallCondtion(idStartSmall)}
61+
return isLargeIdStart(cp)
5862
}
5963
export function isIdContinue(cp: number): boolean {
60-
${makeSmallCondtion(idContinueSmall, Mode.Small)}
61-
return isLargeIdStart(cp) || isLargeIdContinue(cp)
64+
${makeSmallCondtion(idContinueSmall)}
65+
return isLargeIdStart(cp) || isLargeIdContinue(cp)
6266
}
6367
function isLargeIdStart(cp: number): boolean {
64-
${makeCondition(idStartLarge, Mode.Former)}
68+
if (!largeIdStartPattern) {
69+
largeIdStartPattern = new RegExp(${makeLargePattern(idStartLarge)}, "u")
70+
}
71+
return largeIdStartPattern.test(String.fromCodePoint(cp))
6572
}
6673
function isLargeIdContinue(cp: number): boolean {
67-
${makeCondition(idContinueLarge, Mode.Former)}
74+
if (!largeIdContinuePattern) {
75+
largeIdContinuePattern = new RegExp(${makeLargePattern(
76+
idContinueLarge,
77+
)}, "u")
78+
}
79+
return largeIdContinuePattern.test(String.fromCodePoint(cp))
6880
}`
6981

7082
logger.log("Formatting code...")
@@ -111,39 +123,61 @@ function processEachLine(cb: (line: string) => void): Promise<void> {
111123
})
112124
}
113125

114-
function makeCondition(ranges: [number, number][], mode: Mode): string {
115-
if (ranges.length < 10) {
116-
return makeSmallCondtion(ranges, mode)
126+
/**
 * Merges directly adjacent ranges in place.
 *
 * Walks from the last range down to index 1. When the current range starts
 * exactly one past the end of the previous range, the previous range's end
 * is extended to the current range's end and the current entry is spliced
 * out of the array. Iterating backwards keeps the remaining indices valid
 * after each splice.
 *
 * Only the "starts exactly one after the previous end" case is merged;
 * overlapping ranges are left untouched.
 * NOTE(review): presumably `ranges` is sorted ascending and non-overlapping
 * -- verify against the caller.
 *
 * Diff note: the lines marked `-` inside this span are the body of the
 * removed `makeCondition` function, not part of `normalizeRanges`.
 *
 * @param ranges Inclusive `[min, max]` pairs; mutated in place.
 */
function normalizeRanges(ranges: [number, number][]): void {
127+
for (let i = ranges.length - 1; i >= 1; --i) {
128+
const currRange = ranges[i]
129+
const prevRange = ranges[i - 1]
130+
if (currRange[0] - 1 === prevRange[1]) {
131+
prevRange[1] = currRange[1]
132+
ranges.splice(i, 1)
133+
}
117134
}
118-
119-
const middle = ranges.length >> 1
120-
const ranges1 = ranges.slice(0, middle)
121-
const ranges2 = ranges.slice(middle)
122-
const pivot = ranges2[0][0]
123-
return `if (cp < 0x${pivot.toString(16)}) {
124-
${makeCondition(ranges1, Mode.Former)}
125-
}
126-
${makeCondition(ranges2, Mode.Latter)}`
127135
}
128136

129-
function makeSmallCondtion(ranges: [number, number][], mode: Mode): string {
137+
/**
 * Builds the source text of a chain of `if (cp ...) return ...` statements
 * (one per line, joined with "\n") that tests `cp` against the given ranges.
 *
 * For each `[min, max]`:
 * - `min === max` emits an equality test that returns true;
 * - otherwise emits `cp < min` -> return false, then `cp < max + 1` ->
 *   return true.
 *
 * After this change (lines marked `+`) the `mode` parameter is gone: the
 * `cp < min` guard is always emitted and no trailing `return false` is
 * appended, so the generated code falls through to whatever statement
 * follows the interpolation when no range matches.
 * NOTE(review): the early `return false` guard is only correct if `ranges`
 * is in ascending order -- confirm against the caller.
 *
 * @param ranges Inclusive `[min, max]` code point pairs.
 * @returns Generated statements as a single string.
 */
function makeSmallCondtion(ranges: [number, number][]): string {
130138
const conditions: string[] = []
131139
for (const [min, max] of ranges) {
132140
if (min === max) {
133141
conditions.push(`if (cp === 0x${min.toString(16)}) return true`)
134142
} else {
135-
if (mode !== Mode.Latter || conditions.length !== 0) {
136-
conditions.push(`if (cp < 0x${min.toString(16)}) return false`)
137-
}
143+
conditions.push(`if (cp < 0x${min.toString(16)}) return false`)
138144
conditions.push(`if (cp < 0x${(max + 1).toString(16)}) return true`)
139145
}
140146
}
141-
if (mode === Mode.Former || mode === Mode.Latter) {
142-
conditions.push("return false")
143-
}
144147
return conditions.join("\n")
145148
}
146149

150+
/**
 * Builds the source text of a string expression holding a regular-expression
 * pattern of the form `^[...]$`, whose character class lists the given
 * ranges.
 *
 * Each range is rendered with `esc`:
 * - a single code point as one escape;
 * - two consecutive code points as two escapes with no hyphen;
 * - anything longer as `min-max`.
 *
 * The pattern is split into several double-quoted literals joined by `+`:
 * a new literal is started whenever appending the next part would make the
 * current one longer than 60 characters. Splits only happen between parts,
 * never inside an escape or a `min-max` pair.
 *
 * @param ranges Inclusive `[min, max]` code point pairs.
 * @returns Source text such as `"^[...""+"...]$"`, meant to be interpolated
 *          into generated code as the first argument of `new RegExp`.
 */
function makeLargePattern(ranges: [number, number][]): string {
151+
const lines = ["^["]
152+
for (const [min, max] of ranges) {
153+
const line = lines[lines.length - 1]
154+
const part =
155+
min === max
156+
? esc(min)
157+
: min + 1 === max
158+
? `${esc(min)}${esc(max)}`
159+
: `${esc(min)}-${esc(max)}`
160+
161+
if (line.length + part.length > 60) {
162+
lines.push(part)
163+
} else {
164+
lines[lines.length - 1] += part
165+
}
166+
}
167+
lines[lines.length - 1] += "]$"
168+
return lines.map(line => `"${line}"`).join("+")
169+
}
170+
171+
/**
 * Returns the text of an escape sequence for a code point, choosing the
 * shortest form that fits:
 * - `\xHH` (two hex digits) when `cp <= 0xff`;
 * - `\uHHHH` (four hex digits) when `cp <= 0xffff`;
 * - `\u{H...}` (unpadded hex) otherwise.
 *
 * Hex digits come from `toString(16)`, so they are lowercase. The doubled
 * backslash in each template literal produces a single literal backslash in
 * the returned string.
 *
 * @param cp Code point to escape.
 * @returns The escape sequence as text.
 */
function esc(cp: number): string {
172+
if (cp <= 0xff) {
173+
return `\\x${cp.toString(16).padStart(2, "0")}`
174+
}
175+
if (cp <= 0xffff) {
176+
return `\\u${cp.toString(16).padStart(4, "0")}`
177+
}
178+
return `\\u{${cp.toString(16)}}`
179+
}
180+
147181
function save(content: string): Promise<void> {
148182
return new Promise((resolve, reject) => {
149183
fs.writeFile(FILE_PATH, content, error =>

0 commit comments

Comments
 (0)