@@ -9,13 +9,7 @@ const ID_CONTINUE = /^([0-9a-z]+)(?:\.\.([0-9a-z]+))?[^;]*; ID_Continue /iu
9
9
const BORDER = 0x7f
10
10
const logger = console
11
11
12
- enum Mode {
13
- Small ,
14
- Former ,
15
- Latter ,
16
- }
17
-
18
- // Main
12
+ // Main
19
13
; ( async ( ) => {
20
14
let banner = ""
21
15
const idStartSet : Set < string > = new Set ( )
@@ -50,21 +44,39 @@ enum Mode {
50
44
}
51
45
} )
52
46
47
+ logger . log ( "Normalizing data..." )
48
+ normalizeRanges ( idStartSmall )
49
+ normalizeRanges ( idStartLarge )
50
+ normalizeRanges ( idContinueSmall )
51
+ normalizeRanges ( idContinueLarge )
52
+
53
53
logger . log ( "Generating code..." )
54
54
let code = `${ banner }
55
+
56
+ let largeIdStartPattern: RegExp | null = null;
57
+ let largeIdContinuePattern: RegExp | null = null;
58
+
55
59
export function isIdStart(cp: number): boolean {
56
- ${ makeSmallCondtion ( idStartSmall , Mode . Small ) }
57
- return isLargeIdStart(cp)
60
+ ${ makeSmallCondtion ( idStartSmall ) }
61
+ return isLargeIdStart(cp)
58
62
}
59
63
export function isIdContinue(cp: number): boolean {
60
- ${ makeSmallCondtion ( idContinueSmall , Mode . Small ) }
61
- return isLargeIdStart(cp) || isLargeIdContinue(cp)
64
+ ${ makeSmallCondtion ( idContinueSmall ) }
65
+ return isLargeIdStart(cp) || isLargeIdContinue(cp)
62
66
}
63
67
function isLargeIdStart(cp: number): boolean {
64
- ${ makeCondition ( idStartLarge , Mode . Former ) }
68
+ if (!largeIdStartPattern) {
69
+ largeIdStartPattern = new RegExp(${ makeLargePattern ( idStartLarge ) } , "u")
70
+ }
71
+ return largeIdStartPattern.test(String.fromCodePoint(cp))
65
72
}
66
73
function isLargeIdContinue(cp: number): boolean {
67
- ${ makeCondition ( idContinueLarge , Mode . Former ) }
74
+ if (!largeIdContinuePattern) {
75
+ largeIdContinuePattern = new RegExp(${ makeLargePattern (
76
+ idContinueLarge ,
77
+ ) } , "u")
78
+ }
79
+ return largeIdContinuePattern.test(String.fromCodePoint(cp))
68
80
}`
69
81
70
82
logger . log ( "Formatting code..." )
@@ -111,39 +123,61 @@ function processEachLine(cb: (line: string) => void): Promise<void> {
111
123
} )
112
124
}
113
125
114
/**
 * Merge directly adjacent ranges in place.
 *
 * Two consecutive entries are merged when the later one starts exactly one
 * code point after the earlier one ends (`next[0] - 1 === prev[1]`). The
 * earlier tuple is extended and the later one is dropped from the array.
 * Entries that overlap or leave a gap are kept as they are.
 *
 * @param ranges Inclusive `[min, max]` pairs. The array and the surviving
 * tuples are mutated.
 */
function normalizeRanges(ranges: [number, number][]): void {
    let kept = 0
    let prev: [number, number] | undefined

    // Single forward pass: `kept` never runs ahead of the read position, so
    // writing survivors back into the same array cannot clobber unread items.
    for (const curr of ranges) {
        if (prev !== undefined && curr[0] - 1 === prev[1]) {
            // Adjacent: grow the previous survivor instead of keeping `curr`.
            prev[1] = curr[1]
        } else {
            ranges[kept++] = curr
            prev = curr
        }
    }

    // Drop the now-unused tail left behind by the compaction.
    ranges.length = kept
}
128
136
129
/**
 * Build the source text of an `if` chain that tests `cp` against the given
 * ranges.
 *
 * For a single code point it emits `if (cp === 0x…) return true`. For a wider
 * range it emits a lower-bound rejection (`if (cp < 0xMIN) return false`)
 * followed by an upper-bound acceptance (`if (cp < 0xMAX+1) return true`).
 * The chain has no trailing `return`; the caller's template supplies the
 * fall-through.
 *
 * @param ranges Inclusive `[min, max]` pairs. The early `return false` only
 * makes sense when they are in ascending order — presumably guaranteed by the
 * caller; verify there.
 * @returns The statements joined with newlines, or `""` for no ranges.
 */
function makeSmallCondtion(ranges: [number, number][]): string {
    const hex = (value: number): string => `0x${value.toString(16)}`

    return ranges
        .flatMap(([min, max]) =>
            min === max
                ? [`if (cp === ${hex(min)}) return true`]
                : [
                      `if (cp < ${hex(min)}) return false`,
                      // Exclusive upper bound, hence `max + 1`.
                      `if (cp < ${hex(max + 1)}) return true`,
                  ],
        )
        .join("\n")
}
146
149
150
/**
 * Build the source text of a string expression whose value is an anchored
 * character-class pattern (`^[…]$`) covering the given ranges.
 *
 * The pattern is split into double-quoted chunks joined by `+`, so the
 * generated code does not contain one enormous string literal. Each chunk is
 * closed once adding the next part would push it past 60 characters. This is
 * a soft limit: a single part, or the final `]$`, may exceed it. A part is
 * never split across chunks.
 *
 * Within the class:
 * - a single code point is written as one escape,
 * - a two-element range as two escapes (shorter than `a-b`),
 * - anything wider as `min-max`.
 *
 * @param ranges Inclusive `[min, max]` pairs.
 * @returns Source text such as `"^[\xaa\xb5"+"\u0100-\u0101]$"`.
 * @throws RangeError if a pair has `min > max`, which would otherwise surface
 * only later as an invalid character-class range when the pattern is compiled.
 */
function makeLargePattern(ranges: [number, number][]): string {
    const maxChunkLength = 60
    const chunks: string[] = []
    let current = "^["

    for (const [min, max] of ranges) {
        if (min > max) {
            throw new RangeError(
                `Inverted range: 0x${min.toString(16)}..0x${max.toString(16)}`,
            )
        }

        let part: string
        if (min === max) {
            part = esc(min)
        } else if (min + 1 === max) {
            part = `${esc(min)}${esc(max)}`
        } else {
            part = `${esc(min)}-${esc(max)}`
        }

        if (current.length + part.length > maxChunkLength) {
            // Close the current chunk and start a new one with this part.
            chunks.push(current)
            current = part
        } else {
            current += part
        }
    }

    chunks.push(`${current}]$`)
    return chunks.map(chunk => `"${chunk}"`).join("+")
}
170
+
171
/**
 * Render a code point as escape-sequence text.
 *
 * The returned string starts with a literal backslash:
 * - `\xNN` (two hex digits) for code points up to 0xFF,
 * - `\uNNNN` (four hex digits) for code points up to 0xFFFF,
 * - `\u{N…}` for everything above.
 *
 * Hex digits are lower-case.
 *
 * @param cp The code point to escape.
 * @returns The escape sequence as plain text.
 * @throws RangeError if `cp` is not an integer in `0..0x10FFFF`; such a value
 * cannot be written as a valid escape.
 */
function esc(cp: number): string {
    if (!Number.isInteger(cp) || cp < 0 || cp > 0x10ffff) {
        throw new RangeError(`Invalid code point: ${cp}`)
    }

    const hex = cp.toString(16)
    if (cp <= 0xff) {
        return `\\x${hex.padStart(2, "0")}`
    }
    if (cp <= 0xffff) {
        return `\\u${hex.padStart(4, "0")}`
    }
    return `\\u{${hex}}`
}
180
+
147
181
function save ( content : string ) : Promise < void > {
148
182
return new Promise ( ( resolve , reject ) => {
149
183
fs . writeFile ( FILE_PATH , content , error =>
0 commit comments