Skip to content

Commit 2bd358f

Browse files
committed
🐛 fix valid Unicode Properties (fixes #6)
1 parent 00abeab commit 2bd358f

File tree

6 files changed

+418
-478
lines changed

6 files changed

+418
-478
lines changed

package.json

+5-1
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,13 @@
1313
"devDependencies": {
1414
"@mysticatea/eslint-plugin": "^11.0.0",
1515
"@types/eslint": "^4.16.2",
16+
"@types/jsdom": "^12.2.4",
1617
"@types/mocha": "^5.2.2",
1718
"@types/node": "^12.6.8",
1819
"coveralls": "^3.0.1",
1920
"dts-bundle": "^0.7.3",
2021
"eslint": "^6.1.0",
22+
"jsdom": "^15.1.1",
2123
"mocha": "^6.2.0",
2224
"npm-run-all": "^4.1.5",
2325
"nyc": "^14.1.1",
@@ -40,7 +42,9 @@
4042
"pretest": "run-s build lint",
4143
"test": "nyc _mocha \"test/*.ts\" --reporter dot --timeout 10000",
4244
"update:test": "ts-node scripts/update-fixtures.ts",
43-
"update:ids": "ts-node scripts/update-unicode-ids.ts",
45+
"update:unicode": "run-s update:unicode:*",
46+
"update:unicode:ids": "ts-node scripts/update-unicode-ids.ts",
47+
"update:unicode:props": "ts-node scripts/update-unicode-properties.ts",
4448
"preversion": "npm test",
4549
"version": "npm run -s build",
4650
"postversion": "git push && git push --tags",

scripts/update-unicode-properties.ts

+213
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
import fs from "fs"
2+
import { JSDOM, DOMWindow } from "jsdom"
3+
import { CLIEngine } from "eslint"
4+
5+
/**
 * CSS selectors for the three tables read from each specification page.
 * Every source currently uses the same selectors, so they are declared once
 * and spread into each entry below.
 */
const TableSelectors = {
    binProperties: "#table-binary-unicode-properties",
    gcValues: "#table-unicode-general-category-values",
    scValues: "#table-unicode-script-values",
}

/**
 * Specification pages to download, listed from the oldest `version` to the
 * newest. Each entry carries the page URL, the version number the collected
 * values are recorded under, and the table selectors.
 */
const DataSources = [
    {
        url: "https://www.ecma-international.org/ecma-262/9.0/",
        version: 2018,
        ...TableSelectors,
    },
    {
        url: "https://www.ecma-international.org/ecma-262/10.0/",
        version: 2019,
        ...TableSelectors,
    },
    {
        url: "https://tc39.es/ecma262/",
        version: 2020,
        ...TableSelectors,
    },
]

/** Path of the generated source file that this script writes. */
const FILE_PATH = "src/unicode/properties.ts"

/** Destination of the progress and error messages. */
const logger = console
30+
31+
/** Values collected from one specification page, one list per table. */
interface Datum {
    /** Entries read from the table selected by `binProperties`. */
    binProperties: string[]
    /** Entries read from the table selected by `gcValues`. */
    gcValues: string[]
    /** Entries read from the table selected by `scValues`. */
    scValues: string[]
}
36+
37+
// Main
38+
/*
 * Entry point.
 *
 * 1. Downloads every page listed in `DataSources` and collects the values
 *    of its three tables, keeping for each version only the values that no
 *    earlier entry has already produced.
 * 2. Generates the source of `isValidUnicodeProperty` and
 *    `isValidLoneUnicodeProperty` from the collected values.
 * 3. Runs the ESLint fixer over the generated text and writes the result to
 *    `FILE_PATH`.
 *
 * Any failure is logged and reported through a non-zero exit code.
 */
;(async () => {
    // Give up after this many "socket hang up" failures for a single URL
    // instead of retrying forever against a host that never answers.
    const maxFetchAttempts = 5

    const data: Record<number, Datum> = Object.create(null)
    // Values already attributed to an earlier source. `collectValues` both
    // reads and extends these sets, so each version stores only its additions.
    const existing = {
        binProperties: new Set<string>(),
        gcValues: new Set<string>(),
        scValues: new Set<string>(),
    }

    for (const {
        binProperties,
        gcValues,
        scValues,
        url,
        version,
    } of DataSources) {
        logger.log("---- ECMAScript %d ----", version)

        let window: DOMWindow | null = null
        for (let attempt = 1; window == null; ++attempt) {
            try {
                logger.log("Fetching data from %o", url)
                ;({ window } = await JSDOM.fromURL(url))
            } catch (error) {
                // Only this specific failure is retried; anything else, or
                // running out of attempts, aborts the whole script.
                const isHangUp =
                    Boolean(error) &&
                    error.message === "Error: socket hang up"
                if (!isHangUp || attempt >= maxFetchAttempts) {
                    throw error
                }
                logger.log("Failed: %s", error)
                await new Promise(resolve => setTimeout(resolve, 2000))
            }
        }

        logger.log("Parsing tables")
        data[version] = {
            binProperties: collectValues(
                window,
                binProperties,
                existing.binProperties,
            ),
            gcValues: collectValues(window, gcValues, existing.gcValues),
            scValues: collectValues(window, scValues, existing.scValues),
        }

        logger.log("Done")
    }

    logger.log("Generating code...")
    // One lazily-filled RegExp slot per version; the same table body is used
    // for all three pattern objects in the generated file.
    const patternSlots = Object.keys(data)
        .map(version => `es${version}: null as RegExp | null,`)
        .join("\n")
    const gcChecks = Object.entries(data)
        .map(([version, { gcValues }]) =>
            makeVerificationCode(version, "gcValuePatterns", gcValues, 52),
        )
        .join("\n")
    const scChecks = Object.entries(data)
        .map(([version, { scValues }]) =>
            makeVerificationCode(version, "scValuePatterns", scValues, 52),
        )
        .join("\n")
    const binChecks = Object.entries(data)
        .map(([version, { binProperties }]) =>
            makeVerificationCode(
                version,
                "binPropertyPatterns",
                binProperties,
                56,
            ),
        )
        .join("\n")

    let code = `/* This file was generated with ECMAScript specifications. */

const gcNamePattern = /^(?:General_Category|gc)$/u
const scNamePattern = /^(?:Script(?:_Extensions)?|scx?)$/u
const gcValuePatterns = {
${patternSlots}
}
const scValuePatterns = {
${patternSlots}
}
const binPropertyPatterns = {
${patternSlots}
}

export function isValidUnicodeProperty(version: number, name: string, value: string): boolean {
if (gcNamePattern.test(name)) {
${gcChecks}
}
if (scNamePattern.test(name)) {
${scChecks}
}
return false
}

export function isValidLoneUnicodeProperty(version: number, value: string): boolean {
${binChecks}
return false
}
`

    logger.log("Formatting code...")
    const engine = new CLIEngine({ fix: true })
    const result = engine.executeOnText(code, "properties.ts").results[0]
    // `output` is falsy when the fixer produced nothing; keep the raw text.
    code = result.output || code

    logger.log("Writing '%s'...", FILE_PATH)
    await save(code)

    logger.log("Completed!")
})().catch(error => {
    // Rejections are not guaranteed to be Error objects; fall back to the
    // raw value so that something meaningful is always printed.
    logger.error((error && error.stack) || error)
    process.exitCode = 1
})
146+
147+
/**
 * Reads the values listed in the first column of one table.
 *
 * The text of every `<code>` element found in the first cell of a row is
 * trimmed; blank entries are dropped, because an empty string would later
 * become an empty regular-expression alternative that matches `""`.
 *
 * @param window The window whose document contains the table.
 * @param id CSS selector of the table (e.g. `"#table-unicode-script-values"`).
 * @param existingSet Values that were already collected. Values found in this
 * set are skipped, and every value returned by this call is added to it.
 * @returns The newly seen values, sorted with the default string ordering.
 */
function collectValues(
    window: Window,
    id: string,
    existingSet: Set<string>,
): string[] {
    const cells = window.document.querySelectorAll(
        `${id} td:nth-child(1) code`,
    )
    const fresh: string[] = []

    for (const cell of Array.from(cells)) {
        const value = (cell.textContent || "").trim()
        if (value === "" || existingSet.has(value)) {
            continue
        }
        existingSet.add(value)
        fresh.push(value)
    }

    return fresh.sort()
}
165+
166+
function makeVerificationCode(
167+
version: string,
168+
patternVar: string,
169+
values: string[],
170+
maxLen: number,
171+
): string {
172+
if (values.length === 0) {
173+
return ""
174+
}
175+
176+
return `
177+
if (version >= ${version}) {
178+
if (!${patternVar}.es${version}) {
179+
${patternVar}.es${version} = new RegExp(
180+
${makeRegExpPatternCode(values, maxLen)},
181+
"u"
182+
)
183+
}
184+
if (${patternVar}.es${version}.test(value)) {
185+
return true
186+
}
187+
}
188+
`
189+
}
190+
191+
function makeRegExpPatternCode(names: string[], maxLen: number): string {
192+
const lines = ["^(?:"]
193+
for (const name of names) {
194+
const line = lines[lines.length - 1]
195+
const part = `${name}|`
196+
197+
if (line.length + part.length > maxLen) {
198+
lines.push(part)
199+
} else {
200+
lines[lines.length - 1] += part
201+
}
202+
}
203+
lines[lines.length - 1] = `${lines[lines.length - 1].replace(/\|$/u, "")})$`
204+
return lines.map(line => `"${line}"`).join("+")
205+
}
206+
207+
function save(content: string): Promise<void> {
208+
return new Promise((resolve, reject) => {
209+
fs.writeFile(FILE_PATH, content, error =>
210+
error ? reject(error) : resolve(),
211+
)
212+
})
213+
}

src/unicode/index.ts

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
export { isIdContinue, isIdStart } from "./ids"
2-
export { PropertyData } from "./property-data"
2+
export {
3+
isValidLoneUnicodeProperty,
4+
isValidUnicodeProperty,
5+
} from "./properties"
36

47
export const Null = 0x00
58
export const Backspace = 0x08

0 commit comments

Comments
 (0)