|
| 1 | +import fs from "fs" |
| 2 | +import { JSDOM, DOMWindow } from "jsdom" |
| 3 | +import { CLIEngine } from "eslint" |
| 4 | + |
/**
 * CSS id selectors of the three tables read from each specification page.
 * Every edition listed in `DataSources` currently uses the same ids.
 */
const SPEC_TABLE_SELECTORS = {
    binProperties: "#table-binary-unicode-properties",
    gcValues: "#table-unicode-general-category-values",
    scValues: "#table-unicode-script-values",
}

/**
 * Specification pages to read, one entry per ECMAScript edition.
 * Each entry carries the page URL, the edition year, and the selectors of
 * the tables to read from that page.
 */
const DataSources = [
    {
        url: "https://www.ecma-international.org/ecma-262/9.0/",
        version: 2018,
        ...SPEC_TABLE_SELECTORS,
    },
    {
        url: "https://www.ecma-international.org/ecma-262/10.0/",
        version: 2019,
        ...SPEC_TABLE_SELECTORS,
    },
    {
        url: "https://tc39.es/ecma262/",
        version: 2020,
        ...SPEC_TABLE_SELECTORS,
    },
]

/** Destination of the generated module. */
const FILE_PATH = "src/unicode/properties.ts"

/** All progress and error output of this script goes through this object. */
const logger = console

/** Values collected for one edition, grouped by the table they came from. */
type Datum = Record<"binProperties" | "gcValues" | "scValues", string[]>
| 36 | + |
| 37 | +// Main |
// Script entry point. For every entry of `DataSources`, in order, it loads the
// page, collects the values of the three tables, then generates, formats and
// writes the module at `FILE_PATH`. Any failure is logged and turns the
// process exit code into 1.
;(async () => {
    // Edition year -> values collected for that edition.
    const data: Record<number, Datum> = Object.create(null)
    // Values already collected in an earlier iteration. `collectValues` both
    // reads and updates these sets, so each value is recorded only for the
    // first edition (in `DataSources` order) whose page lists it.
    const existing = {
        binProperties: new Set<string>(),
        gcValues: new Set<string>(),
        scValues: new Set<string>(),
    }

    for (const {
        binProperties,
        gcValues,
        scValues,
        url,
        version,
    } of DataSources) {
        logger.log("---- ECMAScript %d ----", version)
        const datum: Datum = {
            binProperties: [],
            gcValues: [],
            scValues: [],
        }
        data[version] = datum

        // Keep fetching until a window is obtained. Only an error whose
        // message is exactly "Error: socket hang up" is retried (after a
        // 2000 ms pause); anything else is rethrown. There is no retry limit.
        let window: DOMWindow | null = null
        do {
            try {
                logger.log("Fetching data from %o", url)
                ;({ window } = await JSDOM.fromURL(url))
            } catch (error) {
                if (!error || error.message !== "Error: socket hang up") {
                    throw error
                }
                logger.log("Failed: %s", error)
                await new Promise(resolve => setTimeout(resolve, 2000))
            }
        } while (window == null)

        logger.log("Parsing tables")
        datum.binProperties = collectValues(
            window,
            binProperties,
            existing.binProperties,
        )
        datum.gcValues = collectValues(window, gcValues, existing.gcValues)
        datum.scValues = collectValues(window, scValues, existing.scValues)

        logger.log("Done")
    }

    // Everything between the backticks below is the text of the generated
    // module. It declares one lazily filled `esNNNN` RegExp slot per edition
    // in each `*Patterns` object, and two exported validators whose bodies are
    // the per-edition snippets produced by `makeVerificationCode`.
    // The trailing 52 / 56 arguments are the `maxLen` chunk width handed to
    // `makeVerificationCode`.
    // NOTE(review): the two widths presumably reflect the nesting depth of the
    // emitted pattern strings -- confirm.
    logger.log("Generating code...")
    let code = `/* This file was generated with ECMAScript specifications. */

const gcNamePattern = /^(?:General_Category|gc)$/u
const scNamePattern = /^(?:Script(?:_Extensions)?|scx?)$/u
const gcValuePatterns = {
    ${Array.from(
        Object.keys(data),
        version => `es${version}: null as RegExp | null,`,
    ).join("\n")}
}
const scValuePatterns = {
    ${Array.from(
        Object.keys(data),
        version => `es${version}: null as RegExp | null,`,
    ).join("\n")}
}
const binPropertyPatterns = {
    ${Array.from(
        Object.keys(data),
        version => `es${version}: null as RegExp | null,`,
    ).join("\n")}
}

export function isValidUnicodeProperty(version: number, name: string, value: string): boolean {
    if (gcNamePattern.test(name)) {
        ${Array.from(Object.entries(data), ([version, { gcValues }]) =>
            makeVerificationCode(version, "gcValuePatterns", gcValues, 52),
        ).join("\n")}
    }
    if (scNamePattern.test(name)) {
        ${Array.from(Object.entries(data), ([version, { scValues }]) =>
            makeVerificationCode(version, "scValuePatterns", scValues, 52),
        ).join("\n")}
    }
    return false
}

export function isValidLoneUnicodeProperty(version: number, value: string): boolean {
    ${Array.from(Object.entries(data), ([version, { binProperties }]) =>
        makeVerificationCode(version, "binPropertyPatterns", binProperties, 56),
    ).join("\n")}
    return false
}
`

    // Run the generated text through ESLint with autofix enabled. When the
    // result carries no `output`, the unformatted text is kept as-is.
    logger.log("Formatting code...")
    const engine = new CLIEngine({ fix: true })
    const result = engine.executeOnText(code, "properties.ts").results[0]
    code = result.output || code

    logger.log("Writing '%s'...", FILE_PATH)
    await save(code)

    logger.log("Completed!")
})().catch(error => {
    // Report the failure and signal it through the exit code.
    logger.error(error.stack)
    process.exitCode = 1
})
| 146 | + |
/**
 * Collect the values listed in one table of a loaded page.
 *
 * The text of every `code` element in the first cell of each row under `id`
 * is read. Values already present in `existingSet` are skipped; every value
 * that is kept is added to `existingSet`, so the set is mutated by this call.
 *
 * Elements with no text are ignored: an empty value would otherwise end up
 * as an empty alternative in the generated pattern and make it accept "".
 *
 * @param window Window of the loaded page.
 * @param id CSS selector of the table, e.g. "#table-unicode-script-values".
 * @param existingSet Values seen so far; updated in place.
 * @returns The newly seen values, sorted in default string order.
 */
function collectValues(
    window: Window,
    id: string,
    existingSet: Set<string>,
): string[] {
    const cells = window.document.querySelectorAll(
        `${id} td:nth-child(1) code`,
    )
    const fresh: string[] = []

    for (const text of Array.from(cells, node => node.textContent || "")) {
        // Skip elements without text as well as values seen before.
        if (text === "" || existingSet.has(text)) {
            continue
        }
        existingSet.add(text)
        fresh.push(text)
    }

    return fresh.sort()
}
| 165 | + |
/**
 * Build the source text of one per-edition check inside a generated validator.
 *
 * The emitted snippet runs when `version >= <version>`, creates the RegExp in
 * `<patternVar>.es<version>` on first use, and returns `true` when it matches
 * `value`.
 *
 * @param version Edition year, used both in the comparison and in the slot name.
 * @param patternVar Name of the object holding the cached RegExp slots.
 * @param values Values the emitted pattern must accept.
 * @param maxLen Chunk width forwarded to `makeRegExpPatternCode`.
 * @returns The snippet, or an empty string when `values` is empty.
 */
function makeVerificationCode(
    version: string,
    patternVar: string,
    values: string[],
    maxLen: number,
): string {
    // An edition that introduced nothing new needs no check at all.
    if (!values.length) {
        return ""
    }

    const slot = `${patternVar}.es${version}`
    const patternSource = makeRegExpPatternCode(values, maxLen)

    return `
        if (version >= ${version}) {
            if (!${slot}) {
                ${slot} = new RegExp(
                    ${patternSource},
                    "u"
                )
            }
            if (${slot}.test(value)) {
                return true
            }
        }
    `
}
| 190 | + |
/**
 * Build the source text of a string expression that evaluates to the regular
 * expression source `^(?:name1|name2|...)$`.
 *
 * The pattern is cut into several string literals joined with `+`. A new
 * literal is started whenever appending the next alternative would make the
 * current one longer than `maxLen` characters.
 *
 * Names are escaped for use inside a regular expression, and each chunk is
 * quoted with `JSON.stringify`, so a name containing a metacharacter, a quote
 * or a backslash still yields valid code that matches the name literally.
 * For names made only of letters, digits and underscores the output is the
 * same as plain interpolation.
 *
 * @param names Alternatives to accept. An empty list yields `"^(?:)$"`, which
 *     matches only the empty string.
 * @param maxLen Maximum length of one chunk before a new one is started.
 * @returns Source text such as `"^(?:Lu|Ll|"+"Lt)$"`.
 */
function makeRegExpPatternCode(names: string[], maxLen: number): string {
    const lines = ["^(?:"]

    for (const name of names) {
        // Prefix every regular-expression syntax character with a backslash.
        const escaped = name.replace(/[\\^$.*+?()\[\]{}|]/gu, "\\$&")
        const part = `${escaped}|`
        const last = lines.length - 1

        if (lines[last].length + part.length > maxLen) {
            lines.push(part)
        } else {
            lines[last] += part
        }
    }

    // Drop the separator left after the final alternative and close the group.
    const last = lines.length - 1
    lines[last] = `${lines[last].replace(/\|$/u, "")})$`

    return lines.map(line => JSON.stringify(line)).join("+")
}
| 206 | + |
/**
 * Write `content` to the file at `FILE_PATH`.
 *
 * @param content Full text of the file to write.
 * @returns A promise that resolves once the write has finished and rejects
 *     with the error reported by `fs.writeFile`.
 */
function save(content: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        fs.writeFile(FILE_PATH, content, error => {
            if (error) {
                reject(error)
                return
            }
            resolve()
        })
    })
}
0 commit comments