Skip to content

Commit 203bf15

Browse files
committed
feat: 添加 google css 转中文常用顺序
1 parent 78a87b3 commit 203bf15

File tree

6 files changed

+2395
-32
lines changed

6 files changed

+2395
-32
lines changed

crates/lang_unicodes/Cargo.toml

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "lang-unicodes"
3-
version = "0.1.3"
3+
version = "0.2.0"
44
edition = "2021"
55
description = "Effortlessly convert and validate text across various Unicode ranges, supporting multiple languages for seamless internationalization in your applications."
66
authors = ["KonghaYao<[email protected]>"]
@@ -11,11 +11,13 @@ include = ["data", "src"]
1111

1212
[dependencies]
1313
lazy_static = "1.5.0"
14+
cn-font-utils = { version = "0.1.2", path = "../cn_font_utils" }
1415

1516
[lib]
1617
path = "src/lib.rs"
1718

1819
[build-dependencies]
20+
cn-font-utils = { version = "0.1.2", path = "../cn_font_utils" }
1921
lazy_static = { version = "1.5.0", optional = true }
2022
opencc-rs = { version = "0.4.7", optional = true }
2123
indexmap = "2.7.1"

crates/lang_unicodes/build.rs

+9-6
Large diffs are not rendered by default.
+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import fs from 'fs-extra';
2+
main();

/**
 * Script entry point: converts the `unicode-range` data of
 * `./scripts/noto-sans-sc.css` into a flat binary list of code points.
 *
 * Steps, in order:
 * 1. Parse every `@font-face` rule with `getSubsetsFromCSS`.
 * 2. Expand each `[start, end]` pair into the individual code points it covers.
 * 3. Keep only code points in 0x4E00–0x9FFF.
 * 4. Reverse the order of the faces, concatenate them, and keep the first 7000 entries.
 *    (Presumably later faces in the CSS hold the more commonly used characters —
 *    confirm against the CSS source.)
 * 5. Write the result to `./data/sc.bin` as a `Uint16Array` and print the characters.
 */
function main() {
    const cssText = fs.readFileSync('./scripts/noto-sans-sc.css', 'utf-8');

    // One array of individual code points per @font-face rule.
    const expandedFaces = getSubsetsFromCSS(cssText).map((face) => {
        const codePoints: number[] = [];
        for (const entry of face) {
            if (Array.isArray(entry)) {
                // Inclusive range: entry[0] through entry[1].
                for (let cp = entry[0]; cp <= entry[1]; cp++) {
                    codePoints.push(cp);
                }
            } else {
                codePoints.push(entry);
            }
        }
        return codePoints;
    });

    // Filter each face on its own so the per-face grouping survives until the reversal.
    const filteredFaces = expandedFaces.map((codePoints) =>
        codePoints.filter((cp) => cp >= 0x4e00 && cp <= 0x9fff),
    );
    filteredFaces.reverse();
    const selected = filteredFaces.flat().slice(0, 7000);

    // Each value is stored in 16 bits below; report anything that would not fit.
    const exceedsU16 = selected.some((cp) => cp > 65535);
    if (exceedsU16) {
        console.error('注入危险');
    }

    fs.writeFileSync('./data/sc.bin', new Uint16Array(selected));

    const characters = selected.map((cp) => String.fromCodePoint(cp));
    console.log(characters.join(' '));
}
35+
36+
/**
 * Extracts the `unicode-range` descriptor of every `@font-face` rule in a CSS string.
 *
 * @param css - Raw CSS text containing zero or more `@font-face` rules.
 * @returns One array per `@font-face` rule, in source order. Each entry is either a
 *   single code point (`number`) or an inclusive `[start, end]` pair. A rule without
 *   a `unicode-range` descriptor yields an empty array; CSS without any `@font-face`
 *   rule yields `[]`.
 *
 * Notes on parsing:
 * - Only the value of the `unicode-range` declaration itself is read (up to the next
 *   `;` or `}`), so declarations that follow it are never mistaken for ranges and a
 *   missing trailing `;` does not drop the last token.
 * - The `U+` prefix is matched case-insensitively and is optional.
 * - Wildcard ranges such as `U+4E3?` become `[0x4E30, 0x4E3F]`.
 * - Tokens that are not valid hexadecimal ranges are skipped instead of producing `NaN`.
 */
export function getSubsetsFromCSS(
    css: string,
): Array<Array<number | [number, number]>> {
    const faces = css.match(/@font-face[\s\S]+?\}/g);
    if (!faces) return [];

    return faces.map((face) => {
        const entries: Array<number | [number, number]> = [];

        // Capture only this declaration's value: stop at the first ';' or '}'.
        const descriptor = /unicode-range\s*:\s*([^;}]*)/i.exec(face);
        if (!descriptor) return entries;
        const [, value = ''] = descriptor;

        for (const rawToken of value.split(',')) {
            const token = /^(?:u\+)?([0-9a-f?]+)(?:-([0-9a-f]+))?$/i.exec(
                rawToken.trim(),
            );
            if (!token) continue;
            const [, first = '', last] = token;

            if (first.includes('?')) {
                // A wildcard cannot be combined with an explicit end point.
                if (last !== undefined) continue;
                // '?' stands for any hex digit: lowest is 0, highest is F.
                entries.push([
                    parseInt(first.replace(/\?/g, '0'), 16),
                    parseInt(first.replace(/\?/g, 'F'), 16),
                ]);
            } else if (last !== undefined) {
                entries.push([parseInt(first, 16), parseInt(last, 16)]);
            } else {
                entries.push(parseInt(first, 16));
            }
        }
        return entries;
    });
}

0 commit comments

Comments
 (0)