Skip to content

Commit e69477a

Browse files
committed
feat: 优化加载情况
1 parent bd66e8e commit e69477a

File tree

6 files changed

+163
-13
lines changed

6 files changed

+163
-13
lines changed

crates/lang_unicodes/build.rs

+2-2
Large diffs are not rendered by default.

crates/lang_unicodes/src/lib.rs

+35-7
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,47 @@ pub fn expand_ranges(ranges: &[(u32, u32)]) -> Vec<u32> {
1212
lazy_static! {
1313
/**
1414
* Latin 范围替换
15-
* @link https://npmmirror.com/package/@fontsource/noto-sans/files/400.css?version=5.0.22#L61
1615
* 0 不归入此,一般 0 是用于占位的
1716
*/
18-
pub static ref LATIN: Vec<u32> = expand_ranges(&[(0x0001, 0x00FF)]);
17+
pub static ref LATIN: Vec<u32> = expand_ranges(&[(0x0001, 0x007F)]);
18+
pub static ref LATIN_1: Vec<u32> = expand_ranges(&[(0x0080, 0x00FF)]);
1919
pub static ref LATIN_EXT_A: Vec<u32> = expand_ranges(&[(0x0100, 0x017F)]);
2020
pub static ref LATIN_EXT_B: Vec<u32> = expand_ranges(&[(0x0180, 0x024F)]);
2121

2222

23-
pub static ref HALFWIDTH_FULLWIDTH: Vec<u32> = expand_ranges(&[(0xFF00, 0xFFEF)]);
2423

2524
pub static ref IPA_SYMBOLS: Vec<u32> = expand_ranges(&[(0x0250, 0x02FF)]);
2625

27-
pub static ref ZH_SYMBOL: Vec<u32> = expand_ranges(&[(0x0300,0x036f),(0xFE10, 0xFE4F)]);
26+
// 定义 ZH_SYMBOL 静态引用,包含特化处理的中文常用符号的 Unicode 码点
27+
pub static ref ZH_SYMBOL: Vec<u32> = expand_ranges(&[
28+
// …
29+
(0x2026, 0x2026),
30+
// 顿号、句号(、。)
31+
(0x3001, 0x3002),
32+
//《》
33+
(0x300a, 0x300b),
34+
// 逗号、全角连字符(,-)
35+
(0xFF0C, 0xFF0D),
36+
// 问号(?)
37+
(0xFF1F, 0xFF1F),
38+
// |
39+
(0xFF5C, 0xFF5C),
40+
// 感叹号(!)
41+
(0xFF01, 0xFF01),
42+
// 分号(;)
43+
(0xFF1B, 0xFF1B),
44+
// 括号(())
45+
(0xFF08, 0xFF09),
46+
// 冒号(:)
47+
(0xFF1A, 0xFF1A),
48+
// 引号(“” ‘’)
49+
(0x201C, 0x201D), // “”
50+
(0x2018, 0x2019), // ‘’
51+
// 破折号(——)
52+
(0x2014, 0x2014),
53+
]);
2854

55+
pub static ref HALFWIDTH_FULLWIDTH: Vec<u32> = expand_ranges(&[(0xFF00, 0xFFEF)]);
2956

3057
pub static ref GREEK: Vec<u32> = expand_ranges(&[(0x0370, 0x03FF), (0x1F00, 0x1FFF)]);
3158

@@ -130,17 +157,17 @@ mod tests {
130157
use super::*;
131158
#[test]
132159
fn test() {
133-
assert_eq!(LATIN.len(), 255)
160+
assert_eq!(LATIN.len(), 127)
134161
}
135162
}
136163

137-
pub fn create_default_unicode_area() -> [Vec<u32>; 31] {
164+
pub fn create_default_unicode_area() -> [Vec<u32>; 32] {
138165
[
139166
LATIN.to_vec(),
167+
LATIN_1.to_vec(),
140168
LATIN_EXT_A.to_vec(),
141169
LATIN_EXT_B.to_vec(),
142170
IPA_SYMBOLS.to_vec(),
143-
HALFWIDTH_FULLWIDTH.to_vec(),
144171
GREEK.to_vec(),
145172
CYRILLIC.to_vec(),
146173
// 中文处理
@@ -170,6 +197,7 @@ pub fn create_default_unicode_area() -> [Vec<u32>; 31] {
170197
VA.to_vec(),
171198
ZHUANG.to_vec(),
172199
NAXI_DONGBA.to_vec(),
200+
HALFWIDTH_FULLWIDTH.to_vec(),
173201
]
174202
}
175203
pub fn create_default_unicode_area_tag() -> [&'static str; 31] {

src/lib.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ fn main_test() {
4646
// language_areas: Some(false),
4747
// auto_subset: Some(false),
4848
// subset_remain_chars: Some(false),
49-
// font_feature: Some(false),
50-
// reduce_mins: Some(true),
49+
font_feature: Some(false),
50+
reduce_mins: Some(false),
5151
// rename_output_font: Some("font_[hash:6].[ext]".to_string()),
5252
..Default::default()
5353
};
@@ -72,7 +72,7 @@ fn main_test() {
7272
println!("Time: {:?}", duration);
7373
}
7474

75-
test_on("./LXGWWenKai-Regular.ttf", "ttf");
76-
// test_on("./packages/demo/public/SmileySans-Oblique.ttf", "ttf");
75+
// test_on("./LXGWWenKai-Regular.ttf", "ttf");
76+
test_on("./packages/demo/public/SmileySans-Oblique.ttf", "ttf");
7777
// test_on("./packages/demo/public/SmileySans-Oblique.ttf.woff2", "woff2");
7878
}

test/index.ts

+120
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import { readFileSync } from 'fs';
2+
3+
/**
 * Formats the grouping produced by `findWhichSubset` as a plain-text report.
 *
 * Groups with no hits are omitted. Every remaining group gets a section with
 * its 1-based index, the share of the subset's code points that were hit
 * (distinct hit code points * 100 / size of the subset's range), the group
 * key, and the distinct hit characters themselves.
 *
 * @param subset Map from group key to the hits that fell into that group.
 * @returns The report, starting with a line stating the number of groups.
 */
export function renderToHumanReadableText(
    subset: ReturnType<typeof findWhichSubset>,
): string {
    const nonEmpty = Object.entries(subset).filter(([, hits]) => hits.length > 0);
    const sections = nonEmpty.map(([src, hits], index) => {
        const distinct = new Set(hits.map((hit) => hit.code));
        // Hits without a subset have no range to compare against, so the
        // rate is deliberately NaN for them.
        const usageRate =
            (distinct.size * 100) / (hits[0].p?.range.length || NaN);
        // Convert one code point at a time: spreading the whole set into a
        // single String.fromCodePoint call can exceed the engine's argument
        // limit when a group holds many distinct characters.
        const chars = Array.from(distinct, (code) =>
            String.fromCodePoint(code),
        ).join('');
        return [
            '',
            `${index + 1} ${usageRate.toFixed(0)}% ${src}:`,
            '---',
            chars,
            '---',
        ].join('\n');
    });
    return `总数: ${nonEmpty.length} \n` + sections.join('\n');
}
22+
23+
/**
 * Reads a sample text file and a CSS file from disk and reports which
 * `@font-face` subset each character of the sample falls into.
 *
 * @param str Path of the UTF-8 sample text file (a path, despite the name).
 * @param cssPath Path of the UTF-8 CSS file containing the `@font-face` rules.
 * @returns The human-readable report built by `renderToHumanReadableText`.
 */
export function main(str: string, cssPath: string) {
    const sampleText = readFileSync(str, 'utf-8');
    const css = readFileSync(cssPath, 'utf-8');
    const grouped = findWhichSubset(sampleText, css);
    return renderToHumanReadableText(grouped);
}
31+
32+
/**
 * CLI entry point: reads the sample file and the CSS file named on the
 * command line and prints the character-usage analysis.
 *
 * Expects two arguments: the sample text path and the CSS path. When either
 * is missing, a usage message goes to stderr and the process exit code is
 * set to 1, instead of failing later inside `readFileSync`.
 */
export function run() {
    const [, , str, cssPath] = process.argv;
    if (!str || !cssPath) {
        console.error('Usage: bun test/index.ts <sample-text-file> <css-file>');
        process.exitCode = 1;
        return;
    }
    console.log(str, cssPath);
    console.log(main(str, cssPath));
}
run();
40+
41+
/**
 * Assigns every character of `str` to the first `@font-face` subset in `css`
 * whose unicode-range contains it, then groups the hits by subset.
 *
 * @param str Text whose characters (code points) are analysed.
 * @param css CSS source containing the `@font-face` rules.
 * @returns A record keyed by the stringified `src` of each matching subset,
 *          plus an `unknown` entry for characters no subset covers. Each
 *          value is the list of `{ code, p }` hits in input order.
 */
export function findWhichSubset(str: string, css: string) {
    const pkg = getSubsetsFromCSS(css);
    type Subset = (typeof pkg)[number];

    // Index code point -> owning subset once, instead of scanning every
    // subset's range array for every character. The first subset listing a
    // code point wins, matching a front-to-back search over `pkg`.
    const ownerOf = new Map<number, Subset>();
    for (const p of pkg) {
        if (!p) continue;
        for (const code of p.range) {
            if (!ownerOf.has(code)) ownerOf.set(code, p);
        }
    }

    const AToB = [...str].map((char) => {
        const code = char.codePointAt(0)!;
        return { code, p: ownerOf.get(code) };
    });

    // Group the hits by subset; characters without a subset go to `unknown`.
    const result: Record<string, typeof AToB> = { unknown: [] };
    for (const hit of AToB) {
        if (!hit.p) {
            result.unknown.push(hit);
            continue;
        }
        // `src` is a regex match array (or null); String() yields the same
        // key that using it directly as a property name would.
        const key = String(hit.p.src);
        if (!result[key]) {
            result[key] = [];
        }
        result[key].push(hit);
    }
    return result;
}
79+
80+
/**
 * Expands one `unicode-range` token into the code points it covers.
 *
 * Handles a single code point (`U+26`), an inclusive interval (`U+0-7F`) and
 * a wildcard range (`U+4??`, i.e. 0x400-0x4FF). The `U+` prefix is matched
 * case-insensitively. Tokens that do not parse as hexadecimal yield no code
 * points.
 */
function expandUnicodeRangeToken(token: string): number[] {
    const body = token.replace(/^u\+/i, '');
    const parts = body.includes('?')
        ? [body.replace(/\?/g, '0'), body.replace(/\?/g, 'F')]
        : body.split('-');
    const start = parseInt(parts[0], 16);
    const end = parts.length > 1 ? parseInt(parts[1], 16) : start;
    if (Number.isNaN(start) || Number.isNaN(end) || end < start) return [];
    // Interval is inclusive of `end`.
    return Array.from({ length: end - start + 1 }, (_, offset) => start + offset);
}

/**
 * Extracts the font subsets described by the `@font-face` rules of a CSS file.
 *
 * Rules without a `unicode-range` descriptor are skipped.
 *
 * @param css CSS source text.
 * @returns One entry per rule that has a `unicode-range`: `src` is the regex
 *          match of the rule's `src:` declaration (or null when absent) and
 *          `range` is the flat list of code points the rule covers.
 */
export function getSubsetsFromCSS(css: string) {
    const faces = css.match(/@font-face[\s\S]+?\}/g);
    if (!faces) return [];
    return faces
        .map((face) => {
            // Stop at the end of the declaration (`;` or the closing `}`) so
            // later declarations are not parsed as ranges and a final range
            // without a trailing `;` is not lost.
            const unicodeRange = face.match(/unicode-range\s*:\s*([^;}]*)/i);
            const src = face.match(/src:[^;]*(?:[,;])+/);
            if (!unicodeRange) return null;
            return {
                src,
                range: unicodeRange[1]
                    .split(',')
                    .map((token) => token.trim())
                    .filter(Boolean)
                    .flatMap(expandUnicodeRangeToken),
            };
        })
        .filter(Boolean);
}

test/sample.txt

Whitespace-only changes.

test/test-subset.sh

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Analyse which font subsets the sample text touches, once against the
# generated CSS and once against a reference CSS, writing each report to
# ./test/temp/ for comparison.

# The redirects below fail if the output directory does not exist yet.
mkdir -p ./test/temp

bun test/index.ts ./test/sample.txt ./dist/ttf/result.css >./test/temp/result.txt
bun test/index.ts ./test/sample.txt ./test/temp/font.css >./test/temp/result1.txt

0 commit comments

Comments
 (0)