feat: 优化加载情况

KonghaYao · KonghaYao · commit e69477a720cf · 2025-03-09T17:04:21.000+08:00
diff --git a/crates/lang_unicodes/build.rs b/crates/lang_unicodes/build.rs
diff --git a/crates/lang_unicodes/src/lib.rs b/crates/lang_unicodes/src/lib.rs
@@ -12,20 +12,47 @@ pub fn expand_ranges(ranges: &[(u32, u32)]) -> Vec<u32> {
 lazy_static! {
     /**
      * Latin 范围替换
-     * @link https://npmmirror.com/package/@fontsource/noto-sans/files/400.css?version=5.0.22#L61
      * 0 不归入此，一般 0 是用于占位的
      */
-    pub static ref LATIN: Vec<u32> = expand_ranges(&[(0x0001, 0x00FF)]);
+    pub static ref LATIN: Vec<u32> = expand_ranges(&[(0x0001, 0x007F)]);
+    pub static ref LATIN_1: Vec<u32> = expand_ranges(&[(0x0080, 0x00FF)]);
     pub static ref LATIN_EXT_A: Vec<u32> = expand_ranges(&[(0x0100, 0x017F)]);
     pub static ref LATIN_EXT_B: Vec<u32> = expand_ranges(&[(0x0180, 0x024F)]);
 
 
-    pub static ref HALFWIDTH_FULLWIDTH: Vec<u32> = expand_ranges(&[(0xFF00, 0xFFEF)]);
 
     pub static ref IPA_SYMBOLS: Vec<u32> = expand_ranges(&[(0x0250, 0x02FF)]);
 
-    pub static ref ZH_SYMBOL: Vec<u32> = expand_ranges(&[(0x0300,0x036f),(0xFE10, 0xFE4F)]);
+    // 定义 ZH_SYMBOL 静态引用，包含特化处理的中文常用符号的 Unicode 码点
+    pub static ref ZH_SYMBOL: Vec<u32> = expand_ranges(&[
+        // …
+        (0x2026, 0x2026),
+        // 句号（、。）
+        (0x3001, 0x3002),
+        //《》
+        (0x300a, 0x300b),
+        // 逗号（，－）
+        (0xFF0C, 0xFF0D),
+        // 问号（？）
+        (0xFF1F, 0xFF1F),
+        // ｜
+        (0xFF5C, 0xFF5C),
+        // 感叹号（！）
+        (0xFF01, 0xFF01),
+        // 分号（；）
+        (0xFF1B, 0xFF1B),
+        // 括号（（））
+        (0xFF08, 0xFF09),
+        // 冒号（：）
+        (0xFF1A, 0xFF1A),
+        // 引号（“” ‘’）
+        (0x201C, 0x201D), // “”
+        (0x2018, 0x2019), // ‘’
+        // 破折号（——）
+        (0x2014, 0x2014),
+    ]);
 
+    pub static ref HALFWIDTH_FULLWIDTH: Vec<u32> = expand_ranges(&[(0xFF00, 0xFFEF)]);
 
     pub static ref GREEK: Vec<u32> = expand_ranges(&[(0x0370, 0x03FF), (0x1F00, 0x1FFF)]);
 
@@ -130,17 +157,17 @@ mod tests {
     use super::*;
     #[test]
     fn test() {
-        assert_eq!(LATIN.len(), 255)
+        assert_eq!(LATIN.len(), 127)
     }
 }
 
-pub fn create_default_unicode_area() -> [Vec<u32>; 31] {
+pub fn create_default_unicode_area() -> [Vec<u32>; 32] {
     [
         LATIN.to_vec(),
+        LATIN_1.to_vec(),
         LATIN_EXT_A.to_vec(),
         LATIN_EXT_B.to_vec(),
         IPA_SYMBOLS.to_vec(),
-        HALFWIDTH_FULLWIDTH.to_vec(),
         GREEK.to_vec(),
         CYRILLIC.to_vec(),
         // 中文处理
@@ -170,6 +197,7 @@ pub fn create_default_unicode_area() -> [Vec<u32>; 31] {
         VA.to_vec(),
         ZHUANG.to_vec(),
         NAXI_DONGBA.to_vec(),
+        HALFWIDTH_FULLWIDTH.to_vec(),
     ]
 }
 pub fn create_default_unicode_area_tag() -> [&'static str; 31] {
diff --git a/src/lib.rs b/src/lib.rs
@@ -46,8 +46,8 @@ fn main_test() {
             // language_areas: Some(false),
             // auto_subset: Some(false),
             // subset_remain_chars: Some(false),
-            // font_feature: Some(false),
-            // reduce_mins: Some(true),
+            font_feature: Some(false),
+            reduce_mins: Some(false),
             // rename_output_font: Some("font_[hash:6].[ext]".to_string()),
             ..Default::default()
         };
@@ -72,7 +72,7 @@ fn main_test() {
         println!("Time: {:?}", duration);
     }
 
-    test_on("./LXGWWenKai-Regular.ttf", "ttf");
-    // test_on("./packages/demo/public/SmileySans-Oblique.ttf", "ttf");
+    // test_on("./LXGWWenKai-Regular.ttf", "ttf");
+    test_on("./packages/demo/public/SmileySans-Oblique.ttf", "ttf");
     // test_on("./packages/demo/public/SmileySans-Oblique.ttf.woff2", "woff2");
 }
diff --git a/test/index.ts b/test/index.ts
@@ -0,0 +1,120 @@
+import { readFileSync } from 'fs';
+
+/**
+ * Render the grouping produced by `findWhichSubset` as a plain-text report.
+ *
+ * Groups without any entries are skipped. Each remaining group contributes a
+ * 1-based index, the share of the group's `range` covered by the distinct
+ * code points found (printed as `NaN` when the first entry has no matched
+ * face or an empty range), the group key, and the distinct characters
+ * themselves between `---` rules.
+ *
+ * @param subset Entries grouped by key, as returned by `findWhichSubset`.
+ * @returns The report text, headed by the number of non-empty groups.
+ */
+export function renderToHumanReadableText(
+    subset: ReturnType<typeof findWhichSubset>,
+): string {
+    const sections: string[] = [];
+    for (const [src, hits] of Object.entries(subset)) {
+        if (hits.length === 0) continue;
+
+        // Distinct code points, kept in first-seen order.
+        const distinct = new Set<number>();
+        for (const hit of hits) distinct.add(hit.code);
+
+        // A missing face or an empty range deliberately degrades to NaN.
+        const rangeSize = hits[0].p?.range.length || NaN;
+        const percent = ((distinct.size * 100) / rangeSize).toFixed(0);
+
+        // The leading empty item yields the blank line that opens a section.
+        const lines = [
+            '',
+            `${sections.length + 1} ${percent}% ${src}: `,
+            '---',
+            String.fromCodePoint(...distinct),
+            '---',
+        ];
+        sections.push(lines.join('\n'));
+    }
+    return `总数: ${sections.length} \n` + sections.join('\n');
+}
+
+/**
+ * Read a sample text file and a CSS file, then report which `@font-face`
+ * subset each character of the sample belongs to.
+ *
+ * @param str Path of the sample text file (read as UTF-8).
+ * @param cssPath Path of the CSS file (read as UTF-8).
+ * @returns The text produced by `renderToHumanReadableText`.
+ */
+export function main(str: string, cssPath: string) {
+    const sampleText = readFileSync(str, 'utf-8');
+    const cssText = readFileSync(cssPath, 'utf-8');
+    const grouped = findWhichSubset(sampleText, cssText);
+    return renderToHumanReadableText(grouped);
+}
+
+/**
+ * Command-line entry point: read the sample file and analyse which
+ * characters it uses.
+ *
+ * Takes the sample path and the CSS path from the third and fourth entries
+ * of `process.argv`, echoes both, then prints the report built by `main`.
+ */
+export function run() {
+    const [, , str, cssPath] = process.argv;
+    console.log(str, cssPath);
+    const report = main(str, cssPath);
+    console.log(report);
+}
+run();
+
+/**
+ * Assign every character of `str` to the first `@font-face` rule in `css`
+ * whose unicode range contains it, and group the characters by that rule.
+ *
+ * @param str Text to analyse; it is iterated by code point.
+ * @param css CSS text, parsed with `getSubsetsFromCSS`.
+ * @returns An object keyed by the string form of each matched face's `src`,
+ *          plus an always-present `unknown` key for characters no face
+ *          covers. Each value lists `{ code, p }` entries in input order,
+ *          where `p` is the matched face (or `undefined`).
+ */
+export function findWhichSubset(str: string, css: string) {
+    const pkg = getSubsetsFromCSS(css);
+
+    // Build one lookup set per face up front, so each character costs a hash
+    // probe per face instead of a linear scan of the face's whole range.
+    const lookups = pkg.map((face) => new Set<number>(face?.range));
+
+    const AToB = [...str].map((char) => {
+        const code = char.codePointAt(0)!;
+        const hit = lookups.findIndex((codes) => codes.has(code));
+        const p = hit === -1 ? undefined : pkg[hit];
+        return { code, p };
+    });
+
+    // Group the entries by face, one array of entries per face.
+    const result: Record<string, typeof AToB> = { unknown: [] };
+    for (const entry of AToB) {
+        // `src` is a regexp match result rather than a string; make the
+        // conversion to an object key explicit instead of relying on
+        // implicit coercion hidden behind @ts-ignore.
+        const key = entry.p ? String(entry.p.src) : 'unknown';
+        if (!result[key]) {
+            result[key] = [];
+        }
+        result[key].push(entry);
+    }
+    return result;
+}
+
+/**
+ * Extract the font subsets declared by the `@font-face` rules of a CSS text.
+ *
+ * Only the `unicode-range` declaration itself is parsed: its value ends at
+ * the next `;` or at the closing `}`, so declarations that follow it are
+ * never misread as hexadecimal code points and a missing trailing `;` does
+ * not drop the rule. Tokens may be a single code point (`U+26`), an
+ * inclusive interval (`U+4E00-9FFF`) or a wildcard (`U+4??`); the `U+`
+ * prefix is matched case-insensitively. Tokens that are not valid hex
+ * ranges, and reversed intervals, contribute nothing.
+ *
+ * @param css CSS text to scan.
+ * @returns One entry per `@font-face` rule that has a `unicode-range`:
+ *          `src` is the regexp match of the rule's `src:` declaration (or
+ *          `null` when none matched) and `range` is the expanded list of
+ *          code points. Returns an empty array when there is no
+ *          `@font-face` rule.
+ */
+export function getSubsetsFromCSS(css: string) {
+    const faces = css.match(/@font-face[\s\S]+?\}/g);
+    if (!faces) return [];
+
+    // Optional "U+", hex digits with optional trailing "?" wildcards, then an
+    // optional "-end" part.
+    const tokenPattern = /^(?:u\+)?([0-9a-f]*\?*)(?:-([0-9a-f]+))?$/i;
+
+    /** Expand one unicode-range token into the code points it covers. */
+    const expandToken = (token: string): number[] => {
+        const parts = tokenPattern.exec(token);
+        if (!parts) return [];
+        const head = parts[1];
+        const tail = parts[2];
+        if (head.length < 1 || head.length > 6) return [];
+
+        let first: number;
+        let last: number;
+        if (tail !== undefined) {
+            // An explicit interval cannot also carry wildcards.
+            if (head.includes('?') || tail.length > 6) return [];
+            first = parseInt(head, 16);
+            last = parseInt(tail, 16);
+        } else {
+            // "4??" spans 0x400..0x4FF; without "?" both ends are equal.
+            first = parseInt(head.replace(/\?/g, '0'), 16);
+            last = parseInt(head.replace(/\?/g, 'F'), 16);
+        }
+        if (last < first) return [];
+
+        // The interval includes `last`.
+        return Array.from({ length: last - first + 1 }, (_, i) => first + i);
+    };
+
+    const subsets: { src: RegExpMatchArray | null; range: number[] }[] = [];
+    for (const face of faces) {
+        const declaration = face.match(/unicode-range\s*:([^;}]*)/i);
+        if (!declaration) continue;
+        const src = face.match(/src:[^;]*(?:[,;])+/);
+        const range = declaration[1]
+            .split(',')
+            .map((token) => token.trim())
+            .filter(Boolean)
+            .flatMap(expandToken);
+        subsets.push({ src, range });
+    }
+    return subsets;
+}
diff --git a/test/sample.txt b/test/sample.txt
diff --git a/test/test-subset.sh b/test/test-subset.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env sh
+# Analyse which font subset each character of ./test/sample.txt falls into,
+# once per stylesheet, writing each report to ./test/temp.
+# All paths are relative to the current working directory.
+
+# The output redirections below fail if this directory does not exist yet.
+mkdir -p ./test/temp
+
+# Report for the stylesheet at ./dist/ttf/result.css.
+bun test/index.ts ./test/sample.txt ./dist/ttf/result.css >./test/temp/result.txt
+# Report for the stylesheet at ./test/temp/font.css.
+bun test/index.ts ./test/sample.txt ./test/temp/font.css >./test/temp/result1.txt

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+bun test/index.ts ./test/sample.txt ./dist/ttf/result.css >./test/temp/result.txt`
	`2`	`+bun test/index.ts ./test/sample.txt ./test/temp/font.css >./test/temp/result1.txt`