Skip to content

Commit d6eada0

Browse files
committed
performance: 调整整个算法体系
1 parent 5814ea3 commit d6eada0

File tree

4 files changed

+64
-34
lines changed

4 files changed

+64
-34
lines changed

src/lib.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,14 @@ fn main_test() {
4040
// file_name: Some("input.css".to_string()),
4141
..Default::default()
4242
}),
43-
chunk_size: Some(50 * 1024),
43+
// chunk_size: Some(70 * 1024),
4444
// 精确控制
4545
// subsets: vec![[65]].iter().map(|x| u32_array_to_u8_array(x)).collect(),
4646
// language_areas: Some(false),
4747
// auto_subset: Some(false),
4848
// subset_remain_chars: Some(false),
49-
font_feature: Some(false),
50-
// reduce_mins: Some(false),
49+
// font_feature: Some(false),
50+
// reduce_mins: Some(true),
5151
// rename_output_font: Some("font_[hash:6].[ext]".to_string()),
5252
..Default::default()
5353
};
@@ -72,6 +72,7 @@ fn main_test() {
7272
println!("Time: {:?}", duration);
7373
}
7474

75+
// test_on("./LXGWWenKai-Regular.ttf", "ttf");
7576
test_on("./packages/demo/public/SmileySans-Oblique.ttf", "ttf");
7677
// test_on("./packages/demo/public/SmileySans-Oblique.ttf.woff2", "woff2");
7778
}

src/pre_subset/mod.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,16 @@ pub fn pre_subset(ctx: &mut Context) {
7272
if ctx.input.language_areas.unwrap_or(true) {
7373
process.push(language_area_plugin);
7474
}
75-
if ctx.input.auto_subset.unwrap_or(true) {
76-
process.push(plugin_auto_subset);
77-
}
7875
if ctx.input.subset_remain_chars.unwrap_or(true) {
7976
process.push(add_remain_chars_plugin);
8077
}
78+
if ctx.input.auto_subset.unwrap_or(true) {
79+
process.push(plugin_auto_subset);
80+
}
8181
if ctx.input.font_feature.unwrap_or(true) {
8282
process.push(features_plugin);
8383
}
84+
// 尚未调试好这个算法,所以不启用
8485
if ctx.input.reduce_mins.unwrap_or(false) {
8586
process.push(reduce_min_plugin);
8687
}

src/pre_subset/plugin.rs

+2-18
Original file line numberDiff line numberDiff line change
@@ -45,32 +45,16 @@ pub fn add_remain_chars_plugin(
4545
info!("{} 个剩余字符被处理", remaining_chars_set.len());
4646
let mut v: Vec<u32> = remaining_chars_set.iter().cloned().collect();
4747
v.sort();
48-
49-
for i in split_into_chunks(v, 70) {
50-
subsets.push(i.iter().cloned().collect());
51-
}
52-
48+
subsets.push(v.iter().cloned().collect());
5349
remaining_chars_set.clear();
5450
}
55-
56-
fn split_into_chunks(v: Vec<u32>, chunk_size: usize) -> Vec<Vec<u32>> {
57-
let mut result = Vec::new();
58-
let mut current_chunk;
59-
for chunk in v.chunks(chunk_size) {
60-
current_chunk = Vec::with_capacity(chunk_size);
61-
current_chunk.extend_from_slice(chunk);
62-
result.push(current_chunk);
63-
}
64-
result
65-
}
6651
/// 把数量低于某个值的包,重新规划,缩减碎片分包数
6752
pub fn reduce_min_plugin(
6853
subsets: &mut Vec<IndexSet<u32>>,
6954
_remaining_chars_set: &mut HashSet<u32>,
7055
_ctx: &mut PreSubsetContext,
7156
) {
72-
// TODO 抽取为定义
73-
let min = 10;
57+
let min = 20;
7458
let max = 100;
7559
let mut cache: Vec<u32> = Vec::new();
7660
subsets.retain(|x| {

src/pre_subset/plugin_auto_subset.rs

+54-10
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@ use crate::run_subset::build_single_subset;
88

99
use super::PreSubsetContext;
1010

11+
/// Optimisation tier that selects the chunk-size growth curve used when a
/// subset is split into packages.
///
/// `plugin_auto_subset` derives the tier from its `size` value: below 1000 is
/// [`OptLevel::NO`], 1000..=10000 is [`OptLevel::LOW`], 10001..=30000 is
/// [`OptLevel::MID`], and anything larger is [`OptLevel::HIGH`].
/// (`size` is presumably the number of characters being processed; confirm
/// against the caller.)
///
/// The explicit discriminants give the tiers a stable numeric value, and the
/// derived ordering follows them, so `NO < LOW < MID < HIGH`.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum OptLevel {
    /// Lowest tier; chunk sizes grow linearly with the chunk index.
    NO = 0,
    /// Chunk sizes grow with the square root of the chunk index.
    LOW = 1,
    /// Chunk sizes grow with the cube root of the chunk index.
    MID = 2,
    /// Highest tier; currently uses the same cube-root curve as `MID`.
    HIGH = 3,
}
18+
1119
pub fn plugin_auto_subset(
1220
subsets: &mut Vec<IndexSet<u32>>,
1321
_remaining_chars_set: &mut HashSet<u32>,
@@ -28,12 +36,38 @@ pub fn plugin_auto_subset(
2836
);
2937
let mut count: usize = 0;
3038
let mut new_used_languages = HashMap::new();
39+
// 后期用于区分算法,现在暂时无用
40+
let opt_level = match size {
41+
0..1000 => OptLevel::NO,
42+
1000..=10000 => OptLevel::LOW,
43+
10001..=30000 => OptLevel::MID,
44+
_ => OptLevel::HIGH,
45+
};
3146
let new_subsets = subsets
3247
.iter()
3348
.enumerate()
3449
.flat_map(|(index, subset)| {
35-
let res = split_vector(subset, chars_per_subset);
36-
if let Some(language) = ctx.used_languages.get(&index) {
50+
let lang = ctx.used_languages.get(&index);
51+
let res = match lang {
52+
// 繁体中文一般比简体中文要大一倍复杂度,故进行特殊处理
53+
Some(ref i) if *i == "ZH_TC" => {
54+
// 特殊处理ZH_CN的情况
55+
split_vector(
56+
subset,
57+
((chars_per_subset as f32) * 0.5_f32) as u32,
58+
opt_level,
59+
) // 假设对ZH_CN有不一样的处理逻辑
60+
}
61+
None => {
62+
return split_vector(
63+
subset,
64+
((chars_per_subset as f32) * 0.7_f32) as u32,
65+
opt_level,
66+
);
67+
}
68+
_ => split_vector(subset, chars_per_subset, opt_level),
69+
};
70+
if let Some(language) = lang {
3771
for _ in 0..res.len() {
3872
new_used_languages.insert(count, language.clone());
3973
count += 1;
@@ -53,13 +87,23 @@ pub fn plugin_auto_subset(
5387
}
5488

5589
// 计算当前包需要容纳多少个字符 y= max_count/ x^(1/3)
56-
fn length_for_index(x: usize, max_count: u32) -> usize {
57-
let y: f32 = (max_count as f32) / (x as f32).sqrt(); // 计算立方根并求解y
90+
fn length_for_index(x: usize, max_count: u32, level: OptLevel) -> usize {
91+
let min_count = (max_count / 5) as u32;
92+
let y: f32 = match level {
93+
OptLevel::NO => (min_count as f32) * (x as f32),
94+
OptLevel::LOW => (min_count as f32) * (x as f32).sqrt(),
95+
OptLevel::MID => (min_count as f32) * (x as f32).cbrt(),
96+
OptLevel::HIGH => (min_count as f32) * (x as f32).cbrt(),
97+
}; // 计算立方根并求解y
5898
let y_ceil = y.ceil(); // 将结果向上取整
5999
// 不能比 max_count 的 1/5 小
60-
std::cmp::max(y_ceil as usize, (max_count / 5) as usize)
100+
std::cmp::min(y_ceil as usize, (max_count) as usize)
61101
}
62-
fn split_vector(vec: &IndexSet<u32>, max_count: u32) -> Vec<IndexSet<u32>> {
102+
fn split_vector(
103+
vec: &IndexSet<u32>,
104+
max_count: u32,
105+
level: OptLevel,
106+
) -> Vec<IndexSet<u32>> {
63107
let mut result: Vec<IndexSet<u32>> = Vec::new();
64108
let mut current_start = 0;
65109
let size = vec.len();
@@ -69,7 +113,7 @@ fn split_vector(vec: &IndexSet<u32>, max_count: u32) -> Vec<IndexSet<u32>> {
69113
debug!("fold {} -> {} | max {}", size, i - 1, max_count);
70114
break;
71115
}
72-
let len = length_for_index(i, max_count);
116+
let len = length_for_index(i, max_count, level.clone());
73117
// println!("{}", len);
74118
let to_take = std::cmp::min(len, size - current_start);
75119
let new_sub_vec = IndexSet::from_iter(
@@ -88,7 +132,7 @@ mod tests {
88132
#[test]
89133
fn split_vector_empty_input_empty_result() {
90134
let input = IndexSet::new();
91-
let result = split_vector(&input, 150);
135+
let result = split_vector(&input, 150, OptLevel::LOW);
92136
assert!(result.is_empty());
93137
}
94138

@@ -98,7 +142,7 @@ mod tests {
98142
for x in 1..10 {
99143
input.insert(x);
100144
}
101-
let result = split_vector(&input, 150);
145+
let result = split_vector(&input, 150, OptLevel::LOW);
102146
assert_eq!(result.len(), 1);
103147
assert_eq!(result[0], input);
104148
// println!("result: {:?}", result)
@@ -110,7 +154,7 @@ mod tests {
110154
for x in 1..400 {
111155
input.insert(x);
112156
}
113-
let result = split_vector(&input, 150);
157+
let result = split_vector(&input, 150, OptLevel::LOW);
114158
println!("result: {:#?}", result);
115159
assert_eq!(result.len(), 4);
116160
}

0 commit comments

Comments
 (0)