1
1
use indexmap:: IndexSet ;
2
2
3
- use log:: info;
3
+ use log:: { debug , info} ;
4
4
5
5
use crate :: run_subset:: build_single_subset;
6
6
@@ -36,31 +36,74 @@ pub fn chunk_iterable_and_flat(
36
36
subsets : & mut Vec < IndexSet < u32 > > ,
37
37
max_chunk_size : u32 ,
38
38
) -> Vec < IndexSet < u32 > > {
39
- let max_chunk_size = max_chunk_size - 1 ;
40
39
subsets
41
40
. iter ( )
42
- . flat_map ( |subset| {
43
- let mut count = 0 ;
44
- let mut result: Vec < IndexSet < u32 > > = vec ! [ ] ;
45
- let mut new_subset: IndexSet < u32 > = IndexSet :: new ( ) ;
46
- subset. iter ( ) . for_each ( |c| {
47
- new_subset. insert ( c. clone ( ) ) ;
48
- if count >= max_chunk_size {
49
- count = 0 ;
50
- result. push ( new_subset. clone ( ) ) ;
51
- new_subset = IndexSet :: new ( ) ;
52
- } else {
53
- count += 1 ;
54
- }
55
- } ) ;
56
- if new_subset. len ( ) > 0 {
57
- result. push ( new_subset)
58
- } ;
59
- result
60
- } )
41
+ . flat_map ( |subset| split_vector ( subset, max_chunk_size) )
61
42
. collect :: < Vec < IndexSet < u32 > > > ( )
62
43
}
63
44
45
/// Computes how many characters the package at 1-based position `x` should hold.
///
/// The size follows `y = max_count / x^(1/3)`, rounded up to the next integer,
/// so later packages get progressively smaller.
///
/// Degenerate inputs are clamped so that callers always make progress:
/// * `x == 0` is treated as position 1 (avoids a division by zero whose
///   infinite result would saturate to `usize::MAX` on the cast);
/// * the returned length is never smaller than 1, even when `max_count == 0`.
fn length_for_index(x: usize, max_count: u32) -> usize {
    // Positions are 1-based; clamp so the cube root is never zero.
    let position = x.max(1) as f64;
    // `u32 -> f64` is lossless, unlike `u32 -> f32` which drops bits above 2^24.
    let ideal = f64::from(max_count) / position.cbrt();
    // `ideal` is finite, non-negative and at most `u32::MAX`; the float-to-int
    // cast saturates if it does not fit. Rounding up, then forcing at least one
    // element, guarantees a caller never receives an empty chunk size.
    (ideal.ceil() as usize).max(1)
}
51
+ fn split_vector ( vec : & IndexSet < u32 > , max_count : u32 ) -> Vec < IndexSet < u32 > > {
52
+ let mut result: Vec < IndexSet < u32 > > = Vec :: new ( ) ;
53
+ let mut current_start = 0 ;
54
+ let size = vec. len ( ) ;
55
+
56
+ for i in 1 .. {
57
+ if current_start >= size {
58
+ debug ! ( "fold {} -> {} | max {}" , size, i - 1 , max_count) ;
59
+ break ;
60
+ }
61
+ let len = length_for_index ( i, max_count) ;
62
+ // println!("{}", len);
63
+ let to_take = std:: cmp:: min ( len, size - current_start) ;
64
+ let new_sub_vec = IndexSet :: from_iter (
65
+ vec[ current_start..current_start + to_take] . iter ( ) . cloned ( ) ,
66
+ ) ;
67
+ result. push ( new_sub_vec) ;
68
+ current_start += to_take;
69
+ }
70
+
71
+ result
72
+ }
73
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty set produces no chunks at all.
    #[test]
    fn split_vector_empty_input_empty_result() {
        let input: IndexSet<u32> = IndexSet::new();
        let result = split_vector(&input, 150);
        assert!(result.is_empty());
    }

    /// Input smaller than the first chunk size comes back as one identical chunk.
    #[test]
    fn split_vector_small_input_single_chunk() {
        let input: IndexSet<u32> = (1..10).collect();
        let result = split_vector(&input, 150);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], input);
    }

    /// Larger input is split into shrinking chunks that keep the original order.
    #[test]
    fn split_vector_multiple_elements_multiple_subsets() {
        let input: IndexSet<u32> = (1..400).collect();
        let result = split_vector(&input, 150);

        // 399 elements with max 150: ceil(150 / cbrt(i)) gives 150, 120, 105,
        // and the remaining 24 elements form the last chunk.
        let sizes: Vec<usize> = result.iter().map(|chunk| chunk.len()).collect();
        assert_eq!(sizes, vec![150, 120, 105, 24]);

        // Concatenating the chunks must reproduce the input exactly.
        let flattened: Vec<u32> = result.iter().flatten().copied().collect();
        let expected: Vec<u32> = (1..400).collect();
        assert_eq!(flattened, expected);
    }
}
64
107
#[ test]
65
108
fn for_chunk_iterable_and_flat ( ) {
66
109
let mut subsets = vec ! [
@@ -71,11 +114,12 @@ fn for_chunk_iterable_and_flat() {
71
114
assert_eq ! (
72
115
result,
73
116
vec![
74
- IndexSet :: from( [ 1 , 2 , 3 , 4 , 5 ] ) ,
75
- IndexSet :: from( [ 6 , 7 , 8 , 9 , 10 ] ) ,
76
- IndexSet :: from( [ 11 , 12 , 13 , 14 , 15 ] ) ,
77
- IndexSet :: from( [ 16 , 17 , 18 , 19 , 20 ] ) ,
78
- IndexSet :: from( [ 21 ] ) ,
117
+ IndexSet :: from( [ 1 , 2 , 3 , 4 ] ) ,
118
+ IndexSet :: from( [ 5 , 6 , 7 , 8 ] ) ,
119
+ IndexSet :: from( [ 9 , 10 ] ) ,
120
+ IndexSet :: from( [ 11 , 12 , 13 , 14 ] ) ,
121
+ IndexSet :: from( [ 15 , 16 , 17 , 18 ] ) ,
122
+ IndexSet :: from( [ 19 , 20 , 21 ] ) ,
79
123
]
80
124
) ;
81
125
}
0 commit comments