Skip to content

Commit 8fcf774

Browse files
committed
RangeSet (will be used instead of BitMask in files) implementation with tests
GitOrigin-RevId: 977f0776d0b7ef96a31364d2a68cfe980f2845c8
1 parent 04c9680 commit 8fcf774

File tree

3 files changed

+251
-0
lines changed

3 files changed

+251
-0
lines changed

tdutils/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ if (TDUTILS_MIME_TYPE)
270270
endif()
271271

272272
set(TDUTILS_TEST_SOURCE
273+
${CMAKE_CURRENT_SOURCE_DIR}/test/bitmask.cpp
273274
${CMAKE_CURRENT_SOURCE_DIR}/test/buffer.cpp
274275
${CMAKE_CURRENT_SOURCE_DIR}/test/ConcurrentHashMap.cpp
275276
${CMAKE_CURRENT_SOURCE_DIR}/test/crypto.cpp

tdutils/test/bitmask.cpp

+246
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
//
2+
// Copyright Aliaksei Levin ([email protected]), Arseny Smirnov ([email protected]) 2014-2020
3+
//
4+
// Distributed under the Boost Software License, Version 1.0. (See accompanying
5+
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6+
//
7+
#include "td/utils/tests.h"
8+
#include "td/utils/misc.h"
9+
#include "td/utils/utf8.h"
10+
11+
namespace td {
12+
class RangeSet {
13+
template <class T>
14+
static auto find(T &ranges, int64 begin) {
15+
return std::lower_bound(ranges.begin(), ranges.end(), begin,
16+
[](const Range &range, int64 begin) { return range.end < begin; });
17+
}
18+
auto find(int64 begin) const {
19+
return find(ranges_, begin);
20+
}
21+
auto find(int64 begin) {
22+
return find(ranges_, begin);
23+
}
24+
25+
public:
26+
struct Range {
27+
int64 begin;
28+
int64 end;
29+
};
30+
31+
static constexpr int64 BitSize = 1024;
32+
static constexpr int64 MaxPartSize = 16 * 1024 * 1024;
33+
34+
RangeSet() = default;
35+
36+
static RangeSet create_one_range(int64 end, int64 begin = 0) {
37+
RangeSet res;
38+
res.ranges_.push_back({begin, end});
39+
return res;
40+
}
41+
static td::Result<RangeSet> decode(CSlice data) {
42+
if (!check_utf8(data)) {
43+
return Status::Error("Invalid encoding");
44+
}
45+
uint32 curr = 0;
46+
bool is_empty = false;
47+
RangeSet res;
48+
for (auto begin = data.ubegin(); begin != data.uend();) {
49+
uint32 size;
50+
begin = next_utf8_unsafe(begin, &size, "RangeSet");
51+
52+
if (!is_empty && size != 0) {
53+
res.ranges_.push_back({curr * BitSize, (curr + size) * BitSize});
54+
}
55+
curr += size;
56+
is_empty = !is_empty;
57+
}
58+
return res;
59+
}
60+
61+
std::string encode(int64 prefix_size = -1) const {
62+
std::vector<uint32> sizes;
63+
uint32 all_end = 0;
64+
65+
if (prefix_size != -1) {
66+
prefix_size = (prefix_size + BitSize - 1) / BitSize * BitSize;
67+
}
68+
for (auto it : ranges_) {
69+
if (prefix_size != -1 && it.begin >= prefix_size) {
70+
break;
71+
}
72+
if (prefix_size != -1 && it.end > prefix_size) {
73+
it.end = prefix_size;
74+
}
75+
76+
CHECK(it.begin % BitSize == 0);
77+
CHECK(it.end % BitSize == 0);
78+
uint32 begin = narrow_cast<uint32>(it.begin / BitSize);
79+
uint32 end = narrow_cast<uint32>(it.end / BitSize);
80+
if (sizes.empty()) {
81+
if (begin != 0) {
82+
sizes.push_back(0);
83+
sizes.push_back(begin);
84+
}
85+
} else {
86+
sizes.push_back(begin - all_end);
87+
}
88+
sizes.push_back(end - begin);
89+
all_end = end;
90+
}
91+
92+
std::string res;
93+
for (auto c : sizes) {
94+
append_utf8_character(res, c);
95+
}
96+
return res;
97+
}
98+
99+
int64 get_ready_prefix_size(int64 offset, int64 file_size = -1) const {
100+
auto it = find(offset);
101+
if (it == ranges_.end()) {
102+
return 0;
103+
}
104+
if (it->begin > offset) {
105+
return 0;
106+
}
107+
CHECK(offset >= it->begin);
108+
CHECK(offset <= it->end);
109+
auto end = it->end;
110+
if (file_size != -1 && end > file_size) {
111+
end = file_size;
112+
}
113+
if (end < offset) {
114+
return 0;
115+
}
116+
return end - offset;
117+
}
118+
int64 get_total_size(int64 file_size) const {
119+
int64 res = 0;
120+
for (auto it : ranges_) {
121+
if (it.begin >= file_size) {
122+
break;
123+
}
124+
if (it.end > file_size) {
125+
it.end = file_size;
126+
}
127+
res += it.end - it.begin;
128+
}
129+
return res;
130+
}
131+
int64 get_ready_parts(int64 offset_part, int64 part_size) const {
132+
auto offset = offset_part * part_size;
133+
auto it = find(offset);
134+
if (it == ranges_.end()) {
135+
return 0;
136+
}
137+
if (it->begin > offset) {
138+
return 0;
139+
}
140+
return (it->end - offset) / part_size;
141+
}
142+
143+
bool is_ready(int64 begin, int64 end) const {
144+
auto it = find(begin);
145+
if (it == ranges_.end()) {
146+
return false;
147+
}
148+
return it->begin <= begin && end <= it->end;
149+
}
150+
151+
void set(int64 begin, int64 end) {
152+
CHECK(begin % BitSize == 0);
153+
CHECK(end % BitSize == 0);
154+
// 1. skip all with r.end < begin
155+
auto it_begin = find(begin);
156+
157+
// 2. combine with all r.begin <= end
158+
auto it_end = it_begin;
159+
for (; it_end != ranges_.end() && it_end->begin <= end; ++it_end) {
160+
}
161+
162+
if (it_begin == it_end) {
163+
ranges_.insert(it_begin, Range{begin, end});
164+
} else {
165+
begin = std::min(begin, it_begin->begin);
166+
--it_end;
167+
end = std::max(end, it_end->end);
168+
*it_end = Range{begin, end};
169+
ranges_.erase(it_begin, it_end);
170+
}
171+
}
172+
173+
std::vector<int32> as_vector(int32 part_size) const {
174+
std::vector<int32> res;
175+
for (auto it : ranges_) {
176+
auto begin = narrow_cast<int32>((it.begin + part_size - 1) / part_size);
177+
auto end = narrow_cast<int32>(it.end / part_size);
178+
while (begin < end) {
179+
res.push_back(begin++);
180+
}
181+
}
182+
return res;
183+
}
184+
185+
private:
186+
std::vector<Range> ranges_;
187+
};
188+
189+
TEST(Bitmask, simple) {
190+
auto validate_encoding = [](auto &rs) {
191+
auto str = rs.encode();
192+
LOG(ERROR) << str.size();
193+
RangeSet rs2 = RangeSet::decode(str).move_as_ok();
194+
auto str2 = rs2.encode();
195+
rs = std::move(rs2);
196+
CHECK(str2 == str);
197+
};
198+
{
199+
RangeSet rs;
200+
int32 S = 128 * 1024;
201+
int32 O = S * 5000;
202+
for (int i = 1; i < 30; i++) {
203+
if (i % 2 == 0) {
204+
rs.set(O + S * i, O + S * (i + 1));
205+
}
206+
}
207+
validate_encoding(rs);
208+
}
209+
{
210+
RangeSet rs;
211+
int32 S = 1024;
212+
auto get = [&](auto p) {
213+
return rs.get_ready_prefix_size(p * S) / S;
214+
};
215+
auto set = [&](auto l, auto r) {
216+
rs.set(l * S, r * S);
217+
validate_encoding(rs);
218+
ASSERT_TRUE(rs.is_ready(l * S, r * S));
219+
ASSERT_TRUE(get(l) >= (r - l));
220+
};
221+
set(6, 7);
222+
ASSERT_EQ(1, get(6));
223+
ASSERT_EQ(0, get(5));
224+
set(8, 9);
225+
ASSERT_EQ(0, get(7));
226+
set(7, 8);
227+
ASSERT_EQ(2, get(7));
228+
ASSERT_EQ(3, get(6));
229+
set(3, 5);
230+
ASSERT_EQ(1, get(4));
231+
set(4, 6);
232+
ASSERT_EQ(5, get(4));
233+
set(10, 11);
234+
set(9, 10);
235+
ASSERT_EQ(8, get(3));
236+
set(14, 16);
237+
set(12, 13);
238+
ASSERT_EQ(8, get(3));
239+
240+
ASSERT_EQ(10, rs.get_ready_prefix_size(S * 3, S * 3 + 10));
241+
ASSERT_TRUE(!rs.is_ready(S*11, S *12));
242+
ASSERT_EQ(3, rs.get_ready_parts(2, S * 2));
243+
ASSERT_EQ(std::vector<int32>({2, 3, 4, 7}), rs.as_vector(S * 2) );
244+
}
245+
}
246+
} // namespace td

test/CMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,16 @@ target_link_libraries(all_tests PRIVATE tdcore tdclient)
3232

3333
if (NOT CMAKE_CROSSCOMPILING OR EMSCRIPTEN)
3434
#Tests
35+
add_executable(test-tdutils ${TESTS_MAIN} ${TDUTILS_TEST_SOURCE})
3536
add_executable(run_all_tests ${TESTS_MAIN} ${TD_TEST_SOURCE})
3637
if (CLANG AND NOT CYGWIN AND NOT EMSCRIPTEN AND NOT (CMAKE_HOST_SYSTEM_NAME MATCHES "OpenBSD"))
38+
target_compile_options(test-tdutils PUBLIC -fsanitize=undefined -fno-sanitize=vptr)
3739
target_compile_options(run_all_tests PUBLIC -fsanitize=undefined -fno-sanitize=vptr)
3840
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined -fno-sanitize=vptr")
3941
endif()
4042
target_include_directories(run_all_tests PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
43+
target_include_directories(test-tdutils PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
44+
target_link_libraries(test-tdutils PRIVATE tdutils)
4145
target_link_libraries(run_all_tests PRIVATE tdcore tdclient)
4246

4347
if (CLANG)

0 commit comments

Comments
 (0)