Skip to content

Commit a6cd369

Browse files
authored
stream decoder (#15)
stream decoder
1 parent e937f2e commit a6cd369

8 files changed

Lines changed: 277 additions & 26 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,11 @@ include(cmake/get_cpm.cmake)
2525
set(BOOST_INCLUDE_LIBRARIES intrusive)
2626
CPMAddPackage(
2727
NAME Boost
28-
VERSION 1.84.0
28+
VERSION 1.87.0
2929
URL https://github.com/boostorg/boost/releases/download/boost-1.84.0/boost-1.84.0.tar.xz
3030
OPTIONS "BOOST_ENABLE_CMAKE ON"
3131
)
3232
unset(BOOST_INCLUDE_LIBRARIES)
33-
find_package(Boost 1.84 COMPONENTS intrusive REQUIRED)
3433

3534
endif()
3635

include/hpack/basic_types.hpp

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
#pragma once
22

3+
#include <algorithm>
34
#include <cstdint>
45
#include <iterator>
56
#include <string_view>
6-
77
#include <exception>
88

99
namespace hpack {
@@ -19,9 +19,15 @@ struct protocol_error : std::exception {
1919
}
2020
};
2121

22-
} // namespace hpack
22+
// thrown if there is not enough data to read a header
23+
struct incomplete_data_error : hpack::protocol_error {
24+
// approximate value - how many more bytes need to be read to receive the next part (int or string)
25+
size_t required_bytes = 0;
2326

24-
namespace hpack {
27+
explicit incomplete_data_error(size_t required_bytes_approx)
28+
: hpack::protocol_error("incomplete data"), required_bytes(required_bytes_approx) {
29+
}
30+
};
2531

2632
struct sym_info_t {
2733
uint32_t bits;
@@ -46,6 +52,8 @@ concept Out = std::output_iterator<T, byte_t>;
4652

4753
namespace noexport {
4854

55+
// caches first byte for avoiding *it = x == push_back,
56+
// so *it | mask will be into next byte (may be with back_inserter)
4957
template <typename T>
5058
struct adapted_output_iterator {
5159
T base_it;
@@ -100,6 +108,54 @@ Original unadapt(byte_t* ptr) {
100108
return reinterpret_cast<Original>(ptr);
101109
}
102110

111+
// standard interface (e.g. for vector) for inserting many values at back
112+
// Note: ignores the fact that someone could write a pathological type whose push_back + insert do something wrong
113+
template <typename T>
114+
constexpr inline bool can_insert_many =
115+
requires(T& value, const char* p) { value.insert(value.end(), p, p); };
116+
117+
// the standard back_insert_iterator really does use a protected field exactly to allow this kind of access
118+
template <typename C>
119+
C& access_protected_container(std::back_insert_iterator<C> c) {
120+
static_assert(std::is_trivially_copyable_v<decltype(c)>);
121+
struct accessor : std::back_insert_iterator<C> {
122+
C* get() noexcept {
123+
return this->container;
124+
}
125+
};
126+
return *accessor{c}.get();
127+
}
128+
129+
template <typename C>
130+
requires(can_insert_many<C>)
131+
std::back_insert_iterator<C> do_copy_n_fast(const char* ptr, size_t sz, std::back_insert_iterator<C> it) {
132+
auto& c = access_protected_container(it);
133+
c.insert(c.end(), ptr, ptr + sz);
134+
return it; // back insert iterator does not change on ++/* etc
135+
}
136+
137+
template <typename C>
138+
requires(can_insert_many<C>)
139+
adapted_output_iterator<std::back_insert_iterator<C>> do_copy_n_fast(
140+
const char* ptr, size_t sz, adapted_output_iterator<std::back_insert_iterator<C>> it) {
141+
auto& c = access_protected_container(it.base_it);
142+
c.insert(c.end(), ptr, ptr + sz);
143+
return it; // adapted iterator must be unchanged, since base_it unchanged (its back inserter)
144+
}
145+
146+
// fallback
147+
template <typename It>
148+
It do_copy_n_fast(const char* ptr, size_t sz, It it) {
149+
return std::copy_n(ptr, sz, std::move(it));
150+
}
151+
152+
// behaves like copy_n, but for a back_insert iterator performs insert(end, ptr, ptr + n)
153+
// this converts many push_backs into one uninitialized_copy_n
154+
template <typename It>
155+
It copy_n_fast(const char* ptr, size_t sz, It it) {
156+
return do_copy_n_fast(ptr, sz, std::move(it));
157+
}
158+
103159
} // namespace noexport
104160

105161
struct table_entry {

include/hpack/decoder.hpp

Lines changed: 95 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,16 @@
33
#include "hpack/basic_types.hpp"
44
#include "hpack/dynamic_table.hpp"
55

6+
#include <span>
67
#include <utility>
78

89
namespace hpack {
910

11+
// helper for `decode_string`. Temporary storage for decoded strings
12+
//
13+
// may store string_view (AND NOT OWN IT!), may store huffman decoded string
14+
// and in this case memory will be allocated and owned
15+
// tries to reuse memory when a new huffman string is set and memory is already allocated
1016
struct decoded_string {
1117
private:
1218
const char* data = nullptr;
@@ -15,10 +21,6 @@ struct decoded_string {
1521
// default -1 for removing ambiguity between 'not allocated' and 'allocated 1 byte' (log2(1) == 0)
1622
int8_t allocated_sz_log2 = -1;
1723

18-
friend void decode_string(In&, In, decoded_string&);
19-
20-
void set_huffman(const char* ptr, size_type len);
21-
2224
public:
2325
decoded_string() = default;
2426

@@ -43,12 +45,18 @@ struct decoded_string {
4345
return *this;
4446
}
4547

48+
void set_huffman(const char* ptr, size_type len);
49+
// Note: *this will not own `ptr` memory, only contain a view
50+
void set_not_huffman(const char* ptr, size_type len) {
51+
reset();
52+
data = ptr;
53+
sz = len;
54+
}
55+
4656
// not huffman encoded string
4757
decoded_string& operator=(std::string_view str) noexcept {
4858
assert(std::in_range<size_type>(str.size()));
49-
reset();
50-
data = str.data();
51-
sz = str.size();
59+
set_not_huffman(str.data(), str.size());
5260
return *this;
5361
}
5462

@@ -123,7 +131,6 @@ struct header_view {
123131
}
124132
};
125133

126-
// precondition: in != e
127134
void decode_string(In& in, In e, decoded_string& out);
128135

129136
struct decoder {
@@ -137,17 +144,96 @@ struct decoder {
137144

138145
decoder(decoder&&) = default;
139146
decoder& operator=(decoder&&) noexcept = default;
147+
140148
/*
141149
Note: this function ignores special 'cookie' header case
142150
https://www.rfc-editor.org/rfc/rfc7540#section-8.1.2.5
143151
and protocol error if decoded header name is not lowercase
144152
*/
145-
// precondition: in != e
146153
void decode_header(In& in, In e, header_view& out);
147154

148155
// returns status code
149156
// its always first header of response, so 'in' must point to first byte of headers block
150157
int decode_response_status(In& in, In e);
151158
};
152159

160+
// consumes parts of a headers fragment, making it possible to parse HTTP/2 CONTINUATION frames part by part
161+
struct stream_decoder {
162+
private:
163+
decoder& dec;
164+
std::vector<byte_t> incomplete;
165+
166+
// returns where first unparsed byte starts
167+
template <typename V>
168+
In do_feed(std::span<byte_t> chunk, bool last_chunk, V&& visitor, size_t& approx) {
169+
In in = chunk.data();
170+
In e = in + chunk.size();
171+
assert(in != e);
172+
In in_just_before_fail;
173+
approx = 0;
174+
try {
175+
header_view header;
176+
while (in != e) {
177+
in_just_before_fail = in;
178+
179+
dec.decode_header(in, e, header);
180+
181+
if (header) [[likely]] // dynamic size update decoded without error
182+
visitor(header.name.str(), header.value.str());
183+
}
184+
// successfully parsed all headers
185+
return e;
186+
} catch (hpack::incomplete_data_error& e) {
187+
approx = e.required_bytes;
188+
if (last_chunk)
189+
throw;
190+
return in_just_before_fail;
191+
}
192+
}
193+
194+
public:
195+
stream_decoder(decoder& d) noexcept : dec(d) {
196+
}
197+
198+
stream_decoder(stream_decoder&&) = delete;
199+
void operator=(stream_decoder&&) = delete;
200+
201+
// `visitor` should accept two string_views, name and value
202+
// optimized for case when each `chunk` >> 1 header
203+
// returns the approximate number of bytes required to receive the next part of a header
204+
// or 0 if there are no unhandled data in chunk
205+
// e.g. may be used to detect too big string before receiving it
206+
template <typename V>
207+
size_t feed(std::span<byte_t> chunk, bool last_chunk, V&& visitor) {
208+
if (chunk.empty()) [[unlikely]]
209+
return 0;
210+
size_t approx;
211+
if (!incomplete.empty()) {
212+
incomplete.insert(incomplete.end(), chunk.begin(), chunk.end());
213+
In i = do_feed(incomplete, last_chunk, std::forward<V>(visitor), approx);
214+
In e = incomplete.data() + incomplete.size();
215+
auto sz = e - i;
216+
// avoid UB on .assign (iterators into vector itself)
217+
memmove(incomplete.data(), i, sz);
218+
incomplete.resize(sz);
219+
} else {
220+
In i = do_feed(chunk, last_chunk, std::forward<V>(visitor), approx);
221+
incomplete.assign(i, In(chunk.data()) + chunk.size());
222+
}
223+
return approx;
224+
}
225+
226+
// returns a value such that `pending_data_size` + the `feed` result == an almost exact count of bytes which will be
227+
// stored until the next part of a header (not the header itself!) is parsed.
228+
// Note, there are only 2 parts of header - name and value
229+
[[nodiscard]] size_t pending_data_size() const noexcept {
230+
return incomplete.size();
231+
}
232+
233+
// makes it possible to start from the beginning, forgetting previous `feed` calls
234+
void clear() noexcept {
235+
incomplete.clear();
236+
}
237+
};
238+
153239
} // namespace hpack

include/hpack/hpack.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ V decode_headers_block(decoder& dec, std::span<const byte_t> bytes, V visitor) {
2929
header_view header;
3030
while (in != e) {
3131
dec.decode_header(in, e, header);
32-
if (header) // dynamic size update decoded without error
32+
if (header) [[likely]] // dynamic size update decoded without error
3333
visitor(header.name.str(), header.value.str());
3434
}
3535
return visitor;

include/hpack/integers.hpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#pragma once
22

33
#include <concepts>
4-
4+
#include <cassert>
55
#include "hpack/basic_types.hpp"
66

77
namespace hpack {
@@ -49,17 +49,19 @@ O encode_integer(std::type_identity_t<UInt> I, uint8_t N, O _out) noexcept {
4949
return noexport::unadapt<O>(out);
5050
}
5151

52+
// precondition: N <= 8
5253
template <std::unsigned_integral UInt = size_type>
53-
[[nodiscard]] size_type decode_integer(In& in, In e, uint8_t N) {
54+
[[nodiscard]] UInt decode_integer(In& in, In e, uint8_t N) {
55+
assert(N <= 8);
5456
const UInt prefix_mask = (1 << N) - 1;
55-
// get first N bits
5657
auto pull = [&] {
5758
if (in == e)
58-
throw HPACK_PROTOCOL_ERROR(invalid integer representation);
59+
throw incomplete_data_error(2);
5960
auto i = *in;
6061
++in;
6162
return i;
6263
};
64+
// get first N bits
6365
UInt I = pull() & prefix_mask;
6466
if (I < prefix_mask)
6567
return I;

include/hpack/strings.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ O encode_string(std::string_view str, O _out) {
5757
if constexpr (!Huffman) {
5858
*out = 0; // set H bit to 0
5959
out = encode_integer(str.size(), 7, out);
60-
out = std::copy_n(str.data(), str.size(), out);
60+
out = noexport::copy_n_fast(str.data(), str.size(), out);
6161
} else {
6262
out = encode_string_huffman(str, out);
6363
}

src/decoder.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,7 @@ static void decode_header_fully_indexed(In& in, In e, dynamic_table_t& dyntab, h
132132
out = entry;
133133
}
134134

135-
// header with incremental indexing
136-
static void decode_header_cache(In& in, In e, dynamic_table_t& dyntab, header_view& out) {
135+
static void decode_header_incremental_indexing(In& in, In e, dynamic_table_t& dyntab, header_view& out) {
137136
assert(in != e && *in & 0b0100'0000);
138137
decode_header_impl(in, e, 6, dyntab, out);
139138
dyntab.add_entry(out.name.str(), out.value.str());
@@ -159,15 +158,15 @@ static size_type decode_dynamic_table_size_update(In& in, In e) {
159158

160159
void decode_string(In& in, In e, decoded_string& out) {
161160
if (in == e)
162-
throw HPACK_PROTOCOL_ERROR(incorrectly encoded string);
161+
throw incomplete_data_error(1);
163162
bool is_huffman = *in & 0b1000'0000;
164163
size_type str_len = decode_integer(in, e, 7);
165164
if (str_len > std::distance(in, e))
166-
throw HPACK_PROTOCOL_ERROR(size of encoded string not equal to data length);
165+
throw incomplete_data_error(str_len - std::distance(in, e));
167166
if (is_huffman)
168167
out.set_huffman((const char*)in, str_len);
169168
else
170-
out = std::string_view((const char*)in, str_len);
169+
out.set_not_huffman((const char*)in, str_len);
171170
in += str_len;
172171
}
173172

@@ -176,7 +175,7 @@ void decoder::decode_header(In& in, In e, header_view& out) {
176175
if (*in & 0b1000'0000)
177176
return decode_header_fully_indexed(in, e, dyntab, out);
178177
if (*in & 0b0100'0000)
179-
return decode_header_cache(in, e, dyntab, out);
178+
return decode_header_incremental_indexing(in, e, dyntab, out);
180179
if (*in & 0b0010'0000) {
181180
dyntab.update_size(decode_dynamic_table_size_update(in, e));
182181
out.name.reset();

0 commit comments

Comments
 (0)