Skip to content

Commit d3cd14b

Browse files
davidmcgrewGitHub Enterprise
authored andcommitted
Merge pull request #488 from network-intelligence/new-cbor-fps
Adding CBOR encoding and decoding for STUN fingerprints
2 parents e5148c1 + 966c33e commit d3cd14b

File tree

5 files changed

+209
-26
lines changed

5 files changed

+209
-26
lines changed

doc/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# CHANGELOG for Mercury
22

3+
* Added CBOR encoding/decoding for SSH and STUN fingerprints.
34
* Fixing compiler warnings related to ABI differences
45
* CMake changes required to add xsimd as submodule and fixing
56
windows compilation issues

doc/fdc.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Fingerprint and Destination Context (FDC) Schema
2+
3+
4+
5+
This note documents the data formats used in the Mercury Fingerprint and Destination Context (FDC). The FDC is encoded using the Concise Binary Object Representation ([CBOR](https://datatracker.ietf.org/doc/html/rfc8949)), an IETF standard data format that is extensible, concise, and trivially mappable to the common JavaScript Object Notation ([JSON](https://datatracker.ietf.org/doc/html/rfc8259)). The formats are formally defined using the Concise Data Definition Language ([CDDL](https://datatracker.ietf.org/doc/html/rfc8610)), an IETF standard notational convention for unambiguously expressing CBOR and JSON data formats.
6+
7+
A Fingerprint and Destination Context (FDC) object contains a Network Protocol Fingerprint ([NPF](https://github.com/cisco/mercury/blob/main/doc/npf.md)) and other data features, all of which are metadata observed in a single network session. An NPF fingerprint is a set of data features formed by selecting and normalizing some elements of a protocol message, so that they are correlated with the sending application or library implementation. A fingerprint by itself sometimes uniquely identifies an application, but often does not. In the latter case, the other data features are valuable for identifying the sending application.
8+
9+
- An NPF fingerprint in CBOR encoding, as defined in the [NPF CDDL specification](https://github.com/cisco/mercury/blob/main/doc/npf.cddl).
10+
- The server name, which corresponds to the TLS or QUIC Server Name field or the HTTP Host field.
11+
- The destination IP address, as a string containing a textual representation.
12+
- The destination port number, as an unsigned integer no greater than 65,535 (0xffff).
13+
- The user agent as a string, which corresponds to the value of the User-Agent header for HTTP, the value of the SOFTWARE attribute for STUN, and the concatenation of the Protocol and Comment strings for SSH.
14+
- Optionally, an unsigned integer corresponding to the truncation code, which indicates whether reassembly was required in order to obtain a complete fingerprint, and whether or not the fingerprint was truncated due to a missing packet. Its values are
15+
- none = 0,
16+
- reassembled = 1
17+
- truncated = 2
18+
- reassembled and truncated = 3.
19+
20+
The protocol (TLS, QUIC, HTTP, STUN, SSH) is identified by the fingerprint.
21+
22+
The formal CDDL definition is as follows:
23+
24+
```
25+
; fdc is a record-style array of data elements comprised of a
26+
; fingerprint and the associated destination context.
27+
;
28+
fdc = [
29+
fingerprint, ; as defined in npf.cddl
30+
str, ; server name
31+
str, ; destination IP address, textual representation
32+
uint, ; destination port (max: 0xffff)
33+
str, ; user agent
34+
? uint ; truncation
35+
]
36+
```
37+
38+
39+

src/cbor.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ int main(int argc, char *argv[]) {
2929
{ argument::none, "--encode-fingerprint", "encode fingerprint string as CBOR" },
3030
{ argument::required, "--input-file", "read data from file <filename>" },
3131
{ argument::none, "--verbose-tests", "run unit tests in verbose mode" },
32+
{ argument::none, "--verbose", "provide verbose output" },
3233
{ argument::none, "--help", "print out help message" },
3334
});
3435

@@ -42,12 +43,15 @@ int main(int argc, char *argv[]) {
4243
bool decode_fdc = opt.is_set("--decode-fdc");
4344
bool encode_fp = opt.is_set("--encode-fingerprint");
4445
bool verbose_tests = opt.is_set("--verbose-tests");
46+
bool verbose = opt.is_set("--verbose");
4547
bool help_needed = opt.is_set("--help");
4648
if (help_needed) {
4749
opt.usage(stdout, argv[0], summary);
4850
return 0;
4951
}
5052

53+
FILE *verbose_output = verbose ? stderr : nullptr;
54+
5155
if (verbose_tests) {
5256
bool sd_result = static_dictionary<0>::unit_test(stdout);
5357
bool cbor_result = cbor::unit_test(stdout);
@@ -105,7 +109,7 @@ int main(int argc, char *argv[]) {
105109
fprintf(stdout, "\"dst_ip_str\": \"%s\",", dst_ip_str);
106110
fprintf(stdout, "\"dst_port\": %u,", dst_port);
107111
fprintf(stdout, "\"user-agent\": \"%s\",", ua_str);
108-
fprintf(stdout, "\"truncation\": %u", truncation);
112+
fprintf(stdout, "\"truncation\": %lu", truncation);
109113
fprintf(stdout, "}}\n");
110114
} else {
111115
fprintf(stderr, "error: could not decode FDC\n");
@@ -114,6 +118,8 @@ int main(int argc, char *argv[]) {
114118

115119
} else if (encode_fp) {
116120

121+
size_t num_fails = 0;
122+
117123
std::ios::sync_with_stdio(false); // for performance
118124
std::string line;
119125
while (std::getline(std::cin, line)) {
@@ -123,9 +129,9 @@ int main(int argc, char *argv[]) {
123129

124130
// verify that we can represent this fingerprint in CBOR
125131
//
126-
if (!cbor_fingerprint::test_fingerprint(line.c_str())){
132+
if (!cbor_fingerprint::test_fingerprint(line.c_str(), verbose_output)) {
127133
fprintf(stderr, "error: could not encode/decode fingerprint %s\n", line.c_str());
128-
return EXIT_FAILURE;
134+
num_fails++;
129135
}
130136

131137
// convert fingerprint to CBOR
@@ -142,6 +148,10 @@ int main(int argc, char *argv[]) {
142148
//
143149
cbor::decode_fprint(data_buf.contents(), stdout);
144150
}
151+
if (num_fails) {
152+
fprintf(stderr, "error: could not encode/decode %zu fingerprints\n", num_fails);
153+
return EXIT_FAILURE;
154+
}
145155
return EXIT_SUCCESS;
146156

147157
} else {

src/libmerc/cbor.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,12 @@ namespace cbor {
348348
buf << value__;
349349
}
350350

351+
/// writes an empty `byte_string` into \param buf
352+
///
353+
static void write_empty(writeable &buf) {
354+
uint64{0, byte_string_type}.write(buf);
355+
}
356+
351357
/// `cbor::byte_string::unit_test()` performs unit tests on
352358
/// the class \ref cbor::byte_string and returns `true` if
353359
/// they all pass, and `false` otherwise. If \param f ==

src/libmerc/fdc.hpp

Lines changed: 150 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,13 @@ namespace cbor_fingerprint {
5757

5858
inline void encode_cbor_data(datum &d, writeable &w) {
5959
literal_byte<'('>{d};
60-
cbor::byte_string_from_hex{hex_digits{d}}.write(w);
61-
literal_byte<')'>{d};
60+
if (lookahead<literal_byte<')'>> close{d}) {
61+
cbor::byte_string::write_empty(w);
62+
d = close.advance();
63+
} else {
64+
cbor::byte_string_from_hex{hex_digits{d}}.write(w);
65+
literal_byte<')'>{d};
66+
}
6267
}
6368

6469
inline void encode_cbor_list(datum &d, writeable &w) {
@@ -190,6 +195,55 @@ namespace cbor_fingerprint {
190195
m.close();
191196
}
192197

198+
inline void encode_cbor_stun_fingerprint(datum d, writeable &w) {
199+
cbor::output::map m{w};
200+
201+
if (lookahead<literal_byte<'1', '/'>> version_one{d}) {
202+
d = version_one.advance();
203+
cbor::uint64{1}.write(w); // fingerprint version
204+
205+
if (lookahead<literal_byte<'r', 'a', 'n', 'd', 'o', 'm', 'i', 'z', 'e', 'd'>> peek{d}) {
206+
constexpr size_t idx = fp_labels.index("randomized");
207+
cbor::uint64{idx}.write(m);
208+
} else {
209+
cbor::output::array a{w};
210+
encode_cbor_data(d, a); // class
211+
encode_cbor_data(d, a); // method
212+
encode_cbor_data(d, a); // magic
213+
encode_cbor_list(d, a); // attributes
214+
a.close();
215+
}
216+
217+
}
218+
m.close();
219+
}
220+
221+
inline void encode_cbor_ssh_fingerprint(datum d, writeable &w) {
222+
cbor::output::map m{w};
223+
224+
cbor::uint64{0}.write(w); // fingerprint version
225+
226+
if (lookahead<literal_byte<'r', 'a', 'n', 'd', 'o', 'm', 'i', 'z', 'e', 'd'>> peek{d}) {
227+
constexpr size_t idx = fp_labels.index("randomized");
228+
cbor::uint64{idx}.write(m);
229+
} else {
230+
cbor::output::array a{w};
231+
encode_cbor_data(d, a); // kex_algorithms
232+
encode_cbor_data(d, a); // server_host_key_algorithms
233+
encode_cbor_data(d, a); // encryption_algorithms_client_to_server
234+
encode_cbor_data(d, a); // encryption_algorithms_server_to_client
235+
encode_cbor_data(d, a); // mac_algorithms_client_to_server
236+
encode_cbor_data(d, a); // mac_algorithms_server_to_client
237+
encode_cbor_data(d, a); // compression_algorithms_client_to_server
238+
encode_cbor_data(d, a); // compression_algorithms_server_to_client
239+
encode_cbor_data(d, a); // languages_client_to_server
240+
encode_cbor_data(d, a); // languages_server_to_client
241+
a.close();
242+
}
243+
244+
m.close();
245+
}
246+
193247
constexpr uint64_t randomized = 0;
194248
constexpr uint64_t generic = 1;
195249

@@ -255,13 +309,28 @@ namespace cbor_fingerprint {
255309
encode_cbor_tofsee_fingerprint(d, w);
256310
m.close();
257311

312+
} else if (lookahead<literal_byte<'s', 't', 'u', 'n', '/'>> stun{d}) {
313+
fp_type = fingerprint_type_stun;
314+
cbor::output::map m{w};
315+
cbor::uint64{(uint64_t)fp_type}.write(w);
316+
d = stun.advance();
317+
encode_cbor_stun_fingerprint(d, w);
318+
m.close();
319+
320+
} else if (lookahead<literal_byte<'s', 's', 'h', '/'>> ssh{d}) {
321+
fp_type = fingerprint_type_ssh;
322+
cbor::output::map m{w};
323+
cbor::uint64{(uint64_t)fp_type}.write(w);
324+
d = ssh.advance();
325+
encode_cbor_ssh_fingerprint(d, w);
326+
m.close();
327+
258328
}
259329
// fprintf(stderr, "fingerprint type %d\n", fp_type);
260330
}
261331

262332
inline void decode_cbor_data(datum &d, writeable &w) {
263333
cbor::byte_string data = cbor::byte_string::decode(d);
264-
// if (d.is_null()) { return; }
265334
w.copy('(');
266335
w.write_hex(data.value().data, data.value().length());
267336
w.copy(')');
@@ -391,6 +460,54 @@ namespace cbor_fingerprint {
391460
m.close();
392461
}
393462

463+
inline void decode_stun_fp(datum &d, writeable &w) {
464+
cbor::map m{d};
465+
cbor::uint64 format_version{m.value()};
466+
if (format_version.value() == 1) {
467+
w.copy('1');
468+
w.copy('/');
469+
if (lookahead<cbor::uint64> label{m.value()}) {
470+
if (label.value.value() == fp_labels.index("randomized")) {
471+
w << datum{"randomized"};
472+
}
473+
} else {
474+
cbor::array a{m.value()};
475+
decode_cbor_data(m.value(), w); // class
476+
decode_cbor_data(m.value(), w); // method
477+
decode_cbor_data(m.value(), w); // magic
478+
decode_cbor_list(m.value(), w); // attributes
479+
a.close();
480+
}
481+
}
482+
m.close();
483+
}
484+
485+
inline void decode_ssh_fp(datum &d, writeable &w) {
486+
cbor::map m{d};
487+
cbor::uint64 format_version{m.value()};
488+
if (format_version.value() == 0) {
489+
if (lookahead<cbor::uint64> label{m.value()}) {
490+
if (label.value.value() == fp_labels.index("randomized")) {
491+
w << datum{"randomized"};
492+
}
493+
} else {
494+
cbor::array a{m.value()};
495+
decode_cbor_data(m.value(), w); // kex_algorithms
496+
decode_cbor_data(m.value(), w); // server_host_key_algorithms
497+
decode_cbor_data(m.value(), w); // encryption_algorithms_client_to_server
498+
decode_cbor_data(m.value(), w); // encryption_algorithms_server_to_client
499+
decode_cbor_data(m.value(), w); // mac_algorithms_client_to_server
500+
decode_cbor_data(m.value(), w); // mac_algorithms_server_to_client
501+
decode_cbor_data(m.value(), w); // compression_algorithms_client_to_server
502+
decode_cbor_data(m.value(), w); // compression_algorithms_server_to_client
503+
decode_cbor_data(m.value(), w); // languages_client_to_server
504+
decode_cbor_data(m.value(), w); // languages_server_to_client
505+
a.close();
506+
}
507+
}
508+
m.close();
509+
}
510+
394511
inline void decode_fp(unsigned int fp_type,
395512
datum &d,
396513
writeable &w) {
@@ -410,6 +527,12 @@ namespace cbor_fingerprint {
410527
case fingerprint_type_tofsee:
411528
decode_tofsee_fp(d, w);
412529
break;
530+
case fingerprint_type_stun:
531+
decode_stun_fp(d, w);
532+
break;
533+
case fingerprint_type_ssh:
534+
decode_ssh_fp(d, w);
535+
break;
413536
default:
414537
;
415538
}
@@ -433,26 +556,27 @@ namespace cbor_fingerprint {
433556
// test cbor fingerprint encoding and decoding
434557
//
435558
static bool test_fingerprint(const char *fingerprint_string, FILE *f=nullptr) {
436-
data_buffer<2048> data_buf;
437-
datum fp_data{(uint8_t *)fingerprint_string, (uint8_t *)fingerprint_string + strlen(fingerprint_string)};
438-
cbor_fingerprint::encode_cbor_fingerprint(fp_data, data_buf);
439-
440-
data_buffer<2048> out_buf;
441-
datum encoded_data{data_buf.contents()};
442-
cbor_fingerprint::decode_cbor_fingerprint(encoded_data, out_buf);
443-
if (out_buf.contents().cmp(fp_data) != 0) {
444-
if (f) {
445-
fprintf(f, "ERROR: MISMATCH\n");
446-
fprintf(f, "fingerprint: %s\n", fingerprint_string);
447-
fprintf(f, "CBOR encoded fingerprint: ");
448-
data_buf.contents().fprint_hex(f); fputc('\n', f);
449-
fprintf(f, "decoded fingerprint: ");
450-
out_buf.contents().fprint(f); fputc('\n', f);
451-
cbor::decode_fprint(data_buf.contents(), f);
452-
}
453-
return false;
559+
data_buffer<2048> data_buf;
560+
datum fp_data{(uint8_t *)fingerprint_string, (uint8_t *)fingerprint_string + strlen(fingerprint_string)};
561+
cbor_fingerprint::encode_cbor_fingerprint(fp_data, data_buf);
562+
563+
data_buffer<2048> out_buf;
564+
datum encoded_data{data_buf.contents()};
565+
cbor_fingerprint::decode_cbor_fingerprint(encoded_data, out_buf);
566+
567+
if (out_buf.contents().cmp(fp_data) != 0) {
568+
if (f) {
569+
fprintf(f, "ERROR: MISMATCH\n");
570+
fprintf(f, "fingerprint: %s\n", fingerprint_string);
571+
fprintf(f, "CBOR encoded fingerprint: ");
572+
data_buf.contents().fprint_hex(f); fputc('\n', f);
573+
fprintf(f, "decoded fingerprint: ");
574+
out_buf.contents().fprint(f); fputc('\n', f);
575+
cbor::decode_fprint(data_buf.contents(), f);
454576
}
455-
return true;
577+
return false;
578+
}
579+
return true;
456580
};
457581

458582
// cbor_fingerprint::unit_test() returns `true` if all unit tests
@@ -469,7 +593,10 @@ namespace cbor_fingerprint {
469593
"quic/(00000001)(0303)(130113021303)[(000a000a00086399001d00170018)(002b0003020304)((0039)[(01)(03)(04)(05)(06)(07)(08)(09)(0f)(1b)(20)(80004752)(80ff73db)])(4469)]",
470594
"http/randomized",
471595
"tls/1/randomized",
472-
"quic/randomized"
596+
"quic/randomized",
597+
"stun/1/randomized",
598+
"stun/1/(00)(0001)(01)((8022)(0006)(0020)(0008)(8028))",
599+
"ssh/(656364682d736861322d6e697374703235362c656364682d736861322d6e697374703338342c656364682d736861322d6e697374703532312c6469666669652d68656c6c6d616e2d67726f757031342d736861312c6469666669652d68656c6c6d616e2d67726f75702d65786368616e67652d7368613235362c6469666669652d68656c6c6d616e2d67726f75702d65786368616e67652d736861312c6469666669652d68656c6c6d616e2d67726f7570312d73686131)(7373682d7273612c7373682d6473732c65636473612d736861322d6e697374703235362c65636473612d736861322d6e697374703338342c65636473612d736861322d6e69737470353231)(6165733132382d6374722c6165733132382d6362632c336465732d6374722c336465732d6362632c626c6f77666973682d6362632c6165733139322d6374722c6165733139322d6362632c6165733235362d6374722c6165733235362d636263)(6165733132382d6374722c6165733132382d6362632c336465732d6374722c336465732d6362632c626c6f77666973682d6362632c6165733139322d6374722c6165733139322d6362632c6165733235362d6374722c6165733235362d636263)(686d61632d6d64352c686d61632d736861312c686d61632d736861322d3235362c686d61632d736861312d39362c686d61632d6d64352d3936)(686d61632d6d64352c686d61632d736861312c686d61632d736861322d3235362c686d61632d736861312d39362c686d61632d6d64352d3936)(6e6f6e65)(6e6f6e65)()()"
473600
};
474601
bool all_tests_passed = true;
475602
for (const auto & fp_str : fps) {

0 commit comments

Comments
 (0)