Skip to content

Commit df34f7c

Browse files
davidmcgrew authored and GitHub Enterprise committed
Merge branch 'dev' into new-cbor-fps
2 parents 74c598a + 1dc8a05 commit df34f7c

File tree

9 files changed

+64
-51
lines changed

9 files changed

+64
-51
lines changed

Makefile_helper.mk.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ CFLAGS += -Wno-deprecated-declarations
5858
CFLAGS += -Wno-long-long
5959
CFLAGS += -Wmissing-noreturn
6060
CFLAGS += -Wunreachable-code
61+
CFLAGS += -Wno-psabi
6162
CFLAGS += -fvisibility=hidden
6263
CFLAGS += -DNDEBUG
6364
# CFLAGS += -g

doc/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# CHANGELOG for Mercury
22

33
* Added CBOR encoding/decoding for SSH and STUN fingerprints.
4+
* Fixed compiler warnings related to ABI differences
45
* CMake changes required to add xsimd as submodule and fixing
56
windows compilation issues
67
* Removed duplicate UTF-8 and IP address output code used in

src/libmerc/flow_key.h

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -140,15 +140,12 @@ struct key {
140140
return addr.ipv6.dst.is_global();
141141
}
142142

143-
// write out the (optionally normalized) destination address
143+
// write out the destination address
144144
//
145-
void sprintf_dst_addr(char *dst_addr_str, bool norm=true) const {
145+
void sprintf_dst_addr(char *dst_addr_str) const {
146146

147147
if (ip_vers == 4) {
148148
ipv4_address tmp_addr{addr.ipv4.dst};
149-
if (norm) {
150-
normalize(tmp_addr);
151-
}
152149
uint8_t *d = (uint8_t *)&tmp_addr;
153150
snprintf(dst_addr_str,
154151
MAX_ADDR_STR_LEN,
@@ -157,9 +154,6 @@ struct key {
157154

158155
} else if (ip_vers == 6) {
159156
ipv6_address tmp_addr{addr.ipv6.dst};
160-
if (norm) {
161-
normalize(tmp_addr);
162-
}
163157
uint8_t *d = (uint8_t *)&tmp_addr;
164158
sprintf_ipv6_addr(dst_addr_str, d);
165159
} else {

src/libmerc/ip_address.hpp

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,9 @@ bool ipv4_address::unit_test(FILE *output) { // output=nullptr by default
205205
return all_passed;
206206
}
207207

208+
209+
using ipv6_array_t = std::array<uint8_t, 16>;
210+
208211
/// an IP version six address in network byte order. This class
209212
/// represents a raw (binary) address; to parse a textual
210213
/// representation of an IPv6 address, use \ref ipv6_address_string.
@@ -307,11 +310,7 @@ struct ipv6_address {
307310
multicast
308311
};
309312

310-
// bool is_global_unicast() const {
311-
// return (a & 0xe0000000) == 0x20000000;
312-
// }
313313
bool is_global_unicast() const {
314-
// fprintf(stderr, "check: %08x\t%08x==%08x\n", a, (a & hton<uint32_t>(0xe0000000)), hton<uint32_t>(0x20000000));
315314
return (a[0] & hton<uint32_t>(0xe0000000)) == hton<uint32_t>(0x20000000);
316315
}
317316
bool is_unique_local_unicast() const {
@@ -327,7 +326,7 @@ struct ipv6_address {
327326
return (a[0] & hton<uint32_t>(0xff000000)) == hton<uint32_t>(0xff000000);
328327
}
329328
bool is_global() const {
330-
return is_global_unicast(); // TODO: consider global multicast
329+
return is_global_unicast() || is_ipv4_mapped();
331330
}
332331
bool is_ipv4_mapped() const {
333332
return (a[0] == 0 && a[1] == 0 && a[2] == hton<uint32_t>(0x0000ffff));
@@ -337,6 +336,20 @@ struct ipv6_address {
337336

338337
};
339338

339+
// hasher for ipv6_address
340+
//
341+
namespace std {
342+
template <>
343+
struct hash<ipv6_address> {
344+
std::size_t operator()(const ipv6_address& addr) const {
345+
return std::hash<uint32_t>{}(addr.a[0])
346+
^ std::hash<uint32_t>{}(addr.a[1])
347+
^ std::hash<uint32_t>{}(addr.a[2])
348+
^ std::hash<uint32_t>{}(addr.a[3]);
349+
}
350+
};
351+
}
352+
340353
inline bool ipv6_address::unit_test() {
341354

342355
// ipv6_address addr;
@@ -384,16 +397,18 @@ namespace normalized {
384397
static const ipv6_address ipv6_unique_local{0x000000fd, 0x00000000, 0x00000000, 0x01000000 };
385398
};
386399

387-
inline void normalize(ipv4_address &a) {
400+
inline ipv4_address normalize(const ipv4_address &a) {
388401
if (!a.is_global()) {
389-
a = normalized::ipv4_private_use;
402+
return normalized::ipv4_private_use;
390403
}
404+
return a;
391405
}
392406

393-
inline void normalize(ipv6_address &a) {
407+
inline ipv6_address normalize(const ipv6_address &a) {
394408
if (!a.is_global()) {
395-
a = normalized::ipv6_unique_local;
409+
return normalized::ipv6_unique_local;
396410
}
411+
return a;
397412
}
398413

399414
struct ip_address {
@@ -543,8 +558,6 @@ T hex_str_to_uint(const hex_digits &d) {
543558
}
544559

545560

546-
using ipv6_array_t = std::array<uint8_t, 16>;
547-
548561
static inline void ipv6_array_print(FILE *f, ipv6_array_t ipv6) {
549562
fprintf(f,
550563
"%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x",
@@ -865,6 +878,11 @@ class ipv6_address_string {
865878
return x;
866879
}
867880

881+
ipv6_address get_address() const {
882+
ipv6_array_t arry = get_value_array();
883+
return get_ipv6_address(arry);
884+
}
885+
868886
// unit_test() is a static function that performs a unit test of
869887
// this class, using the example addresses from RFC 4291. It
870888
// returns true if all tests pass, and false otherwise.

src/libmerc/naive_bayes.hpp

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ inline auto feature<uint64_t>::convert(const char *s) -> uint64_t {
227227
///
228228
class ip_addr_feature {
229229
std::unordered_map<uint32_t, std::vector<class update>> ipv4_updates;
230-
std::unordered_map<ipv6_array_t, std::vector<class update>> ipv6_updates;
230+
std::unordered_map<ipv6_address, std::vector<class update>> ipv6_updates;
231231

232232
public:
233233
std::string json_name;
@@ -278,15 +278,15 @@ class ip_addr_feature {
278278
class update u{ (unsigned int)process_index, (log((floating_point_type)count / total_count) - base_prior) * weight };
279279

280280
if (lookahead<ipv4_address_string> ipv4{datum{feature_value}}) {
281-
uint32_t addr = ipv4.value.get_value();
282-
auto update = ipv4_updates.find(addr);
281+
ipv4_address addr = normalize(ipv4.value.get_value());
282+
auto update = ipv4_updates.find(addr.get_value());
283283
if (update != ipv4_updates.end()) {
284284
update->second.push_back(u);
285285
} else {
286-
ipv4_updates[addr] = { u };
286+
ipv4_updates[addr.get_value()] = { u };
287287
}
288288
} else if (lookahead<ipv6_address_string> ipv6{datum{feature_value}}) {
289-
ipv6_array_t addr = ipv6.value.get_value_array();
289+
ipv6_address addr = normalize(ipv6.value.get_address());
290290
auto update = ipv6_updates.find(addr);
291291
if (update != ipv6_updates.end()) {
292292
update->second.push_back(u);
@@ -300,14 +300,15 @@ class ip_addr_feature {
300300
//
301301
void update(std::vector<floating_point_type> &prob_vector, const std::string &dst_ip_str) const {
302302
if (lookahead<ipv4_address_string> ipv4{datum{dst_ip_str}}) {
303-
auto ip_ip_update = ipv4_updates.find(ipv4.value.get_value());
303+
ipv4_address addr = normalize(ipv4.value.get_value());
304+
auto ip_ip_update = ipv4_updates.find(addr.get_value());
304305
if (ip_ip_update != ipv4_updates.end()) {
305306
for (const auto &x : ip_ip_update->second) {
306307
prob_vector[x.index] += x.value;
307308
}
308309
}
309310
} else if (lookahead<ipv6_address_string> ipv6{datum{dst_ip_str}}) {
310-
auto ip_ip_update = ipv6_updates.find(ipv6.value.get_value_array());
311+
auto ip_ip_update = ipv6_updates.find(normalize(ipv6.value.get_address()));
311312
if (ip_ip_update != ipv6_updates.end()) {
312313
for (const auto &x : ip_ip_update->second) {
313314
prob_vector[x.index] += x.value;
@@ -320,14 +321,15 @@ class ip_addr_feature {
320321

321322
void update(std::vector<floating_point_type> &prob_vector, const std::string &dst_ip_str, floating_point_type w) const {
322323
if (lookahead<ipv4_address_string> ipv4{datum{dst_ip_str}}) {
323-
auto ip_ip_update = ipv4_updates.find(ipv4.value.get_value());
324+
ipv4_address addr = normalize(ipv4.value.get_value());
325+
auto ip_ip_update = ipv4_updates.find(addr.get_value());
324326
if (ip_ip_update != ipv4_updates.end()) {
325327
for (const auto &x : ip_ip_update->second) {
326328
prob_vector[x.index] += x.value * (w / weight);
327329
}
328330
}
329331
} else if (lookahead<ipv6_address_string> ipv6{datum{dst_ip_str}}) {
330-
auto ip_ip_update = ipv6_updates.find(ipv6.value.get_value_array());
332+
auto ip_ip_update = ipv6_updates.find(normalize(ipv6.value.get_address()));
331333
if (ip_ip_update != ipv6_updates.end()) {
332334
for (const auto &x : ip_ip_update->second) {
333335
prob_vector[x.index] += x.value * (w / weight);

src/libmerc/pkt_proc.h

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ struct stateful_pkt_proc {
106106
mercury_context m;
107107
classifier *c; // TODO: change to reference
108108
data_aggregator *ag;
109-
global_config global_vars;
109+
const global_config &global_vars;
110110
class traffic_selector &selector;
111111
quic_crypto_engine quic_crypto;
112112
struct tcp_reassembler *reassembler_ptr = nullptr;
@@ -119,7 +119,7 @@ struct stateful_pkt_proc {
119119
analysis{},
120120
mq{nullptr},
121121
m{mc},
122-
c{nullptr},
122+
c{mc->c},
123123
ag{nullptr},
124124
global_vars{mc->global_vars},
125125
selector{mc->selector},
@@ -137,11 +137,9 @@ struct stateful_pkt_proc {
137137

138138
// config and classifier already refer to context m (bound in the member-initializer list)
139139
// analysis requires `do_analysis` & `resources` to be set
140-
if (m->c == nullptr && m->global_vars.do_analysis && m->global_vars.resources != nullptr) {
140+
if (c == nullptr && global_vars.do_analysis && global_vars.resources != nullptr) {
141141
throw std::runtime_error("error: classifier pointer is null");
142142
}
143-
this->c = m->c;
144-
this->global_vars = m->global_vars;
145143

146144
// setting protocol based configuration option to output the raw features
147145
set_raw_features(global_vars.raw_features);
@@ -289,26 +287,26 @@ struct stateful_pkt_proc {
289287

290288
bool dump_pkt ();
291289

292-
void set_raw_features(std::unordered_map<std::string, bool> &raw_features) {
293-
if (raw_features["all"] or raw_features["tls"]) {
290+
void set_raw_features(const std::unordered_map<std::string, bool> &raw_features) {
291+
if (raw_features.at("all") or raw_features.at("tls")) {
294292
tls_client_hello::set_raw_features(true);
295293
}
296-
297-
if (raw_features["all"] or raw_features["stun"]) {
294+
295+
if (raw_features.at("all") or raw_features.at("stun")) {
298296
stun::message::set_raw_features(true);
299297
}
300-
301-
if (raw_features["all"] or raw_features["bittorrent"]) {
298+
299+
if (raw_features.at("all") or raw_features.at("bittorrent")) {
302300
bittorrent_dht::set_raw_features(true);
303301
bittorrent_lsd::set_raw_features(true);
304302
bittorrent_handshake::set_raw_features(true);
305303
}
306-
307-
if (raw_features["all"] or raw_features["smb"]) {
304+
305+
if (raw_features.at("all") or raw_features.at("smb")) {
308306
smb2_packet::set_raw_features(true);
309307
}
310-
311-
if (raw_features["all"] or raw_features["ssdp"]) {
308+
309+
if (raw_features.at("all") or raw_features.at("ssdp")) {
312310
ssdp::set_raw_features(true);
313311
}
314312
}

src/libmerc/watchlist.hpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ class server_identifier {
333333
a += '.';
334334
}
335335
ipv4_address addr = std::get<uint32_t>(host_id);
336-
normalize(addr);
336+
addr = normalize(addr);
337337
a += addr.get_dns_label();
338338
}
339339
a += "address.alt";
@@ -349,8 +349,7 @@ class server_identifier {
349349
a += '.';
350350
}
351351
ipv6_array_t addr = std::get<ipv6_array_t>(host_id);
352-
ipv6_address tmp = get_ipv6_address(addr);
353-
// normalize(tmp);
352+
ipv6_address tmp = normalize(get_ipv6_address(addr));
354353
a += tmp.get_dns_label();
355354
}
356355
a += "address.alt";
@@ -495,7 +494,7 @@ class server_identifier {
495494
{ "[::ffff:91.222.113.90]:5000", "_5000.--ffff-5bde-715a.address.alt", 5000 }, // IPv6 addr with embedded IPv4 addr, square braces, and port number
496495
{ "2001:db8::2:1", "2001-db8--2-1.address.alt", {} }, // IPv6 addr with zero compression
497496
{ "240d:c000:2010:1a58:0:95fe:d8b7:5a8f", "240d-c000-2010-1a58-0-95fe-d8b7-5a8f.address.alt", {} }, // IPv6 addr without zero compression
498-
{ "abcd:888::2:1", "abcd-888--2-1.address.alt", {} }, // IPv6 addr that could be confused for server:port
497+
{ "abcd:888::2:1", "fd00--1.address.alt", {} }, // Non-global IPv6 addr that could be confused for server:port
499498
{ "cisco.com:443", "_443.cisco.com", 443 }, // FQDN with port number
500499
{ ":8080", "_8080.missing.alt", 8080 }, // missing FQDN with port number
501500
{ "cisco.com.:443", "_443.cisco.com", 443 }, // trailing dot with port number
@@ -524,7 +523,7 @@ inline std::string normalize_ip_address(const std::string &s) {
524523
return ""; // error: trailing data after address
525524
}
526525
ipv4_address addr = addr_str.value.get_value();
527-
normalize(addr);
526+
addr = normalize(addr);
528527
return addr.get_string();
529528
}
530529
if (lookahead<ipv6_address_string> addr_str{d}) {
@@ -533,7 +532,7 @@ inline std::string normalize_ip_address(const std::string &s) {
533532
return ""; // error: trailing data after address
534533
}
535534
ipv6_address addr = get_ipv6_address(addr_str.value.get_value_array());
536-
normalize(addr);
535+
addr = normalize(addr);
537536
return addr.get_string();
538537
}
539538
return ""; // error: s is neither an ipv4 nor an ipv6 address

test/compare-stats.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def read_merc_data(in_file):
6363
continue
6464

6565
src_ip = r['src_ip']
66-
dst_ip = normalize_address(r['dst_ip'])
66+
dst_ip = r['dst_ip']
6767
dst_port = r['dst_port']
6868
user_agent = ''
6969
server_name = ''

unit_tests/libmerc_driver_fdc.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,7 @@ SCENARIO("test mercury_packet_processor_get_analysis_context_fdc for http reques
614614

615615
THEN("FDC should be written to output buffer") {
616616
REQUIRE(bytes_written != fdc_return::FDC_WRITE_INSUFFICIENT_SPACE);
617-
REQUIRE(bytes_written == 122);
617+
REQUIRE(bytes_written == 136);
618618
REQUIRE(fdc_buffer_len == max_buffer_allocation);
619619
}
620620
mercury_packet_processor_destruct(mpp);

0 commit comments

Comments
 (0)