Skip to content

Commit 87d392a

Browse files
davidmcgrew authored and GitHub Enterprise committed
Merge pull request #466 from network-intelligence/dev
Merging dev into trunk
2 parents bf67506 + 633fcbc commit 87d392a

File tree

11 files changed

+380
-180
lines changed

11 files changed

+380
-180
lines changed

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.7.0
1+
2.7.1

doc/CHANGELOG.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
# CHANGELOG for Mercury
2-
* Added a new configuration option, minimize-ram, which reduces
3-
* the RAM usage of mercury library when enabled
42

53
## VERSION 2.7.1
64
* Updated QUIC reassembly logic for reordered QUIC crypto frames
5+
* Refactored the IP subnet reading code to minimize the amount of
6+
temporary RAM needed
7+
* Added a new configuration option, minimize-ram, which reduces
8+
the RAM usage of mercury library when enabled
9+
* Added changes to allow classifier to use custom weights
10+
and introduced new cython interface
711

812
## VERSION 2.7.0
913
* Added minimal RDP (Remote Desktop Protocol) support, which

src/cython/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '2.7.0'
1+
__version__ = '2.7.1'

src/cython/mercury.pyx

Lines changed: 71 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ from cython.operator import dereference
3131
# CC=g++ CXX=g++ python setup.py install
3232

3333
# TODO: actually handle version
34-
__version__ = '2.7.0'
34+
__version__ = '2.7.1'
3535

3636
# imports from mercury's dns
3737
cdef extern from "../libmerc/dns.h":
@@ -116,10 +116,10 @@ cdef extern from "../libmerc/analysis.h":
116116
cdef cppclass classifier:
117117
analysis_result perform_analysis(const char *fp_str, const char *server_name, const char *dst_ip, uint16_t dst_port, const char *user_agent)
118118

119-
# analysis_result perform_analysis_with_weights(const char *fp_str, const char *server_name, const char *dst_ip, uint16_t dst_port, const char *user_agent,
120-
# long double new_as_weight, long double new_domain_weight,
121-
# long double new_port_weight, long double new_ip_weight,
122-
# long double new_sni_weight, long double new_ua_weight)
119+
analysis_result perform_analysis_with_weights(const char *fp_str, const char *server_name, const char *dst_ip, uint16_t dst_port, const char *user_agent,
120+
double new_as_weight, double new_domain_weight,
121+
double new_port_weight, double new_ip_weight,
122+
double new_sni_weight, double new_ua_weight)
123123

124124

125125
cdef extern from "../libmerc/watchlist.hpp":
@@ -435,73 +435,72 @@ cdef class Mercury:
435435

436436
return result
437437

438-
# cpdef dict perform_analysis_with_weights(self, str fp_str, str server_name, str dst_ip, int dst_port, str user_agent,
439-
# long double new_as_weight, long double new_domain_weight,
440-
# long double new_port_weight, long double new_ip_weight,
441-
# long double new_sni_weight, long double new_ua_weight):
442-
# """
443-
# Directly call into mercury analysis functionality by providing all needed data features. Additionally,
444-
# supply custom weights for each data feature.
445-
#
446-
# :param fp_str: mercury-generated network protocol fingerprint
447-
# :type fp_str: str
448-
# :param server_name: The visible, fully qualified domain name, found in the server_name extension or the HTTP Host field
449-
# :type server_name: str
450-
# :param dst_ip: The destination IP address associated with the packet of interest
451-
# :type dst_ip: str
452-
# :param dst_port: The destination port associated with the packet of interest
453-
# :type dst_port: int
454-
# :param user_agent: If analyzing an HTTP packet, provide the contents of the HTTP User-Agent field
455-
# :type user_agent: str
456-
# :param new_as_weight: Updated weight for the Autonomous System data feature
457-
# :type new_as_weight: long double
458-
# :param new_domain_weight: Updated weight for the domain name data feature
459-
# :type new_domain_weight: long double
460-
# :param new_port_weight: Updated weight for the destination port data feature
461-
# :type new_port_weight: long double
462-
# :param new_ip_weight: Updated weight for the destination IP address data feature
463-
# :type new_ip_weight: long double
464-
# :param new_sni_weight: Updated weight for the server_name data feature
465-
# :type new_sni_weight: long double
466-
# :param new_ua_weight: Updated weight for the User-Agent data feature
467-
# :type new_ua_weight: long double
468-
# :return: JSON-encoded analysis output
469-
# :rtype: dict
470-
# """
471-
# if not self.do_analysis:
472-
# print(f'error: classifier not loaded (is do_analysis set to True?)')
473-
# return None
474-
#
475-
# cdef bytes fp_str_b = fp_str.encode()
476-
# cdef char* fp_str_c = fp_str_b
477-
# cdef bytes server_name_b = server_name.encode()
478-
# cdef char* server_name_c = server_name_b
479-
# cdef bytes dst_ip_b = dst_ip.encode()
480-
# cdef char* dst_ip_c = dst_ip_b
481-
# if user_agent == None:
482-
# user_agent = 'None'
483-
# cdef bytes user_agent_b = user_agent.encode()
484-
# cdef char* user_agent_c = user_agent_b
485-
# if user_agent == 'None':
486-
# user_agent_c = NULL
487-
#
488-
# cdef analysis_result ar = self.clf.perform_analysis_with_weights(fp_str_c, server_name_c, dst_ip_c, dst_port, user_agent_c,
489-
# new_as_weight, new_domain_weight, new_port_weight,
490-
# new_ip_weight, new_sni_weight, new_ua_weight)
491-
#
492-
# cdef fingerprint_status fp_status_enum = ar.status
493-
# fp_status = fp_status_dict[fp_status_enum]
494-
#
495-
# cdef dict result = {}
496-
# result['fingerprint_info'] = {}
497-
# result['fingerprint_info']['status'] = fp_status
498-
# result['analysis'] = {}
499-
# result['analysis']['process'] = ar.max_proc.decode('UTF-8')
500-
# result['analysis']['score'] = ar.max_score
501-
# result['analysis']['malware'] = ar.max_mal
502-
# result['analysis']['p_malware'] = ar.malware_prob
503-
#
504-
# return result
438+
cpdef dict perform_analysis_with_weights(self, str fp_str, str server_name, str dst_ip, int dst_port, str user_agent,
439+
double new_as_weight, double new_domain_weight,
440+
double new_port_weight, double new_ip_weight,
441+
double new_sni_weight, double new_ua_weight):
442+
"""
443+
Directly call into mercury analysis functionality by providing all needed data features. Additionally,
444+
supply custom weights for each data feature.
445+
446+
:param fp_str: mercury-generated network protocol fingerprint
447+
:type fp_str: str
448+
:param server_name: The visible, fully qualified domain name, found in the server_name extension or the HTTP Host field
449+
:type server_name: str
450+
:param dst_ip: The destination IP address associated with the packet of interest
451+
:type dst_ip: str
452+
:param dst_port: The destination port associated with the packet of interest
453+
:type dst_port: int
454+
:param user_agent: If analyzing an HTTP packet, provide the contents of the HTTP User-Agent field
455+
:type user_agent: str
456+
:param new_as_weight: Updated weight for the Autonomous System data feature
457+
:type new_as_weight: double
458+
:param new_domain_weight: Updated weight for the domain name data feature
459+
:type new_domain_weight: double
460+
:param new_port_weight: Updated weight for the destination port data feature
461+
:type new_port_weight: double
462+
:param new_ip_weight: Updated weight for the destination IP address data feature
463+
:type new_ip_weight: double
464+
:param new_sni_weight: Updated weight for the server_name data feature
465+
:type new_sni_weight: double
466+
:param new_ua_weight: Updated weight for the User-Agent data feature
467+
:type new_ua_weight: double
468+
:return: JSON-encoded analysis output
469+
:rtype: dict
470+
"""
471+
if not self.do_analysis:
472+
print(f'error: classifier not loaded (is do_analysis set to True?)')
473+
return None
474+
475+
cdef bytes fp_str_b = fp_str.encode()
476+
cdef char* fp_str_c = fp_str_b
477+
cdef bytes server_name_b = server_name.encode()
478+
cdef char* server_name_c = server_name_b
479+
cdef bytes dst_ip_b = dst_ip.encode()
480+
cdef char* dst_ip_c = dst_ip_b
481+
if user_agent == None:
482+
user_agent = 'None'
483+
cdef bytes user_agent_b = user_agent.encode()
484+
cdef char* user_agent_c = user_agent_b
485+
486+
cdef analysis_result ar = self.clf.perform_analysis_with_weights(
487+
fp_str_c, server_name_c, dst_ip_c, dst_port, user_agent_c,
488+
new_as_weight, new_domain_weight, new_port_weight,
489+
new_ip_weight, new_sni_weight, new_ua_weight)
490+
491+
cdef fingerprint_status fp_status_enum = ar.status
492+
fp_status = fp_status_dict[fp_status_enum]
493+
494+
cdef dict result = {}
495+
result['fingerprint_info'] = {}
496+
result['fingerprint_info']['status'] = fp_status
497+
result['analysis'] = {}
498+
result['analysis']['process'] = ar.max_proc.decode('UTF-8')
499+
result['analysis']['score'] = ar.max_score
500+
result['analysis']['malware'] = ar.max_mal
501+
result['analysis']['p_malware'] = ar.malware_prob
502+
503+
return result
505504

506505
cdef list extract_attributes(self, analysis_result ar):
507506
cdef char tags_buf[8192]
@@ -551,8 +550,6 @@ cdef class Mercury:
551550
user_agent = 'None'
552551
cdef bytes user_agent_b = user_agent.encode()
553552
cdef char* user_agent_c = user_agent_b
554-
if user_agent == 'None':
555-
user_agent_c = NULL
556553

557554
cdef analysis_result ar = self.clf.perform_analysis(fp_str_c, server_name_c, dst_ip_c, dst_port, user_agent_c)
558555

src/libmerc/addr.cc

Lines changed: 81 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -93,28 +93,46 @@ uint32_t subnet_data::get_asn_info(const char* dst_ip) const {
9393
return 0;
9494
}
9595

96-
int subnet_data::process_line(std::string &line_str) {
96+
int subnet_data::process_asn_subnets(const std::vector<std::string> &subnets) {
9797

98-
// set the prefix[num] to the subnet and ASN found in line
99-
if (lct_subnet_set_from_string(&prefix[num], line_str.c_str()) != 0) {
100-
printf_err(log_err, "could not parse subnet string '%s'\n", line_str.c_str());
101-
return -1; // failure
98+
prefix = (lct_subnet_t *)calloc(sizeof(lct_subnet_t), subnets.size());
99+
if (prefix == nullptr) {
100+
throw std::runtime_error("error: could not initialize subnet_data");
101+
}
102+
103+
// Add Special and Private subnets to ASN subnets
104+
//
105+
// parse the subnets and ASN strings // start with the RFC 1918 and 3927 private and link local
106+
// subnets as a basis for any table set
107+
//
108+
// num += init_private_subnets(&prefix[num], BGP_MAX_ENTRIES);
109+
//
110+
// fill up the rest of the array with reserved IP subnets
111+
//
112+
// num += init_special_subnets(&prefix[num], BGP_MAX_ENTRIES);
113+
114+
for (const std::string &line_str : subnets) {
115+
// set the prefix[num] to the subnet and ASN found in line
116+
if (lct_subnet_set_from_string(&prefix[num], line_str.c_str()) != 0) {
117+
printf_err(log_err, "could not parse subnet string '%s'\n", line_str.c_str());
118+
return -1; // failure
119+
}
120+
num++;
102121
}
103-
num++;
104122
return 0; // success
105123
}
106124

107125
int subnet_data::lct_add_domain_mapping(uint32_t &addr, uint8_t &mask_length, std::string &domain_name, std::unordered_map<uint32_t, ssize_t> &subnet_map) {
108126
uint32_t domain_idx;
109-
if (domains_watchlist.find(domain_name) == domains_watchlist.end()) { // new domain; assing a domain id and save in the domain watchlist
127+
if (domains_watchlist.find(domain_name) == domains_watchlist.end()) { // new domain; assign a domain id and save in the domain watchlist
110128
domain_idx = domains_watchlist.size();
111129
domains_watchlist[domain_name] = domain_idx;
112130
} else {
113-
domain_idx = domains_watchlist[domain_name]; // domain already see; retrieve domain id
131+
domain_idx = domains_watchlist[domain_name]; // domain already seen; retrieve domain id
114132
}
115133

116134
lct_subnet<uint32_t> *subnet_itr;
117-
if (subnet_map.find(addr) != subnet_map.end()) { // subnet presetn in map, domain_idx needs to be appended
135+
if (subnet_map.find(addr) != subnet_map.end()) { // subnet present in map, domain_idx needs to be appended
118136
subnet_itr = &domains_prefix[subnet_map[addr]];
119137
if (subnet_itr->info.type == IP_SUBNET_DOMAIN && subnet_itr->addr == addr && subnet_itr->len == mask_length) {
120138
uint8_t *old_arr = subnet_itr->info.domain.domain_idx_arr;
@@ -150,58 +168,67 @@ int subnet_data::lct_add_domain_mapping(uint32_t &addr, uint8_t &mask_length, st
150168
return 0; // success
151169
}
152170

153-
int subnet_data::process_domain_mappings_line(std::string &line_str, std::unordered_map<uint32_t, ssize_t> &subnet_map) {
154-
rapidjson::Document domain_obj;
155-
domain_obj.Parse(line_str.c_str());
156-
if(!domain_obj.IsObject()) {
157-
printf_err(log_warning, "invalid JSON line in resource file\n");
158-
return -1;
171+
int subnet_data::process_domain_mapping_subnets(const std::vector<std::string> &subnets) {
172+
173+
std::unordered_map<uint32_t, ssize_t> subnet_map;
174+
domains_prefix = (lct_subnet_t *)calloc(sizeof(lct_subnet_t), subnets.size());
175+
if (domains_prefix == nullptr) {
176+
throw std::runtime_error("error: could not initialize domains_prefix");
159177
}
160178

161-
std::string subnet_type;
162-
std::string subnet_str;
163-
std::string subnet_tag;
164-
165-
uint32_t addr;
166-
unsigned char *dq = (unsigned char *)&addr;
167-
uint8_t mask_length;
168-
constexpr unsigned int bits_in_T = sizeof(uint32_t) * 8;
179+
for (const std::string &line_str : subnets) {
180+
rapidjson::Document domain_obj;
181+
domain_obj.Parse(line_str.c_str());
182+
if(!domain_obj.IsObject()) {
183+
printf_err(log_warning, "invalid JSON line in resource file\n");
184+
return -1;
185+
}
169186

170-
if (domain_obj.HasMember("subnet") && domain_obj["subnet"].IsString()) {
171-
subnet_str = domain_obj["subnet"].GetString();
172-
}
173-
else {
174-
return -1;
175-
}
176-
if (domain_obj.HasMember("type") && domain_obj["type"].IsString()) {
177-
subnet_type = domain_obj["type"].GetString();
178-
}
179-
else {
180-
return -1;
181-
}
182-
if (domain_obj.HasMember("tag") && domain_obj["tag"].IsString()) {
183-
subnet_tag = domain_obj["tag"].GetString();
184-
}
185-
else {
186-
return -1;
187-
}
187+
std::string subnet_type;
188+
std::string subnet_str;
189+
std::string subnet_tag;
190+
191+
uint32_t addr;
192+
unsigned char *dq = (unsigned char *)&addr;
193+
uint8_t mask_length;
194+
constexpr unsigned int bits_in_T = sizeof(uint32_t) * 8;
188195

189-
if (subnet_type == "domain_mapping") {
190-
int num_items_parsed = sscanf(subnet_str.c_str(),"%hhu.%hhu.%hhu.%hhu/%hhu",
191-
dq + 3, dq + 2, dq + 1, dq, &mask_length);
192-
if (num_items_parsed == 5) { // invalid IP or IPv6
193-
if ((mask_length == 0) || (mask_length > bits_in_T)) {
194-
fprintf(stderr, "ERROR: %u is not a valid prefix length\n", mask_length);
195-
return -1; // failure
196-
}
196+
if (domain_obj.HasMember("subnet") && domain_obj["subnet"].IsString()) {
197+
subnet_str = domain_obj["subnet"].GetString();
198+
}
199+
else {
200+
return -1;
201+
}
202+
if (domain_obj.HasMember("type") && domain_obj["type"].IsString()) {
203+
subnet_type = domain_obj["type"].GetString();
204+
}
205+
else {
206+
return -1;
207+
}
208+
if (domain_obj.HasMember("tag") && domain_obj["tag"].IsString()) {
209+
subnet_tag = domain_obj["tag"].GetString();
210+
}
211+
else {
212+
return -1;
213+
}
197214

198-
if (lct_add_domain_mapping(addr, mask_length, subnet_tag, subnet_map) != 0) {
199-
return -1; // failure
215+
if (subnet_type == "domain_mapping") {
216+
int num_items_parsed = sscanf(subnet_str.c_str(),"%hhu.%hhu.%hhu.%hhu/%hhu",
217+
dq + 3, dq + 2, dq + 1, dq, &mask_length);
218+
if (num_items_parsed == 5) { // all four octets and the prefix length parsed; other input (invalid IP or IPv6) is skipped
219+
if ((mask_length == 0) || (mask_length > bits_in_T)) {
220+
fprintf(stderr, "ERROR: %u is not a valid prefix length\n", mask_length);
221+
return -1; // failure
222+
}
223+
224+
if (lct_add_domain_mapping(addr, mask_length, subnet_tag, subnet_map) != 0) {
225+
return -1; // failure
226+
}
200227
}
201228
}
202-
}
203-
else if (subnet_type == "proxy" || subnet_type == "sinkhole") {
204-
domain_faking_exceptions.insert(addr);
229+
else if (subnet_type == "proxy" || subnet_type == "sinkhole") {
230+
domain_faking_exceptions.insert(addr);
231+
}
205232
}
206233

207234
return 0;

0 commit comments

Comments
 (0)