Skip to content

Commit 0e90769

Browse files
perf: memoized some module functions (#311)
There are a bunch of functions in the elf, pe, and macho modules whose results can be memoized (imphash, import_md5, etc.). This can result in a significant performance boost in cases where a large number of calls to these functions are made (usually because a large number of rules are used in a single scanner). Co-authored-by: Victor M. Alvarez <[email protected]>
1 parent a751199 commit 0e90769

File tree

3 files changed

+182
-2
lines changed

3 files changed

+182
-2
lines changed

lib/src/modules/elf/mod.rs

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ This allows creating YARA rules based on ELF metadata, including segments
44
and sections information, exported symbols, target platform, etc.
55
*/
66

7+
use std::cell::RefCell;
8+
79
use itertools::Itertools;
810
use lazy_static::lazy_static;
911
use md5::{Digest, Md5};
@@ -18,13 +20,32 @@ pub mod parser;
1820
#[cfg(test)]
1921
mod tests;
2022

23+
thread_local!(
24+
static IMPORT_MD5_CACHE: RefCell<Option<String>> =
25+
const { RefCell::new(None) };
26+
static TLSH_CACHE: RefCell<Option<String>> = const { RefCell::new(None) };
27+
);
28+
2129
#[module_main]
2230
fn main(data: &[u8], _meta: Option<&[u8]>) -> ELF {
31+
IMPORT_MD5_CACHE.with(|cache| *cache.borrow_mut() = None);
32+
TLSH_CACHE.with(|cache| *cache.borrow_mut() = None);
2333
parser::ElfParser::new().parse(data).unwrap_or_else(|_| ELF::new())
2434
}
2535

2636
#[module_export]
2737
fn import_md5(ctx: &mut ScanContext) -> Option<RuntimeString> {
38+
let cached = IMPORT_MD5_CACHE.with(|cache| -> Option<RuntimeString> {
39+
cache
40+
.borrow()
41+
.as_deref()
42+
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
43+
});
44+
45+
if cached.is_some() {
46+
return cached;
47+
}
48+
2849
let elf = ctx.module_output::<ELF>()?;
2950

3051
let symbols = if elf.dynsym.is_empty() {
@@ -48,6 +69,10 @@ fn import_md5(ctx: &mut ScanContext) -> Option<RuntimeString> {
4869

4970
let digest = format!("{:x}", hasher.finalize());
5071

72+
IMPORT_MD5_CACHE.with(|cache| {
73+
*cache.borrow_mut() = Some(digest.clone());
74+
});
75+
5176
Some(RuntimeString::new(digest))
5277
}
5378

@@ -78,6 +103,17 @@ lazy_static! {
78103
/// [1]: https://github.com/trendmicro/telfhash
79104
#[module_export]
80105
fn telfhash(ctx: &mut ScanContext) -> Option<RuntimeString> {
106+
let cached = TLSH_CACHE.with(|cache| -> Option<RuntimeString> {
107+
cache
108+
.borrow()
109+
.as_deref()
110+
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
111+
});
112+
113+
if cached.is_some() {
114+
return cached;
115+
}
116+
81117
let elf = ctx.module_output::<ELF>()?;
82118

83119
// Prefer dynsym over symbtab.
@@ -127,7 +163,11 @@ fn telfhash(ctx: &mut ScanContext) -> Option<RuntimeString> {
127163

128164
builder.update(comma_separated_names.as_bytes());
129165

130-
let tlsh = builder.build().ok()?;
166+
let digest = builder.build().ok()?.hash();
131167

132-
Some(RuntimeString::new(tlsh.hash()))
168+
// Store the telfhash in TLSH_CACHE (the cache this function reads on entry); writing it to IMPORT_MD5_CACHE would make import_md5() return this TLSH digest.
TLSH_CACHE.with(|cache| {
169+
*cache.borrow_mut() = Some(digest.clone());
170+
});
171+
172+
Some(RuntimeString::new(digest))
133173
}

lib/src/modules/macho/mod.rs

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
//! both protobuf structure fields and constants. This together with
66
//! also exported functions can be later used in YARA rules.
77
8+
use std::cell::RefCell;
9+
810
use crate::modules::prelude::*;
911
use crate::modules::protos::macho::*;
1012
use bstr::BString;
@@ -15,6 +17,19 @@ mod parser;
1517
#[cfg(test)]
1618
mod tests;
1719

20+
thread_local!(
21+
static DYLIB_MD5_CACHE: RefCell<Option<String>> =
22+
const { RefCell::new(None) };
23+
static ENTITLEMENT_MD5_CACHE: RefCell<Option<String>> =
24+
const { RefCell::new(None) };
25+
static EXPORT_MD5_CACHE: RefCell<Option<String>> =
26+
const { RefCell::new(None) };
27+
static IMPORT_MD5_CACHE: RefCell<Option<String>> =
28+
const { RefCell::new(None) };
29+
static SYM_MD5_CACHE: RefCell<Option<String>> =
30+
const { RefCell::new(None) };
31+
);
32+
1833
/// Get the index of a Mach-O file within a fat binary based on CPU type.
1934
///
2035
/// This function iterates through the architecture types contained in a
@@ -320,6 +335,17 @@ fn has_export(ctx: &ScanContext, export: RuntimeString) -> Option<bool> {
320335
/// Returns a md5 hash of the dylibs designated in the mach-o binary
321336
#[module_export]
322337
fn dylib_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
338+
let cached = DYLIB_MD5_CACHE.with(|cache| -> Option<RuntimeString> {
339+
cache
340+
.borrow()
341+
.as_deref()
342+
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
343+
});
344+
345+
if cached.is_some() {
346+
return cached;
347+
}
348+
323349
let macho = ctx.module_output::<Macho>()?;
324350
let mut dylibs_to_hash = &macho.dylibs;
325351

@@ -351,12 +377,29 @@ fn dylib_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
351377
md5_hash.update(dylibs_to_hash.as_bytes());
352378

353379
let digest = format!("{:x}", md5_hash.finalize());
380+
381+
DYLIB_MD5_CACHE.with(|cache| {
382+
*cache.borrow_mut() = Some(digest.clone());
383+
});
384+
354385
Some(RuntimeString::new(digest))
355386
}
356387

357388
/// Returns a md5 hash of the entitlements designated in the mach-o binary
358389
#[module_export]
359390
fn entitlement_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
391+
let cached =
392+
ENTITLEMENT_MD5_CACHE.with(|cache| -> Option<RuntimeString> {
393+
cache
394+
.borrow()
395+
.as_deref()
396+
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
397+
});
398+
399+
if cached.is_some() {
400+
return cached;
401+
}
402+
360403
let macho = ctx.module_output::<Macho>()?;
361404
let mut entitlements_to_hash = &macho.entitlements;
362405

@@ -383,12 +426,28 @@ fn entitlement_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
383426
md5_hash.update(entitlements_str.as_bytes());
384427

385428
let digest = format!("{:x}", md5_hash.finalize());
429+
430+
ENTITLEMENT_MD5_CACHE.with(|cache| {
431+
*cache.borrow_mut() = Some(digest.clone());
432+
});
433+
386434
Some(RuntimeString::new(digest))
387435
}
388436

389437
/// Returns a md5 hash of the export symbols in the mach-o binary
390438
#[module_export]
391439
fn export_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
440+
let cached = EXPORT_MD5_CACHE.with(|cache| -> Option<RuntimeString> {
441+
cache
442+
.borrow()
443+
.as_deref()
444+
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
445+
});
446+
447+
if cached.is_some() {
448+
return cached;
449+
}
450+
392451
let macho = ctx.module_output::<Macho>()?;
393452
let mut exports_to_hash = &macho.exports;
394453

@@ -415,12 +474,28 @@ fn export_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
415474
md5_hash.update(exports_str.as_bytes());
416475

417476
let digest = format!("{:x}", md5_hash.finalize());
477+
478+
EXPORT_MD5_CACHE.with(|cache| {
479+
*cache.borrow_mut() = Some(digest.clone());
480+
});
481+
418482
Some(RuntimeString::new(digest))
419483
}
420484

421485
/// Returns a md5 hash of the imported symbols in the mach-o binary
422486
#[module_export]
423487
fn import_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
488+
let cached = IMPORT_MD5_CACHE.with(|cache| -> Option<RuntimeString> {
489+
cache
490+
.borrow()
491+
.as_deref()
492+
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
493+
});
494+
495+
if cached.is_some() {
496+
return cached;
497+
}
498+
424499
let macho = ctx.module_output::<Macho>()?;
425500
let mut imports_to_hash = &macho.imports;
426501

@@ -447,12 +522,28 @@ fn import_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
447522
md5_hash.update(imports_str.as_bytes());
448523

449524
let digest = format!("{:x}", md5_hash.finalize());
525+
526+
IMPORT_MD5_CACHE.with(|cache| {
527+
*cache.borrow_mut() = Some(digest.clone());
528+
});
529+
450530
Some(RuntimeString::new(digest))
451531
}
452532

453533
/// Returns a md5 hash of the symbol table in the mach-o binary
454534
#[module_export]
455535
fn sym_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
536+
let cached = SYM_MD5_CACHE.with(|cache| -> Option<RuntimeString> {
537+
cache
538+
.borrow()
539+
.as_deref()
540+
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
541+
});
542+
543+
if cached.is_some() {
544+
return cached;
545+
}
546+
456547
let macho = ctx.module_output::<Macho>()?;
457548
let mut symtab_to_hash = &macho.symtab.entries;
458549

@@ -479,11 +570,22 @@ fn sym_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
479570
md5_hash.update(symtab_hash_entries);
480571

481572
let digest = format!("{:x}", md5_hash.finalize());
573+
574+
SYM_MD5_CACHE.with(|cache| {
575+
*cache.borrow_mut() = Some(digest.clone());
576+
});
577+
482578
Some(RuntimeString::new(digest))
483579
}
484580

485581
#[module_main]
486582
fn main(data: &[u8], _meta: Option<&[u8]>) -> Macho {
583+
DYLIB_MD5_CACHE.with(|cache| *cache.borrow_mut() = None);
584+
ENTITLEMENT_MD5_CACHE.with(|cache| *cache.borrow_mut() = None);
585+
EXPORT_MD5_CACHE.with(|cache| *cache.borrow_mut() = None);
586+
IMPORT_MD5_CACHE.with(|cache| *cache.borrow_mut() = None);
587+
SYM_MD5_CACHE.with(|cache| *cache.borrow_mut() = None);
588+
487589
match parser::MachO::parse(data) {
488590
Ok(macho) => macho.into(),
489591
Err(_) => Macho::new(),

lib/src/modules/pe/mod.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ This allows creating YARA rules based on PE metadata, including sections,
44
imports and exports, resources, etc.
55
*/
66

7+
use std::cell::RefCell;
78
use std::rc::Rc;
89
use std::slice::Iter;
910

@@ -27,8 +28,18 @@ mod authenticode;
2728
pub mod parser;
2829
mod rva2off;
2930

31+
thread_local!(
32+
static IMPHASH_CACHE: RefCell<Option<String>> =
33+
const { RefCell::new(None) };
34+
35+
static CHECKSUM_CACHE: RefCell<Option<i64>> = const { RefCell::new(None) };
36+
);
37+
3038
#[module_main]
3139
fn main(data: &[u8], _meta: Option<&[u8]>) -> PE {
40+
IMPHASH_CACHE.with(|cache| *cache.borrow_mut() = None);
41+
CHECKSUM_CACHE.with(|cache| *cache.borrow_mut() = None);
42+
3243
match parser::PE::parse(data) {
3344
Ok(pe) => pe.into(),
3445
Err(_) => {
@@ -112,6 +123,13 @@ fn calculate_checksum(ctx: &mut ScanContext) -> Option<i64> {
112123
// in the header is not aligned to a 4-bytes boundary. Such files are not
113124
// very common, but they do exist. Example:
114125
// af3f20a9272489cbef4281c8c86ad42ccfb04ccedd3ada1e8c26939c726a4c8e
126+
let cached: Option<i64> =
127+
CHECKSUM_CACHE.with(|cache| -> Option<i64> { *cache.borrow() });
128+
129+
if cached.is_some() {
130+
return cached;
131+
}
132+
115133
let pe = ctx.module_output::<PE>()?;
116134
let data = ctx.scanned_data();
117135
let mut sum: u32 = 0;
@@ -157,6 +175,10 @@ fn calculate_checksum(ctx: &mut ScanContext) -> Option<i64> {
157175
sum &= 0xffff;
158176
sum += data.len() as u32;
159177

178+
CHECKSUM_CACHE.with(|cache| {
179+
*cache.borrow_mut() = Some(sum.into());
180+
});
181+
160182
Some(sum.into())
161183
}
162184

@@ -207,6 +229,17 @@ fn section_index_offset(ctx: &ScanContext, offset: i64) -> Option<i64> {
207229
/// The resulting hash string is consistently in lowercase.
208230
#[module_export]
209231
fn imphash(ctx: &mut ScanContext) -> Option<RuntimeString> {
232+
let cached = IMPHASH_CACHE.with(|cache| -> Option<RuntimeString> {
233+
cache
234+
.borrow()
235+
.as_deref()
236+
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
237+
});
238+
239+
if cached.is_some() {
240+
return cached;
241+
}
242+
210243
let pe = ctx.module_output::<PE>()?;
211244

212245
if !pe.is_pe() {
@@ -239,6 +272,11 @@ fn imphash(ctx: &mut ScanContext) -> Option<RuntimeString> {
239272
}
240273

241274
let digest = format!("{:x}", md5_hash.finalize());
275+
276+
IMPHASH_CACHE.with(|cache| {
277+
*cache.borrow_mut() = Some(digest.clone());
278+
});
279+
242280
Some(RuntimeString::new(digest))
243281
}
244282

0 commit comments

Comments
 (0)