Skip to content

Commit

Permalink
perf: memoized some module functions (#311)
Browse files Browse the repository at this point in the history
There are a bunch of functions in the elf, pe, and macho modules whose results can be memoized (imphash, import_md5, etc.). This can result in a significant performance boost in cases where a large number of calls to these functions are made (usually because a large number of rules are used in a single scanner).

Co-authored-by: Victor M. Alvarez <[email protected]>
  • Loading branch information
JonathanAnbary and plusvic authored Mar 3, 2025
1 parent a751199 commit 0e90769
Show file tree
Hide file tree
Showing 3 changed files with 182 additions and 2 deletions.
44 changes: 42 additions & 2 deletions lib/src/modules/elf/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ This allows creating YARA rules based on ELF metadata, including segments
and sections information, exported symbols, target platform, etc.
*/

use std::cell::RefCell;

use itertools::Itertools;
use lazy_static::lazy_static;
use md5::{Digest, Md5};
Expand All @@ -18,13 +20,32 @@ pub mod parser;
#[cfg(test)]
mod tests;

thread_local! {
    /// Per-thread slot holding the digest memoized by `import_md5`.
    /// Emptied by `main`.
    static IMPORT_MD5_CACHE: RefCell<Option<String>> = const { RefCell::new(None) };

    /// Per-thread slot consulted by `telfhash` for a memoized digest.
    /// Emptied by `main`.
    static TLSH_CACHE: RefCell<Option<String>> = const { RefCell::new(None) };
}

#[module_main]
fn main(data: &[u8], _meta: Option<&[u8]>) -> ELF {
    // Discard digests memoized on this thread before parsing new data, so
    // `import_md5` and `telfhash` never serve a value cached earlier.
    IMPORT_MD5_CACHE.with(|slot| slot.borrow_mut().take());
    TLSH_CACHE.with(|slot| slot.borrow_mut().take());

    // A parse failure yields an empty `ELF` rather than an error.
    match parser::ElfParser::new().parse(data) {
        Ok(elf) => elf,
        Err(_) => ELF::new(),
    }
}

#[module_export]
fn import_md5(ctx: &mut ScanContext) -> Option<RuntimeString> {
// Fast path: if this thread's IMPORT_MD5_CACHE already holds a digest,
// wrap its bytes with `RuntimeString::from_slice` and return immediately.
let cached = IMPORT_MD5_CACHE.with(|cache| -> Option<RuntimeString> {
cache
.borrow()
.as_deref()
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
});

if cached.is_some() {
return cached;
}

// Slow path. Returns `None` here, without touching the cache, when no ELF
// module output is available.
let elf = ctx.module_output::<ELF>()?;

let symbols = if elf.dynsym.is_empty() {
Expand All @@ -48,6 +69,10 @@ fn import_md5(ctx: &mut ScanContext) -> Option<RuntimeString> {

// Lowercase hexadecimal rendering of the finalized hash.
let digest = format!("{:x}", hasher.finalize());

// Memoize the digest so later calls on this thread take the fast path
// until `main` clears the cache.
IMPORT_MD5_CACHE.with(|cache| {
*cache.borrow_mut() = Some(digest.clone());
});

Some(RuntimeString::new(digest))
}

Expand Down Expand Up @@ -78,6 +103,17 @@ lazy_static! {
/// [1]: https://github.com/trendmicro/telfhash
#[module_export]
fn telfhash(ctx: &mut ScanContext) -> Option<RuntimeString> {
    // Fast path: if this thread's TLSH_CACHE already holds a digest, wrap its
    // bytes with `RuntimeString::from_slice` and return immediately.
    let cached = TLSH_CACHE.with(|cache| -> Option<RuntimeString> {
        cache
            .borrow()
            .as_deref()
            .map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
    });

    if cached.is_some() {
        return cached;
    }

    let elf = ctx.module_output::<ELF>()?;

    // Prefer dynsym over symbtab.
Expand Down Expand Up @@ -127,7 +163,11 @@ fn telfhash(ctx: &mut ScanContext) -> Option<RuntimeString> {

    builder.update(comma_separated_names.as_bytes());

    // `None` is returned (and nothing is cached) when the hash cannot be built.
    let digest = builder.build().ok()?.hash();

    // Memoize in TLSH_CACHE, the same cache the fast path above reads.
    // Writing to IMPORT_MD5_CACHE here would leave this function uncached
    // and make `import_md5` return the telfhash digest.
    TLSH_CACHE.with(|cache| {
        *cache.borrow_mut() = Some(digest.clone());
    });

    Some(RuntimeString::new(digest))
}
102 changes: 102 additions & 0 deletions lib/src/modules/macho/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
//! both protobuf structure fields and constants. This together with
//! also exported functions can be later used in YARA rules.
use std::cell::RefCell;

use crate::modules::prelude::*;
use crate::modules::protos::macho::*;
use bstr::BString;
Expand All @@ -15,6 +17,19 @@ mod parser;
#[cfg(test)]
mod tests;

thread_local! {
    /// Per-thread slot holding the digest memoized by `dylib_hash`.
    static DYLIB_MD5_CACHE: RefCell<Option<String>> = const { RefCell::new(None) };

    /// Per-thread slot holding the digest memoized by `entitlement_hash`.
    static ENTITLEMENT_MD5_CACHE: RefCell<Option<String>> = const { RefCell::new(None) };

    /// Per-thread slot holding the digest memoized by `export_hash`.
    static EXPORT_MD5_CACHE: RefCell<Option<String>> = const { RefCell::new(None) };

    /// Per-thread slot holding the digest memoized by `import_hash`.
    static IMPORT_MD5_CACHE: RefCell<Option<String>> = const { RefCell::new(None) };

    /// Per-thread slot holding the digest memoized by `sym_hash`.
    static SYM_MD5_CACHE: RefCell<Option<String>> = const { RefCell::new(None) };
}

/// Get the index of a Mach-O file within a fat binary based on CPU type.
///
/// This function iterates through the architecture types contained in a
Expand Down Expand Up @@ -320,6 +335,17 @@ fn has_export(ctx: &ScanContext, export: RuntimeString) -> Option<bool> {
/// Returns a md5 hash of the dylibs designated in the mach-o binary
#[module_export]
fn dylib_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
// Fast path: if this thread's DYLIB_MD5_CACHE already holds a digest,
// wrap its bytes with `RuntimeString::from_slice` and return immediately.
let cached = DYLIB_MD5_CACHE.with(|cache| -> Option<RuntimeString> {
cache
.borrow()
.as_deref()
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
});

if cached.is_some() {
return cached;
}

// Slow path. Returns `None` here, without touching the cache, when no
// Mach-O module output is available.
let macho = ctx.module_output::<Macho>()?;
let mut dylibs_to_hash = &macho.dylibs;

Expand Up @@ -351,12 +377,29 @@ fn dylib_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
md5_hash.update(dylibs_to_hash.as_bytes());

// Lowercase hexadecimal rendering of the finalized hash.
let digest = format!("{:x}", md5_hash.finalize());

// Memoize the digest so later calls on this thread take the fast path
// until `main` clears the cache.
DYLIB_MD5_CACHE.with(|cache| {
*cache.borrow_mut() = Some(digest.clone());
});

Some(RuntimeString::new(digest))
}

/// Returns a md5 hash of the entitlements designated in the mach-o binary
#[module_export]
fn entitlement_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
// Fast path: if this thread's ENTITLEMENT_MD5_CACHE already holds a digest,
// wrap its bytes with `RuntimeString::from_slice` and return immediately.
let cached =
ENTITLEMENT_MD5_CACHE.with(|cache| -> Option<RuntimeString> {
cache
.borrow()
.as_deref()
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
});

if cached.is_some() {
return cached;
}

// Slow path. Returns `None` here, without touching the cache, when no
// Mach-O module output is available.
let macho = ctx.module_output::<Macho>()?;
let mut entitlements_to_hash = &macho.entitlements;

Expand All @@ -383,12 +426,28 @@ fn entitlement_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
md5_hash.update(entitlements_str.as_bytes());

// Lowercase hexadecimal rendering of the finalized hash.
let digest = format!("{:x}", md5_hash.finalize());

// Memoize the digest so later calls on this thread take the fast path
// until `main` clears the cache.
ENTITLEMENT_MD5_CACHE.with(|cache| {
*cache.borrow_mut() = Some(digest.clone());
});

Some(RuntimeString::new(digest))
}

/// Returns a md5 hash of the export symbols in the mach-o binary
#[module_export]
fn export_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
// Fast path: if this thread's EXPORT_MD5_CACHE already holds a digest,
// wrap its bytes with `RuntimeString::from_slice` and return immediately.
let cached = EXPORT_MD5_CACHE.with(|cache| -> Option<RuntimeString> {
cache
.borrow()
.as_deref()
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
});

if cached.is_some() {
return cached;
}

// Slow path. Returns `None` here, without touching the cache, when no
// Mach-O module output is available.
let macho = ctx.module_output::<Macho>()?;
let mut exports_to_hash = &macho.exports;

Expand All @@ -415,12 +474,28 @@ fn export_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
md5_hash.update(exports_str.as_bytes());

// Lowercase hexadecimal rendering of the finalized hash.
let digest = format!("{:x}", md5_hash.finalize());

// Memoize the digest so later calls on this thread take the fast path
// until `main` clears the cache.
EXPORT_MD5_CACHE.with(|cache| {
*cache.borrow_mut() = Some(digest.clone());
});

Some(RuntimeString::new(digest))
}

/// Returns a md5 hash of the imported symbols in the mach-o binary
#[module_export]
fn import_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
// Fast path: if this thread's IMPORT_MD5_CACHE already holds a digest,
// wrap its bytes with `RuntimeString::from_slice` and return immediately.
let cached = IMPORT_MD5_CACHE.with(|cache| -> Option<RuntimeString> {
cache
.borrow()
.as_deref()
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
});

if cached.is_some() {
return cached;
}

// Slow path. Returns `None` here, without touching the cache, when no
// Mach-O module output is available.
let macho = ctx.module_output::<Macho>()?;
let mut imports_to_hash = &macho.imports;

Expand All @@ -447,12 +522,28 @@ fn import_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
md5_hash.update(imports_str.as_bytes());

// Lowercase hexadecimal rendering of the finalized hash.
let digest = format!("{:x}", md5_hash.finalize());

// Memoize the digest so later calls on this thread take the fast path
// until `main` clears the cache.
IMPORT_MD5_CACHE.with(|cache| {
*cache.borrow_mut() = Some(digest.clone());
});

Some(RuntimeString::new(digest))
}

/// Returns a md5 hash of the symbol table in the mach-o binary
#[module_export]
fn sym_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
// Fast path: if this thread's SYM_MD5_CACHE already holds a digest,
// wrap its bytes with `RuntimeString::from_slice` and return immediately.
let cached = SYM_MD5_CACHE.with(|cache| -> Option<RuntimeString> {
cache
.borrow()
.as_deref()
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
});

if cached.is_some() {
return cached;
}

// Slow path. Returns `None` here, without touching the cache, when no
// Mach-O module output is available.
let macho = ctx.module_output::<Macho>()?;
let mut symtab_to_hash = &macho.symtab.entries;

Expand All @@ -479,11 +570,22 @@ fn sym_hash(ctx: &mut ScanContext) -> Option<RuntimeString> {
md5_hash.update(symtab_hash_entries);

// Lowercase hexadecimal rendering of the finalized hash.
let digest = format!("{:x}", md5_hash.finalize());

// Memoize the digest so later calls on this thread take the fast path
// until `main` clears the cache.
SYM_MD5_CACHE.with(|cache| {
*cache.borrow_mut() = Some(digest.clone());
});

Some(RuntimeString::new(digest))
}

#[module_main]
fn main(data: &[u8], _meta: Option<&[u8]>) -> Macho {
DYLIB_MD5_CACHE.with(|cache| *cache.borrow_mut() = None);
ENTITLEMENT_MD5_CACHE.with(|cache| *cache.borrow_mut() = None);
EXPORT_MD5_CACHE.with(|cache| *cache.borrow_mut() = None);
IMPORT_MD5_CACHE.with(|cache| *cache.borrow_mut() = None);
SYM_MD5_CACHE.with(|cache| *cache.borrow_mut() = None);

match parser::MachO::parse(data) {
Ok(macho) => macho.into(),
Err(_) => Macho::new(),
Expand Down
38 changes: 38 additions & 0 deletions lib/src/modules/pe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ This allows creating YARA rules based on PE metadata, including sections,
imports and exports, resources, etc.
*/

use std::cell::RefCell;
use std::rc::Rc;
use std::slice::Iter;

Expand All @@ -27,8 +28,18 @@ mod authenticode;
pub mod parser;
mod rva2off;

thread_local! {
    /// Per-thread slot holding the digest memoized by `imphash`.
    /// Emptied by `main`.
    static IMPHASH_CACHE: RefCell<Option<String>> = const { RefCell::new(None) };

    /// Per-thread slot holding the value memoized by `calculate_checksum`.
    /// Emptied by `main`.
    static CHECKSUM_CACHE: RefCell<Option<i64>> = const { RefCell::new(None) };
}

#[module_main]
fn main(data: &[u8], _meta: Option<&[u8]>) -> PE {
IMPHASH_CACHE.with(|cache| *cache.borrow_mut() = None);
CHECKSUM_CACHE.with(|cache| *cache.borrow_mut() = None);

match parser::PE::parse(data) {
Ok(pe) => pe.into(),
Err(_) => {
Expand Down Expand Up @@ -112,6 +123,13 @@ fn calculate_checksum(ctx: &mut ScanContext) -> Option<i64> {
// in the header is not aligned to a 4-bytes boundary. Such files are not
// very common, but they do exist. Example:
// af3f20a9272489cbef4281c8c86ad42ccfb04ccedd3ada1e8c26939c726a4c8e
let cached: Option<i64> =
CHECKSUM_CACHE.with(|cache| -> Option<i64> { *cache.borrow() });

if cached.is_some() {
return cached;
}

let pe = ctx.module_output::<PE>()?;
let data = ctx.scanned_data();
let mut sum: u32 = 0;
Expand Down Expand Up @@ -157,6 +175,10 @@ fn calculate_checksum(ctx: &mut ScanContext) -> Option<i64> {
sum &= 0xffff;
sum += data.len() as u32;

CHECKSUM_CACHE.with(|cache| {
*cache.borrow_mut() = Some(sum.into());
});

Some(sum.into())
}

Expand Down Expand Up @@ -207,6 +229,17 @@ fn section_index_offset(ctx: &ScanContext, offset: i64) -> Option<i64> {
/// The resulting hash string is consistently in lowercase.
#[module_export]
fn imphash(ctx: &mut ScanContext) -> Option<RuntimeString> {
// Fast path: if this thread's IMPHASH_CACHE already holds a digest,
// wrap its bytes with `RuntimeString::from_slice` and return immediately.
let cached = IMPHASH_CACHE.with(|cache| -> Option<RuntimeString> {
cache
.borrow()
.as_deref()
.map(|s| RuntimeString::from_slice(ctx, s.as_bytes()))
});

if cached.is_some() {
return cached;
}

// Slow path. Returns `None` here, without touching the cache, when no PE
// module output is available.
let pe = ctx.module_output::<PE>()?;

if !pe.is_pe() {
Expand Down Expand Up @@ -239,6 +272,11 @@ fn imphash(ctx: &mut ScanContext) -> Option<RuntimeString> {
}

// Lowercase hexadecimal rendering of the finalized hash.
let digest = format!("{:x}", md5_hash.finalize());

// Memoize the digest so later calls on this thread take the fast path
// until `main` clears the cache.
IMPHASH_CACHE.with(|cache| {
*cache.borrow_mut() = Some(digest.clone());
});

Some(RuntimeString::new(digest))
}

Expand Down

0 comments on commit 0e90769

Please sign in to comment.