Skip to content

Commit d7487c8

Browse files
committed
feat: implement API for clearing rules profiling data.
Also renames `most_expensive_rules` to the more straightforward `slowest_rules`.
1 parent 50180d8 commit d7487c8

File tree

11 files changed

+125
-63
lines changed

11 files changed

+125
-63
lines changed

.github/workflows/coverage.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
uses: taiki-e/install-action@cargo-llvm-cov
3131

3232
- name: Generate code coverage
33-
run: cargo llvm-cov --features=magic-module --workspace --lib --lcov --output-path lcov.info
33+
run: cargo llvm-cov --features=magic-module,rules-profiling --workspace --lib --lcov --output-path lcov.info
3434

3535
- name: Upload coverage to Codecov
3636
uses: codecov/codecov-action@v4

.github/workflows/tests.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,21 @@ jobs:
2626
- build: msrv
2727
os: ubuntu-latest
2828
rust: 1.78.0
29-
args: "--features=magic-module"
29+
args: "--features=magic-module,rules-profiling"
3030
rust_flags: "-Awarnings"
3131
experimental: false
3232

3333
- build: stable
3434
os: ubuntu-latest
3535
rust: stable
36-
args: "--features=magic-module"
36+
args: "--features=magic-module,rules-profiling"
3737
rust_flags: "-Awarnings"
3838
experimental: false
3939

4040
- build: nightly
4141
os: ubuntu-latest
4242
rust: nightly
43-
args: "--features=magic-module"
43+
args: "--features=magic-module,rules-profiling"
4444
# Link is currently failing with rust-lld (rust-lang/rust#124129)
4545
# Disable rust-lld with -Zlinker-features=-lld
4646
# See: https://github.com/dtolnay/linkme/commit/d13709bfd2c1278b4c8b6c846e2017b623923c0c
@@ -50,14 +50,14 @@ jobs:
5050
- build: macos
5151
os: macos-latest
5252
rust: stable
53-
args: ""
53+
args: "--features=rules-profiling"
5454
rust_flags: "-Awarnings"
5555
experimental: false
5656

5757
- build: win-msvc
5858
os: windows-latest
5959
rust: stable
60-
args: ""
60+
args: "--features=rules-profiling"
6161
rust_flags: "-Awarnings"
6262
experimental: false
6363

capi/include/yara_x.h

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ typedef void (*YRX_RULE_CALLBACK)(const struct YRX_RULE *rule,
229229
typedef void (*YRX_IMPORT_CALLBACK)(const char *module_name,
230230
void *user_data);
231231

232-
// Callback function passed to [`yrx_scanner_iter_most_expensive_rules`].
232+
// Callback function passed to [`yrx_scanner_iter_slowest_rules`].
233233
//
234234
// The callback function receives pointers to the namespace and rule name,
235235
// and two float numbers with the time spent by the rule matching patterns
@@ -240,11 +240,11 @@ typedef void (*YRX_IMPORT_CALLBACK)(const char *module_name,
240240
// data owned by the user.
241241
//
242242
// Requires the `rules-profiling` feature.
243-
typedef void (*YRX_MOST_EXPENSIVE_RULES_CALLBACK)(const char *namespace,
244-
const char *rule,
245-
double pattern_matching_time,
246-
double condition_exec_time,
247-
void *user_data);
243+
typedef void (*YRX_SLOWEST_RULES_CALLBACK)(const char *namespace,
244+
const char *rule,
245+
double pattern_matching_time,
246+
double condition_exec_time,
247+
void *user_data);
248248

249249
// Returns the error message for the most recent function in this API
250250
// invoked by the current thread.
@@ -708,15 +708,15 @@ enum YRX_RESULT yrx_scanner_set_global_float(struct YRX_SCANNER *scanner,
708708
const char *ident,
709709
double value);
710710

711-
// Iterates over the top N most expensive rules, calling the callback for
712-
// each rule.
711+
// Iterates over the slowest N rules, calling the callback for each rule.
713712
//
714-
// Requires the `rules-profiling` feature, otherwise the
713+
// Requires the `rules-profiling` feature, otherwise returns
714+
// [`YRX_RESULT::NOT_SUPPORTED`].
715715
//
716-
// See [`YRX_MOST_EXPENSIVE_RULES_CALLBACK`] for more details.
717-
enum YRX_RESULT yrx_scanner_iter_most_expensive_rules(struct YRX_SCANNER *scanner,
718-
size_t n,
719-
YRX_MOST_EXPENSIVE_RULES_CALLBACK callback,
720-
void *user_data);
716+
// See [`YRX_SLOWEST_RULES_CALLBACK`] for more details.
717+
enum YRX_RESULT yrx_scanner_iter_slowest_rules(struct YRX_SCANNER *scanner,
718+
size_t n,
719+
YRX_SLOWEST_RULES_CALLBACK callback,
720+
void *user_data);
721721

722722
#endif /* YARA_X */

capi/src/scanner.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ unsafe fn slice_from_ptr_and_len<'a>(
303303
Some(data)
304304
}
305305

306-
/// Callback function passed to [`yrx_scanner_iter_most_expensive_rules`].
306+
/// Callback function passed to [`yrx_scanner_iter_slowest_rules`].
307307
///
308308
/// The callback function receives pointers to the namespace and rule name,
309309
/// and two float numbers with the time spent by the rule matching patterns
@@ -314,26 +314,26 @@ unsafe fn slice_from_ptr_and_len<'a>(
314314
/// data owned by the user.
315315
///
316316
/// Requires the `rules-profiling` feature.
317-
pub type YRX_MOST_EXPENSIVE_RULES_CALLBACK = extern "C" fn(
317+
pub type YRX_SLOWEST_RULES_CALLBACK = extern "C" fn(
318318
namespace: *const c_char,
319319
rule: *const c_char,
320320
pattern_matching_time: f64,
321321
condition_exec_time: f64,
322322
user_data: *mut c_void,
323323
) -> ();
324324

325-
/// Iterates over the top N most expensive rules, calling the callback for
326-
/// each rule.
325+
/// Iterates over the slowest N rules, calling the callback for each rule.
327326
///
328-
/// Requires the `rules-profiling` feature, otherwise the
327+
/// Requires the `rules-profiling` feature, otherwise returns
328+
/// [`YRX_RESULT::NOT_SUPPORTED`].
329329
///
330-
/// See [`YRX_MOST_EXPENSIVE_RULES_CALLBACK`] for more details.
330+
/// See [`YRX_SLOWEST_RULES_CALLBACK`] for more details.
331331
#[no_mangle]
332332
#[allow(unused_variables)]
333-
pub unsafe extern "C" fn yrx_scanner_iter_most_expensive_rules(
333+
pub unsafe extern "C" fn yrx_scanner_iter_slowest_rules(
334334
scanner: *mut YRX_SCANNER,
335335
n: usize,
336-
callback: YRX_MOST_EXPENSIVE_RULES_CALLBACK,
336+
callback: YRX_SLOWEST_RULES_CALLBACK,
337337
user_data: *mut c_void,
338338
) -> YRX_RESULT {
339339
#[cfg(not(feature = "rules-profiling"))]
@@ -346,7 +346,7 @@ pub unsafe extern "C" fn yrx_scanner_iter_most_expensive_rules(
346346
None => return YRX_RESULT::INVALID_ARGUMENT,
347347
};
348348

349-
for profiling_info in scanner.inner.most_expensive_rules(n) {
349+
for profiling_info in scanner.inner.slowest_rules(n) {
350350
let namespace = CString::new(profiling_info.namespace).unwrap();
351351
let rule = CString::new(profiling_info.rule).unwrap();
352352

cli/src/commands/scan.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -354,8 +354,7 @@ pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> {
354354
};
355355

356356
#[cfg(feature = "rules-profiling")]
357-
let most_expensive_rules: Mutex<Vec<ProfilingData>> =
358-
Mutex::new(Vec::new());
357+
let slowest_rules: Mutex<Vec<ProfilingData>> = Mutex::new(Vec::new());
359358

360359
w.walk(
361360
state,
@@ -447,17 +446,20 @@ pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> {
447446
|scanner, _| {
448447
#[cfg(feature = "rules-profiling")]
449448
if profiling {
450-
let mut mer = most_expensive_rules.lock().unwrap();
451-
for er in scanner.most_expensive_rules(1000) {
449+
let mut mer = slowest_rules.lock().unwrap();
450+
for profiling_data in scanner.slowest_rules(1000) {
452451
if let Some(r) = mer.iter_mut().find(|r| {
453-
r.rule == er.rule && r.namespace == er.namespace
452+
r.rule == profiling_data.rule
453+
&& r.namespace == profiling_data.namespace
454454
}) {
455-
r.condition_exec_time += er.condition_exec_time;
456-
r.pattern_matching_time += er.pattern_matching_time;
457-
r.total_time +=
458-
er.condition_exec_time + er.pattern_matching_time;
455+
r.condition_exec_time +=
456+
profiling_data.condition_exec_time;
457+
r.pattern_matching_time +=
458+
profiling_data.pattern_matching_time;
459+
r.total_time += profiling_data.condition_exec_time
460+
+ profiling_data.pattern_matching_time;
459461
} else {
460-
mer.push(er.into());
462+
mer.push(profiling_data.into());
461463
}
462464
}
463465
}
@@ -495,7 +497,7 @@ pub fn exec_scan(args: &ArgMatches) -> anyhow::Result<()> {
495497

496498
#[cfg(feature = "rules-profiling")]
497499
if profiling {
498-
let mut mer = most_expensive_rules.lock().unwrap();
500+
let mut mer = slowest_rules.lock().unwrap();
499501

500502
println!("\n«««««««««««« PROFILING INFORMATION »»»»»»»»»»»»");
501503

go/scanner.go

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,17 @@ package yara_x
99
// return yrx_scanner_on_matching_rule(scanner, callback, (void*) user_data);
1010
// }
1111
//
12-
// enum YRX_RESULT static inline _yrx_scanner_iter_most_expensive_rules(
12+
// enum YRX_RESULT static inline _yrx_scanner_iter_slowest_rules(
1313
// struct YRX_SCANNER *scanner,
1414
// size_t n,
15-
// YRX_MOST_EXPENSIVE_RULES_CALLBACK callback,
16-
// uintptr_t most_expensive_rules_handle)
15+
// YRX_SLOWEST_RULES_CALLBACK callback,
16+
// uintptr_t slowest_rules_handle)
1717
// {
18-
// return yrx_scanner_iter_most_expensive_rules(scanner, n, callback, (void*) most_expensive_rules_handle);
18+
// return yrx_scanner_iter_slowest_rules(scanner, n, callback, (void*) slowest_rules_handle);
1919
// }
2020
//
2121
// extern void onMatchingRule(YRX_RULE*, uintptr_t);
22-
// extern void mostExpensiveRulesCallback(char*, char*, double, double, uintptr_t);
22+
// extern void slowestRulesCallback(char*, char*, double, double, uintptr_t);
2323
import "C"
2424

2525
import (
@@ -261,10 +261,10 @@ type ProfilingInfo struct {
261261
ConditionExecTime time.Duration
262262
}
263263

264-
// This is the callback called by yrx_rule_iter_patterns.
264+
// This is the callback called by yrx_scanner_iter_slowest_rules.
265265
//
266-
//export mostExpensiveRulesCallback
267-
func mostExpensiveRulesCallback(
266+
//export slowestRulesCallback
267+
func slowestRulesCallback(
268268
namespace *C.char,
269269
rule *C.char,
270270
patternMatchingTime C.double,
@@ -283,29 +283,29 @@ func mostExpensiveRulesCallback(
283283
})
284284
}
285285

286-
// MostExpensiveRules returns information about the slowest rules and how much
286+
// SlowestRules returns information about the slowest rules and how much
287287
// time they spent matching patterns and executing their conditions.
288288
//
289289
// In order to use this function the YARA-X C library must be built with
290290
// support for rules profiling, which is done by enabling the `rules-profiling`
291291
// feature. Otherwise, calling this function will cause a panic.
292-
func (s *Scanner) MostExpensiveRules(n int) []ProfilingInfo {
292+
func (s *Scanner) SlowestRules(n int) []ProfilingInfo {
293293
profilingInfo := make([]ProfilingInfo, 0)
294-
mostExpensiveRules := cgo.NewHandle(&profilingInfo)
295-
defer mostExpensiveRules.Delete()
294+
slowestRules := cgo.NewHandle(&profilingInfo)
295+
defer slowestRules.Delete()
296296

297-
result := C._yrx_scanner_iter_most_expensive_rules(
297+
result := C._yrx_scanner_iter_slowest_rules(
298298
s.cScanner,
299299
C.size_t(n),
300-
C.YRX_MOST_EXPENSIVE_RULES_CALLBACK(C.mostExpensiveRulesCallback),
301-
C.uintptr_t(mostExpensiveRules))
300+
C.YRX_SLOWEST_RULES_CALLBACK(C.slowestRulesCallback),
301+
C.uintptr_t(slowestRules))
302302

303303
if result == C.NOT_SUPPORTED {
304-
panic("MostExpensiveRules requires that the YARA-X C library is built with the `rules-profiling` feature")
304+
panic("SlowestRules requires that the YARA-X C library is built with the `rules-profiling` feature")
305305
}
306306

307307
if result != C.SUCCESS {
308-
panic("yrx_scanner_iter_most_expensive_rules failed")
308+
panic("yrx_scanner_slowest_rules failed")
309309
}
310310

311311
return profilingInfo

lib/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ exclude = [
1919
"src/modules/**/*.out"
2020
]
2121

22+
[package.metadata.docs.rs]
23+
all-features = true
24+
rustdoc-args = ["--cfg", "docsrs"]
25+
2226
[features]
2327
# Enables constant folding. When constant folding is enabled, expressions
2428
# like `2+2+2` and `true or false`, whose value can be determined at compile

lib/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ assert_eq!(results.matching_rules().len(), 1);
4242
*/
4343

4444
#![deny(missing_docs)]
45+
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
4546

4647
pub use compiler::compile;
4748
pub use compiler::Compiler;

lib/src/scanner/context.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,11 @@ pub(crate) struct ScanContext<'r> {
132132

133133
#[cfg(feature = "rules-profiling")]
134134
impl<'r> ScanContext<'r> {
135-
/// Returns the top N most expensive rules.
136-
pub fn most_expensive_rules(&self, n: usize) -> Vec<ProfilingData> {
135+
/// Returns the slowest N rules.
136+
///
137+
/// Profiling has a cumulative effect. When the scanner is used for
138+
/// scanning multiple files, the times add up.
139+
pub fn slowest_rules(&self, n: usize) -> Vec<ProfilingData> {
137140
debug_assert_eq!(
138141
self.compiled_rules.num_rules(),
139142
self.time_spent_in_rule.len()
@@ -190,6 +193,12 @@ impl<'r> ScanContext<'r> {
190193
result.truncate(n);
191194
result
192195
}
196+
197+
/// Clears profiling information.
198+
pub fn clear_profiling_data(&mut self) {
199+
self.time_spent_in_rule.fill(0);
200+
self.time_spent_in_pattern.clear();
201+
}
193202
}
194203

195204
impl ScanContext<'_> {

lib/src/scanner/mod.rs

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -527,10 +527,25 @@ impl<'r> Scanner<'r> {
527527
)
528528
}
529529

530-
/// Returns the top N most expensive rules.
530+
/// Returns profiling data for the slowest N rules.
531+
///
532+
/// The profiling data reflects the cumulative execution time of each rule
533+
/// across all scanned files. This information is useful for identifying
534+
/// performance bottlenecks. To reset the profiling data and start fresh
535+
/// for subsequent scans, use [`Scanner::clear_profiling_data`].
536+
#[cfg(feature = "rules-profiling")]
537+
pub fn slowest_rules(&self, n: usize) -> Vec<ProfilingData> {
538+
self.wasm_store.data().slowest_rules(n)
539+
}
540+
541+
/// Clears all accumulated profiling data.
542+
///
543+
/// This method resets the profiling data collected during rule execution
544+
/// across scanned files. Use this to start a new profiling session, ensuring
545+
/// the results reflect only the data gathered after this method is called.
531546
#[cfg(feature = "rules-profiling")]
532-
pub fn most_expensive_rules(&self, n: usize) -> Vec<ProfilingData> {
533-
self.wasm_store.data().most_expensive_rules(n)
547+
pub fn clear_profiling_data(&mut self) {
548+
self.wasm_store.data_mut().clear_profiling_data()
534549
}
535550
}
536551

@@ -757,7 +772,7 @@ impl<'r> Scanner<'r> {
757772

758773
#[cfg(all(feature = "rules-profiling", feature = "logging"))]
759774
{
760-
let most_expensive_rules = self.most_expensive_rules(10);
775+
let most_expensive_rules = self.slowest_rules(10);
761776
if !most_expensive_rules.is_empty() {
762777
log::info!("Most expensive rules:");
763778
for profiling_data in most_expensive_rules {

0 commit comments

Comments
 (0)