Skip to content

Commit 865db1d

Browse files
committed
refactor: allow calling non-global rules from global ones.
1 parent c7759f8 commit 865db1d

File tree

12 files changed

+173
-361
lines changed

12 files changed

+173
-361
lines changed

lib/src/compiler/context.rs

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@ use std::rc::Rc;
55
use yara_x_parser::report::ReportBuilder;
66

77
use crate::compiler::ir::PatternIdx;
8-
use crate::compiler::{ir, IdentId, RuleId, RuleInfo, Warnings};
9-
use crate::string_pool::StringPool;
8+
use crate::compiler::{ir, Warnings};
109
use crate::symbols::{StackedSymbolTable, SymbolLookup};
1110
use crate::types::Type;
1211
use crate::wasm;
@@ -26,19 +25,13 @@ pub(in crate::compiler) struct CompileContext<'a, 'src, 'sym> {
2625
/// (i.e: `symbol_table`) is ignored.
2726
pub current_symbol_table: Option<Rc<dyn SymbolLookup + 'a>>,
2827

29-
/// Information about the rules compiled so far.
30-
pub rules: &'a Vec<RuleInfo>,
31-
3228
/// Reference to a vector that contains the IR for the patterns declared
3329
/// in the current rule.
3430
pub current_rule_patterns: &'a mut Vec<ir::PatternInRule<'src>>,
3531

3632
/// Warnings generated during the compilation.
3733
pub warnings: &'a mut Warnings,
3834

39-
/// Pool with identifiers used in the rules.
40-
pub ident_pool: &'a mut StringPool<IdentId>,
41-
4235
/// Stack of variables. These are local variables used during the
4336
/// evaluation of rule conditions, for example for storing loop variables.
4437
pub vars: VarStack,
@@ -48,23 +41,6 @@ pub(in crate::compiler) struct CompileContext<'a, 'src, 'sym> {
4841
}
4942

5043
impl<'a, 'src, 'sym> CompileContext<'a, 'src, 'sym> {
51-
/// Returns a [`RuleInfo`] given its [`RuleId`].
52-
///
53-
/// # Panics
54-
///
55-
/// If no rule with such [`RuleId`] exists.
56-
#[inline]
57-
pub fn get_rule(&self, rule_id: RuleId) -> &RuleInfo {
58-
self.rules.get(rule_id.0 as usize).unwrap()
59-
}
60-
61-
/// Returns the [`RuleInfo`] structure corresponding to the rule currently
62-
/// being compiled.
63-
#[inline]
64-
pub fn get_current_rule(&self) -> &RuleInfo {
65-
self.rules.last().unwrap()
66-
}
67-
6844
/// Given a pattern identifier (e.g. `$a`, `#a`, `@a`) search for it in
6945
/// the current rule and return its position.
7046
///

lib/src/compiler/emit.rs

Lines changed: 2 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -246,21 +246,7 @@ pub(super) fn emit_rule_condition(
246246
rule_id: RuleId,
247247
condition: &mut Expr,
248248
) {
249-
// Global and non-global rules are put into two independent instruction
250-
// sequences. The global rules are put into the instruction sequence that
251-
// gets executed first, which means that global rules will be executed
252-
// before any non-global rule, regardless of their order in the source
253-
// code. Within the same group (global and non-global) rules maintain their
254-
// relative order, though.
255-
//
256-
// Global rules can not invoke non-global rule. As global rules will always
257-
// run before non-global ones, the former can't rely on the result of the
258-
// latter.
259-
let mut instr = if ctx.current_rule.is_global {
260-
builder.new_global_rule()
261-
} else {
262-
builder.new_rule()
263-
};
249+
let mut instr = builder.start_rule(rule_id, ctx.current_rule.is_global);
264250

265251
// When the "logging" feature is enabled, print a log before the starting
266252
// evaluating the rule's condition. In case of error during the evaluation
@@ -286,42 +272,7 @@ pub(super) fn emit_rule_condition(
286272
},
287273
);
288274

289-
// Check if the result from the condition is zero (false).
290-
instr.unop(UnaryOp::I32Eqz);
291-
instr.if_else(
292-
None,
293-
|then_| {
294-
// The condition is false. For normal rules we don't do anything,
295-
// but for global rules we must call `global_rule_no_match` and
296-
// return 1.
297-
//
298-
// By returning 1 the function that contains the logic for this
299-
// rule exits immediately, preventing any other rule (both global
300-
// and non-global) in the same namespace is executed, and therefore
301-
// they will remain false.
302-
//
303-
// This guarantees that any global rule that returns false, forces
304-
// the non-global rules in the same namespace to be false. There
305-
// may be some global rules that matched before, though. The
306-
// purpose of `global_rule_no_match` is reverting those previous
307-
// matches.
308-
if ctx.current_rule.is_global {
309-
// Call `global_rule_no_match`.
310-
then_.i32_const(rule_id.0);
311-
then_.call(ctx.function_id(
312-
wasm::export__global_rule_no_match.mangled_name,
313-
));
314-
// Return 1.
315-
then_.i32_const(1);
316-
then_.return_();
317-
}
318-
},
319-
|else_| {
320-
// The condition is true, call `rule_match`.
321-
else_.i32_const(rule_id.0);
322-
else_.call(ctx.function_id(wasm::export__rule_match.mangled_name));
323-
},
324-
);
275+
builder.finish_rule();
325276
}
326277

327278
/// Emits WASM code for `expr` into the instruction sequence `instr`.

lib/src/compiler/errors.rs

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -155,30 +155,6 @@ pub enum CompileError {
155155
ident_span: Span,
156156
},
157157

158-
#[error("global rule `{global_rule}` depends on non-global rule `{non_global_rule}`")]
159-
#[label(
160-
"`{non_global_rule}` is used in the condition of `{global_rule}`",
161-
non_global_rule_usage_span
162-
)]
163-
#[label(
164-
"non-global rule `{non_global_rule}` declared here",
165-
non_global_rule_span,
166-
style = "note"
167-
)]
168-
#[label(
169-
"global rule `{global_rule}` declared here",
170-
global_rule_span,
171-
style = "note"
172-
)]
173-
WrongRuleDependency {
174-
detailed_report: String,
175-
global_rule: String,
176-
non_global_rule: String,
177-
global_rule_span: Span,
178-
non_global_rule_span: Span,
179-
non_global_rule_usage_span: Span,
180-
},
181-
182158
#[error("invalid regular expression")]
183159
#[label("{error}", span)]
184160
#[note(note)]

lib/src/compiler/ir/ast2ir.rs

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -385,34 +385,7 @@ pub(in crate::compiler) fn expr_from_ast(
385385
}
386386

387387
let symbol = symbol.unwrap();
388-
389-
// Return error if a global rule depends on a non-global rule. This
390-
// is an error because global rules are evaluated before non-global
391-
// rules, even if the global rule appears after the non-global one
392-
// in the source code. This means that by the time the global rule
393-
// is being evaluated we can't know if the non-global rule matched
394-
// or not.
395-
// A global rule can depend on another global rule. And non-global
396-
// rules can depend both on global rules and non-global ones.
397-
if let SymbolKind::Rule(rule_id) = symbol.kind() {
398-
let current_rule = ctx.get_current_rule();
399-
let used_rule = ctx.get_rule(*rule_id);
400-
if current_rule.is_global && !used_rule.is_global {
401-
return Err(Box::new(CompileError::wrong_rule_dependency(
402-
ctx.report_builder,
403-
ctx.ident_pool
404-
.get(current_rule.ident_id)
405-
.unwrap()
406-
.to_string(),
407-
ident.name.to_string(),
408-
current_rule.ident_span,
409-
used_rule.ident_span,
410-
ident.span,
411-
),
412-
));
413-
}
414-
}
415-
388+
416389
#[cfg(feature = "constant-folding")]
417390
{
418391
let type_value = symbol.type_value();

lib/src/compiler/mod.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -780,9 +780,7 @@ impl<'a> Compiler<'a> {
780780
relaxed_re_syntax: self.relaxed_re_syntax,
781781
current_symbol_table: None,
782782
symbol_table: &mut self.symbol_table,
783-
ident_pool: &mut self.ident_pool,
784783
report_builder: &self.report_builder,
785-
rules: &self.rules,
786784
current_rule_patterns: &mut rule_patterns,
787785
warnings: &mut self.warnings,
788786
vars: VarStack::new(),
@@ -1722,7 +1720,7 @@ impl From<LiteralId> for u64 {
17221720
pub(crate) struct NamespaceId(i32);
17231721

17241722
/// ID associated to each rule.
1725-
#[derive(Copy, Clone, Debug)]
1723+
#[derive(Copy, Clone, Debug, Default)]
17261724
pub(crate) struct RuleId(i32);
17271725

17281726
impl From<i32> for RuleId {
@@ -1746,6 +1744,13 @@ impl From<RuleId> for usize {
17461744
}
17471745
}
17481746

1747+
impl From<RuleId> for i32 {
1748+
#[inline]
1749+
fn from(value: RuleId) -> Self {
1750+
value.0
1751+
}
1752+
}
1753+
17491754
/// ID associated to each regexp used in a rule condition.
17501755
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
17511756
pub(crate) struct RegexpId(i32);

lib/src/compiler/tests/testdata/errors/36.in

Lines changed: 0 additions & 9 deletions
This file was deleted.

lib/src/compiler/tests/testdata/errors/36.out

Lines changed: 0 additions & 14 deletions
This file was deleted.

lib/src/scanner/context.rs

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -49,16 +49,17 @@ pub(crate) struct ScanContext<'r> {
4949
/// Length of data being scanned.
5050
pub scanned_data_len: usize,
5151
/// Vector containing the IDs of the non-private rules that matched,
52-
/// including both global and non-global ones. Global rules are initially
53-
/// added to `global_matching_rules`, and once all the rules in the
54-
/// namespace are evaluated, the global rules that matched are moved
55-
/// to this vector.
52+
/// including both global and non-global ones. The rules are added first
53+
/// to the `matching_rules` map, and then moved to this vector once the
54+
/// scan finishes.
5655
pub non_private_matching_rules: Vec<RuleId>,
5756
/// Vector containing the IDs of the private rules that matched, including
58-
/// both global and non-global ones.
57+
/// both global and non-global ones. The rules are added first to the
58+
/// `matching_rules` map, and then moved to this vector once the scan
59+
/// finishes.
5960
pub private_matching_rules: Vec<RuleId>,
60-
/// Map containing the IDs of the global rules that matched.
61-
pub global_matching_rules: FxHashMap<NamespaceId, Vec<RuleId>>,
61+
/// Map containing the IDs of rules that matched.
62+
pub matching_rules: FxHashMap<NamespaceId, Vec<RuleId>>,
6263
/// Compiled rules for this scan.
6364
pub compiled_rules: &'r Rules,
6465
/// Structure that contains top-level symbols, like module names
@@ -246,21 +247,19 @@ impl ScanContext<'_> {
246247

247248
/// Called during the scan process when a global rule didn't match.
248249
///
249-
/// When this happens any other global rule in the same namespace that
250-
/// matched previously is reset to a non-matching state.
250+
/// When this happens any other rule in the same namespace that matched
251+
/// previously is reset to a non-matching state.
251252
pub(crate) fn track_global_rule_no_match(&mut self, rule_id: RuleId) {
252253
let rule = self.compiled_rules.get(rule_id);
253254

254255
// This function must be called only for global rules.
255256
debug_assert!(rule.is_global);
256257

257-
// All the global rules that matched previously, and are in the same
258+
// All the rules that matched previously, and are in the same
258259
// namespace as the non-matching rule, must be removed from the
259-
// `global_matching_rules` map. Also, their corresponding bits in
260+
// `matching_rules` map. Also, their corresponding bits in
260261
// the matching rules bitmap must be cleared.
261-
if let Some(rules) =
262-
self.global_matching_rules.get_mut(&rule.namespace_id)
263-
{
262+
if let Some(rules) = self.matching_rules.get_mut(&rule.namespace_id) {
264263
let wasm_store = unsafe { self.wasm_store.as_mut() };
265264
let main_mem = self.main_memory.unwrap().data_mut(wasm_store);
266265

@@ -293,16 +292,10 @@ impl ScanContext<'_> {
293292
rule_id,
294293
);
295294

296-
if rule.is_global {
297-
self.global_matching_rules
298-
.entry(rule.namespace_id)
299-
.or_default()
300-
.push(rule_id);
301-
} else if rule.is_private {
302-
self.private_matching_rules.push(rule_id);
303-
} else {
304-
self.non_private_matching_rules.push(rule_id);
305-
}
295+
self.matching_rules
296+
.entry(rule.namespace_id)
297+
.or_default()
298+
.push(rule_id);
306299

307300
let wasm_store = unsafe { self.wasm_store.as_mut() };
308301
let mem = self.main_memory.unwrap().data_mut(wasm_store);

lib/src/scanner/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ impl<'r> Scanner<'r> {
151151
scanned_data_len: 0,
152152
private_matching_rules: Vec::new(),
153153
non_private_matching_rules: Vec::new(),
154-
global_matching_rules: FxHashMap::default(),
154+
matching_rules: FxHashMap::default(),
155155
main_memory: None,
156156
module_outputs: FxHashMap::default(),
157157
user_provided_module_outputs: FxHashMap::default(),
@@ -671,10 +671,10 @@ impl<'r> Scanner<'r> {
671671
// to some struct.
672672
ctx.current_struct = None;
673673

674-
// Move all the in `global_matching_rules` to `private_matching_rules`
675-
// and `non_private_matching_rules`, leaving `global_matching_rules`
674+
// Move all the in `matching_rules` to `private_matching_rules`
675+
// and `non_private_matching_rules`, leaving `matching_rules`
676676
// empty.
677-
for rules in ctx.global_matching_rules.values_mut() {
677+
for rules in ctx.matching_rules.values_mut() {
678678
for rule_id in rules.drain(0..) {
679679
if ctx.compiled_rules.get(rule_id).is_private {
680680
ctx.private_matching_rules.push(rule_id);

lib/src/scanner/tests.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,10 +401,15 @@ fn global_rules() {
401401
compiler
402402
.add_source(
403403
r#"
404+
// This rule is always true.
405+
private rule const_true {
406+
condition:
407+
true
408+
}
404409
// This global rule doesn't affect the results because it's true.
405410
global rule global_true {
406411
condition:
407-
true
412+
const_true
408413
}
409414
// Even if the condition is true, this rule doesn't match because of
410415
// the false global rule that follows.

0 commit comments

Comments
 (0)