Skip to content

Commit

Permalink
refactor: allow calling non-global rules from global ones.
Browse files Browse the repository at this point in the history
  • Loading branch information
plusvic committed May 27, 2024
1 parent c7759f8 commit 865db1d
Show file tree
Hide file tree
Showing 12 changed files with 173 additions and 361 deletions.
26 changes: 1 addition & 25 deletions lib/src/compiler/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ use std::rc::Rc;
use yara_x_parser::report::ReportBuilder;

use crate::compiler::ir::PatternIdx;
use crate::compiler::{ir, IdentId, RuleId, RuleInfo, Warnings};
use crate::string_pool::StringPool;
use crate::compiler::{ir, Warnings};
use crate::symbols::{StackedSymbolTable, SymbolLookup};
use crate::types::Type;
use crate::wasm;
Expand All @@ -26,19 +25,13 @@ pub(in crate::compiler) struct CompileContext<'a, 'src, 'sym> {
/// (i.e: `symbol_table`) is ignored.
pub current_symbol_table: Option<Rc<dyn SymbolLookup + 'a>>,

/// Information about the rules compiled so far.
pub rules: &'a Vec<RuleInfo>,

/// Reference to a vector that contains the IR for the patterns declared
/// in the current rule.
pub current_rule_patterns: &'a mut Vec<ir::PatternInRule<'src>>,

/// Warnings generated during the compilation.
pub warnings: &'a mut Warnings,

/// Pool with identifiers used in the rules.
pub ident_pool: &'a mut StringPool<IdentId>,

/// Stack of variables. These are local variables used during the
/// evaluation of rule conditions, for example for storing loop variables.
pub vars: VarStack,
Expand All @@ -48,23 +41,6 @@ pub(in crate::compiler) struct CompileContext<'a, 'src, 'sym> {
}

impl<'a, 'src, 'sym> CompileContext<'a, 'src, 'sym> {
/// Looks up the [`RuleInfo`] stored in `rules` for the given [`RuleId`].
///
/// The numeric value wrapped by the [`RuleId`] is used as the index into
/// the `rules` vector.
///
/// # Panics
///
/// If `rules` has no entry at the index derived from `rule_id`.
#[inline]
pub fn get_rule(&self, rule_id: RuleId) -> &RuleInfo {
    let index = rule_id.0 as usize;
    self.rules.get(index).unwrap()
}

/// Returns the [`RuleInfo`] for the rule that is currently being
/// compiled, which is the last entry in `rules`.
///
/// # Panics
///
/// If `rules` is empty.
#[inline]
pub fn get_current_rule(&self) -> &RuleInfo {
    let newest = self.rules.last();
    newest.unwrap()
}

/// Given a pattern identifier (e.g. `$a`, `#a`, `@a`) search for it in
/// the current rule and return its position.
///
Expand Down
53 changes: 2 additions & 51 deletions lib/src/compiler/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,21 +246,7 @@ pub(super) fn emit_rule_condition(
rule_id: RuleId,
condition: &mut Expr,
) {
// Global and non-global rules are put into two independent instruction
// sequences. The global rules are put into the instruction sequence that
// gets executed first, which means that global rules will be executed
// before any non-global rule, regardless of their order in the source
// code. Within the same group (global and non-global) rules maintain their
// relative order, though.
//
// Global rules cannot invoke non-global rules. As global rules will always
// run before non-global ones, the former can't rely on the result of the
// latter.
let mut instr = if ctx.current_rule.is_global {
builder.new_global_rule()
} else {
builder.new_rule()
};
let mut instr = builder.start_rule(rule_id, ctx.current_rule.is_global);

// When the "logging" feature is enabled, print a log before starting to
// evaluate the rule's condition. In case of error during the evaluation
Expand All @@ -286,42 +272,7 @@ pub(super) fn emit_rule_condition(
},
);

// Check if the result from the condition is zero (false).
instr.unop(UnaryOp::I32Eqz);
instr.if_else(
None,
|then_| {
// The condition is false. For normal rules we don't do anything,
// but for global rules we must call `global_rule_no_match` and
// return 1.
//
// By returning 1 the function that contains the logic for this
// rule exits immediately, preventing any other rule (both global
// and non-global) in the same namespace from being executed, and
// therefore they will remain false.
//
// This guarantees that any global rule that returns false, forces
// the non-global rules in the same namespace to be false. There
// may be some global rules that matched before, though. The
// purpose of `global_rule_no_match` is reverting those previous
// matches.
if ctx.current_rule.is_global {
// Call `global_rule_no_match`.
then_.i32_const(rule_id.0);
then_.call(ctx.function_id(
wasm::export__global_rule_no_match.mangled_name,
));
// Return 1.
then_.i32_const(1);
then_.return_();
}
},
|else_| {
// The condition is true, call `rule_match`.
else_.i32_const(rule_id.0);
else_.call(ctx.function_id(wasm::export__rule_match.mangled_name));
},
);
builder.finish_rule();
}

/// Emits WASM code for `expr` into the instruction sequence `instr`.
Expand Down
24 changes: 0 additions & 24 deletions lib/src/compiler/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,30 +155,6 @@ pub enum CompileError {
ident_span: Span,
},

#[error("global rule `{global_rule}` depends on non-global rule `{non_global_rule}`")]
#[label(
"`{non_global_rule}` is used in the condition of `{global_rule}`",
non_global_rule_usage_span
)]
#[label(
"non-global rule `{non_global_rule}` declared here",
non_global_rule_span,
style = "note"
)]
#[label(
"global rule `{global_rule}` declared here",
global_rule_span,
style = "note"
)]
WrongRuleDependency {
detailed_report: String,
global_rule: String,
non_global_rule: String,
global_rule_span: Span,
non_global_rule_span: Span,
non_global_rule_usage_span: Span,
},

#[error("invalid regular expression")]
#[label("{error}", span)]
#[note(note)]
Expand Down
29 changes: 1 addition & 28 deletions lib/src/compiler/ir/ast2ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -385,34 +385,7 @@ pub(in crate::compiler) fn expr_from_ast(
}

let symbol = symbol.unwrap();

// Return error if a global rule depends on a non-global rule. This
// is an error because global rules are evaluated before non-global
// rules, even if the global rule appears after the non-global one
// in the source code. This means that by the time the global rule
// is being evaluated we can't know if the non-global rule matched
// or not.
// A global rule can depend on another global rule. And non-global
// rules can depend both on global rules and non-global ones.
if let SymbolKind::Rule(rule_id) = symbol.kind() {
let current_rule = ctx.get_current_rule();
let used_rule = ctx.get_rule(*rule_id);
if current_rule.is_global && !used_rule.is_global {
return Err(Box::new(CompileError::wrong_rule_dependency(
ctx.report_builder,
ctx.ident_pool
.get(current_rule.ident_id)
.unwrap()
.to_string(),
ident.name.to_string(),
current_rule.ident_span,
used_rule.ident_span,
ident.span,
),
));
}
}


#[cfg(feature = "constant-folding")]
{
let type_value = symbol.type_value();
Expand Down
11 changes: 8 additions & 3 deletions lib/src/compiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -780,9 +780,7 @@ impl<'a> Compiler<'a> {
relaxed_re_syntax: self.relaxed_re_syntax,
current_symbol_table: None,
symbol_table: &mut self.symbol_table,
ident_pool: &mut self.ident_pool,
report_builder: &self.report_builder,
rules: &self.rules,
current_rule_patterns: &mut rule_patterns,
warnings: &mut self.warnings,
vars: VarStack::new(),
Expand Down Expand Up @@ -1722,7 +1720,7 @@ impl From<LiteralId> for u64 {
pub(crate) struct NamespaceId(i32);

/// ID associated to each rule.
#[derive(Copy, Clone, Debug)]
#[derive(Copy, Clone, Debug, Default)]
pub(crate) struct RuleId(i32);

impl From<i32> for RuleId {
Expand All @@ -1746,6 +1744,13 @@ impl From<RuleId> for usize {
}
}

impl From<RuleId> for i32 {
#[inline]
fn from(value: RuleId) -> Self {
value.0
}
}

/// ID associated to each regexp used in a rule condition.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub(crate) struct RegexpId(i32);
Expand Down
9 changes: 0 additions & 9 deletions lib/src/compiler/tests/testdata/errors/36.in

This file was deleted.

14 changes: 0 additions & 14 deletions lib/src/compiler/tests/testdata/errors/36.out

This file was deleted.

41 changes: 17 additions & 24 deletions lib/src/scanner/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,17 @@ pub(crate) struct ScanContext<'r> {
/// Length of data being scanned.
pub scanned_data_len: usize,
/// Vector containing the IDs of the non-private rules that matched,
/// including both global and non-global ones. Global rules are initially
/// added to `global_matching_rules`, and once all the rules in the
/// namespace are evaluated, the global rules that matched are moved
/// to this vector.
/// including both global and non-global ones. The rules are added first
/// to the `matching_rules` map, and then moved to this vector once the
/// scan finishes.
pub non_private_matching_rules: Vec<RuleId>,
/// Vector containing the IDs of the private rules that matched, including
/// both global and non-global ones.
/// both global and non-global ones. The rules are added first to the
/// `matching_rules` map, and then moved to this vector once the scan
/// finishes.
pub private_matching_rules: Vec<RuleId>,
/// Map containing the IDs of the global rules that matched.
pub global_matching_rules: FxHashMap<NamespaceId, Vec<RuleId>>,
/// Map containing the IDs of rules that matched.
pub matching_rules: FxHashMap<NamespaceId, Vec<RuleId>>,
/// Compiled rules for this scan.
pub compiled_rules: &'r Rules,
/// Structure that contains top-level symbols, like module names
Expand Down Expand Up @@ -246,21 +247,19 @@ impl ScanContext<'_> {

/// Called during the scan process when a global rule didn't match.
///
/// When this happens any other global rule in the same namespace that
/// matched previously is reset to a non-matching state.
/// When this happens any other rule in the same namespace that matched
/// previously is reset to a non-matching state.
pub(crate) fn track_global_rule_no_match(&mut self, rule_id: RuleId) {
let rule = self.compiled_rules.get(rule_id);

// This function must be called only for global rules.
debug_assert!(rule.is_global);

// All the global rules that matched previously, and are in the same
// All the rules that matched previously, and are in the same
// namespace as the non-matching rule, must be removed from the
// `global_matching_rules` map. Also, their corresponding bits in
// `matching_rules` map. Also, their corresponding bits in
// the matching rules bitmap must be cleared.
if let Some(rules) =
self.global_matching_rules.get_mut(&rule.namespace_id)
{
if let Some(rules) = self.matching_rules.get_mut(&rule.namespace_id) {
let wasm_store = unsafe { self.wasm_store.as_mut() };
let main_mem = self.main_memory.unwrap().data_mut(wasm_store);

Expand Down Expand Up @@ -293,16 +292,10 @@ impl ScanContext<'_> {
rule_id,
);

if rule.is_global {
self.global_matching_rules
.entry(rule.namespace_id)
.or_default()
.push(rule_id);
} else if rule.is_private {
self.private_matching_rules.push(rule_id);
} else {
self.non_private_matching_rules.push(rule_id);
}
self.matching_rules
.entry(rule.namespace_id)
.or_default()
.push(rule_id);

let wasm_store = unsafe { self.wasm_store.as_mut() };
let mem = self.main_memory.unwrap().data_mut(wasm_store);
Expand Down
8 changes: 4 additions & 4 deletions lib/src/scanner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ impl<'r> Scanner<'r> {
scanned_data_len: 0,
private_matching_rules: Vec::new(),
non_private_matching_rules: Vec::new(),
global_matching_rules: FxHashMap::default(),
matching_rules: FxHashMap::default(),
main_memory: None,
module_outputs: FxHashMap::default(),
user_provided_module_outputs: FxHashMap::default(),
Expand Down Expand Up @@ -671,10 +671,10 @@ impl<'r> Scanner<'r> {
// to some struct.
ctx.current_struct = None;

// Move all the rules in `global_matching_rules` to `private_matching_rules`
// and `non_private_matching_rules`, leaving `global_matching_rules`
// Move all the rules in `matching_rules` to `private_matching_rules`
// and `non_private_matching_rules`, leaving `matching_rules`
// empty.
for rules in ctx.global_matching_rules.values_mut() {
for rules in ctx.matching_rules.values_mut() {
for rule_id in rules.drain(0..) {
if ctx.compiled_rules.get(rule_id).is_private {
ctx.private_matching_rules.push(rule_id);
Expand Down
7 changes: 6 additions & 1 deletion lib/src/scanner/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,10 +401,15 @@ fn global_rules() {
compiler
.add_source(
r#"
// This rule is always true.
private rule const_true {
condition:
true
}
// This global rule doesn't affect the results because it's true.
global rule global_true {
condition:
true
const_true
}
// Even if the condition is true, this rule doesn't match because of
// the false global rule that follows.
Expand Down
Loading

0 comments on commit 865db1d

Please sign in to comment.