Skip to content

Commit

Permalink
Merge pull request #71 from seb-acsc/feat/performance
Browse files — browse the repository at this point in the history
Improve non-venv performance by 85x
  • Loading branch information
cccs-rs authored Dec 10, 2024
2 parents d3857bd + 6fab062 commit 6c187e1
Show file tree
Hide file tree
Showing 13 changed files with 255 additions and 118 deletions.
3 changes: 1 addition & 2 deletions demo_extractors/complex/complex.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from io import BytesIO
from typing import List, Optional

from maco import extractor, model, yara

from demo_extractors.complex import complex_utils
from maco import extractor, model, yara


class Complex(extractor.Extractor):
Expand Down
7 changes: 5 additions & 2 deletions demo_extractors/limit_other.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from io import BytesIO
from typing import Dict, List, Optional

from maco import extractor, model, yara

from demo_extractors import shared
from maco import extractor, model, yara


class LimitOther(extractor.Extractor):
Expand All @@ -24,6 +23,10 @@ class LimitOther(extractor.Extractor):
"""

def run(self, stream: BytesIO, matches: List[yara.Match]) -> Optional[model.ExtractorModel]:
# import httpx at runtime so we can test that requirements.txt is installed dynamically without breaking
# the tests that do direct importing
import httpx

# use a custom model that inherits from ExtractorModel
# this model defines what can go in the 'other' dict
tmp = shared.MyCustomModel(family="specify_other")
Expand Down
1 change: 1 addition & 0 deletions demo_extractors/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
httpx
1 change: 1 addition & 0 deletions demo_extractors/shared.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Optional

import pydantic

from maco import model
Expand Down
23 changes: 15 additions & 8 deletions maco/base_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,19 @@ class BaseTest(unittest.TestCase):
# I recommend something like os.path.join(__file__, "../../extractors")
# if your extractors are in a folder 'extractors' next to a folder of tests
path: str = None
create_venv: bool=False

def setUp(self) -> None:
if not self.name or not self.path:
@classmethod
def setUpClass(cls) -> None:
if not cls.name or not cls.path:
raise Exception("name and path must be set")
self.c = collector.Collector(self.path, include=[self.name])
cls.c = collector.Collector(cls.path, include=[cls.name], create_venv=cls.create_venv)
return super().setUpClass()

def test_default_metadata(self):
"""Require extractor to be loadable and valid."""
self.assertIn(self.name, self.c.extractors)
self.assertEqual(len(self.c.extractors), 1)
return super().setUp()

def extract(self, stream):
"""Return results for running extractor over stream, including yara check."""
Expand All @@ -49,18 +54,20 @@ def extract(self, stream):
resp = self.c.extract(stream, self.name)
return resp

def _get_location(self) -> str:
@classmethod
def _get_location(cls) -> str:
"""Return path to child class that implements this class."""
# import child module
module = type(self).__module__
module = cls.__module__
i = importlib.import_module(module)
# get location to child module
return i.__file__

def load_cart(self, filepath: str) -> io.BytesIO:
@classmethod
def load_cart(cls, filepath: str) -> io.BytesIO:
"""Load and unneuter a test file (likely malware) into memory for processing."""
# it is nice if we can load files relative to whatever is implementing base_test
dirpath = os.path.split(self._get_location())[0]
dirpath = os.path.split(cls._get_location())[0]
# either filepath is absolute, or should be loaded relative to child of base_test
filepath = os.path.join(dirpath, filepath)
if not os.path.isfile(filepath):
Expand Down
2 changes: 1 addition & 1 deletion maco/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def main():
parser.add_argument(
"--create_venv",
action="store_true",
help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory)",
help="Creates venvs for every requirements.txt found (only applies when extractor path is a directory). This runs much slower than the alternative but may be necessary when there are many extractors with conflicting dependencies.",
)
args = parser.parse_args()
inc = args.include.split(",") if args.include else []
Expand Down
8 changes: 4 additions & 4 deletions maco/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ def __init__(self) -> None:
# check yara rules conform to expected structure
# we throw away these compiled rules as we need all rules in system compiled together
try:
rules = yara.compile(source=self.yara_rule)
self.yara_compiled = yara.compile(source=self.yara_rule)
except yara.SyntaxError as e:
raise InvalidExtractor(f"{self.name} - invalid yara rule") from e
# need to track which plugin owns the rules
self.yara_rule_names = [x.identifier for x in rules]
if not len(list(rules)):
self.yara_rule_names = [x.identifier for x in self.yara_compiled]
if not len(list(self.yara_compiled)):
raise InvalidExtractor(f"{name} must define at least one yara rule")
for x in rules:
for x in self.yara_compiled:
if x.is_global:
raise InvalidExtractor(f"{x.identifier} yara rule must not be global")

Expand Down
Loading

0 comments on commit 6c187e1

Please sign in to comment.