diff --git a/analyzer/linux/analyzer.py b/analyzer/linux/analyzer.py
index 710dd664270..bf633a39109 100644
--- a/analyzer/linux/analyzer.py
+++ b/analyzer/linux/analyzer.py
@@ -6,6 +6,7 @@
 import logging
 import os
 import pkgutil
+import re
 import sys
 import tempfile
 import time
@@ -24,6 +25,7 @@
 from lib.core.packages import choose_package_class
 from lib.core.startup import create_folders, init_logging
 from modules import auxiliary
+from threading import Thread
 
 log = logging.getLogger()
 
@@ -33,7 +35,7 @@
 PROCESS_LIST = set()
 SEEN_LIST = set()
 PPID = Process(pid=PID).get_parent_pid()
-
+MEM_PATH = PATHS.get("memory")
 
 def add_pids(pids):
     """Add PID."""
@@ -47,7 +49,6 @@ def add_pids(pids):
             PROCESS_LIST.add(pid)
         SEEN_LIST.add(pid)
 
-
 def dump_files():
     """Dump all the dropped files."""
     for file_path in FILES_LIST:
@@ -58,6 +59,73 @@ def dump_files():
             category="tlsdump",
         )
 
+def monitor_new_processes(parent_pid, interval=0.25):
+    """Continuously monitor for new child processes."""
+    known_processes = set(get_all_child_processes(parent_pid))
+    while True:
+        current_processes = set(get_all_child_processes(parent_pid))
+        new_processes = current_processes - known_processes
+
+        for pid in new_processes:
+            log.info(f"New child process detected: {pid}")
+            dump_memory(pid)
+            add_pids(pid)  # Add the new process to PROCESS_LIST
+
+        known_processes.update(new_processes)
+        time.sleep(interval)
+
+def get_all_child_processes(parent_pid, all_children=None):
+    """Recursively finds all child processes of a given parent PID."""
+    if all_children is None:
+        all_children = []
+    try:
+        children_file_path = f"/proc/{parent_pid}/task/{parent_pid}/children"
+        with open(children_file_path, "r") as f:
+            for child_pid in f.read().strip().split():
+                all_children.append(int(child_pid))
+                get_all_child_processes(int(child_pid), all_children)
+    except FileNotFoundError:
+        pass
+    return all_children
+
+def dump_memory(pid):
+    """Dump memory of a process, avoiding duplicates."""
+    #with process_lock:
+    if pid in DUMPED_LIST:
+        return  # Skip if already dumped
+    try:
+        maps_file = open(f"/proc/{pid}/maps", 'r')
+        mem_file = open(f"/proc/{pid}/mem", 'rb', 0)
+        output_file = open(f"{MEM_PATH}/{pid}.dump", 'wb')
+
+        for line in maps_file.readlines():
+            m = re.match(r'([0-9A-Fa-f]+)-([0-9A-Fa-f]+) ([-r])(\S+)\s+\d+\s+\S+\s+\d+\s*(.*)?', line)
+            if m and m.group(3) == 'r':
+                # Testing: Uncomment to skip memory regions associated with dynamic libraries
+                # pathname = m.group(5)
+                # if pathname and (pathname.endswith('.so') or 'lib' in pathname or '[' in pathname):
+                #     continue
+                start = int(m.group(1), 16)
+                end = int(m.group(2), 16)
+                try:
+                    mem_file.seek(start)
+                    chunk = mem_file.read(end - start)
+                    output_file.write(chunk)
+                except (OSError, ValueError) as e:
+                    log.error(f"Could not read memory range {start:x}-{end:x}: {e}")
+        maps_file.close()
+        mem_file.close()
+        output_file.close()
+    except FileNotFoundError:
+        log.error(f"Process with PID {pid} not found.")
+    except PermissionError:
+        log.error(f"Permission denied to access process with PID {pid}.")
+
+    if os.path.exists(f"{MEM_PATH}/{pid}.dump"):
+        upload_to_host(f"{MEM_PATH}/{pid}.dump", f"memory/{pid}.dump")
+        DUMPED_LIST.add(pid)
+    else:
+        log.error(f"Memdump file not found in guest machine for PID {pid}")
 
 class Analyzer:
     """Cuckoo Linux Analyzer.
@@ -234,6 +302,15 @@ def run(self):
             log.info("No process IDs returned by the package, running for the full timeout")
             pid_check = False
 
+        if PROCESS_LIST:
+            PID = next(iter(PROCESS_LIST))
+        else:
+            raise ValueError("No PID available to monitor.")
+
+        # Start the monitoring thread before the analysis loop
+        monitor_thread = Thread(target=monitor_new_processes, args=(PID,), daemon=True)
+        monitor_thread.start()
+
         # Check in the options if the user toggled the timeout enforce. If so,
         # we need to override pid_check and disable process monitor.
         if self.config.enforce_timeout:
diff --git a/analyzer/linux/modules/auxiliary/filecollector.py b/analyzer/linux/modules/auxiliary/filecollector.py
index a315a81c1f6..ea6945f37a7 100755
--- a/analyzer/linux/modules/auxiliary/filecollector.py
+++ b/analyzer/linux/modules/auxiliary/filecollector.py
@@ -5,6 +5,7 @@
 from threading import Thread
 
 from lib.common.abstracts import Auxiliary
+from lib.common.constants import ROOT
 from lib.common.hashing import hash_file
 from lib.common.results import upload_to_host
 
@@ -125,6 +126,10 @@ def _method_name(self, event):
            # log.info("Not currently set to collect %s", event.pathname)
            return
 
+        if event.pathname.startswith(ROOT):
+            # log.info("Skipping random base directory for file %s", event.pathname)
+            return
+
         if event.pathname.startswith("/tmp/#"):
            # log.info("Skipping wierd file %s", event.pathname)
            return
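
For reviewers who want to exercise the /proc-based technique outside the sandbox, below is a minimal standalone sketch of the same idea: enumerate descendants through /proc/<pid>/task/<pid>/children and dump the readable mappings listed in /proc/<pid>/maps by seeking into /proc/<pid>/mem. The helper names (child_pids, dump_readable_regions) and the /tmp output path are illustrative and not part of the patch; it assumes a Linux kernel built with CONFIG_PROC_CHILDREN and ptrace-level access to the target process (root, or the same user with a permissive ptrace_scope).

#!/usr/bin/env python3
"""Illustrative sketch only; error handling and output layout are simplified."""
import re
import sys


def child_pids(pid, found=None):
    """Recursively collect descendant PIDs via /proc/<pid>/task/<pid>/children."""
    if found is None:
        found = []
    try:
        with open(f"/proc/{pid}/task/{pid}/children") as f:
            for child in f.read().split():
                found.append(int(child))
                child_pids(int(child), found)
    except FileNotFoundError:
        pass  # process exited, or kernel lacks CONFIG_PROC_CHILDREN
    return found


def dump_readable_regions(pid, out_path):
    """Write every readable mapping of `pid` into a single flat file."""
    # A /proc/<pid>/maps line looks like:
    # 55f0a1c00000-55f0a1c21000 r--p 00000000 08:01 1234  /usr/bin/cat
    region = re.compile(r"([0-9A-Fa-f]+)-([0-9A-Fa-f]+)\s+(\S{4})\s")
    with open(f"/proc/{pid}/maps") as maps, \
         open(f"/proc/{pid}/mem", "rb", 0) as mem, \
         open(out_path, "wb") as out:
        for line in maps:
            m = region.match(line)
            if not m or not m.group(3).startswith("r"):
                continue  # unreadable mapping, skip it
            start, end = int(m.group(1), 16), int(m.group(2), 16)
            try:
                mem.seek(start)
                out.write(mem.read(end - start))
            except (OSError, ValueError):
                pass  # e.g. [vvar], or a region that vanished mid-read


if __name__ == "__main__":
    target = int(sys.argv[1])
    print("descendants:", child_pids(target))
    dump_readable_regions(target, f"/tmp/{target}.dump")

Unlike this sketch, the patch writes one dump per PID under MEM_PATH and ships it to the host with upload_to_host, and it polls for new children from a daemon thread so dumps are taken while the sample is still running.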