aboutcode-org · ziadhany · Dec 1, 2025 · Dec 6, 2025
diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py
@@ -19,6 +19,7 @@
 from vulnerabilities.pipelines import flag_ghost_packages
 from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
 from vulnerabilities.pipelines import remove_duplicate_advisories
+from vulnerabilities.pipelines.v2_improvers import clamav_rules
 from vulnerabilities.pipelines.v2_improvers import compute_advisory_todo as compute_advisory_todo_v2
 from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
 from vulnerabilities.pipelines.v2_improvers import (
@@ -70,5 +71,6 @@
         compute_advisory_todo_v2.ComputeToDo,
         unfurl_version_range_v2.UnfurlVersionRangePipeline,
         compute_advisory_todo.ComputeToDo,
+        clamav_rules.ClamVRulesImproverPipeline,
     ]
 )
diff --git a/vulnerabilities/migrations/0104_advisorydetectionrule.py b/vulnerabilities/migrations/0104_advisorydetectionrule.py
@@ -0,0 +1,59 @@
+# Generated by Django 4.2.25 on 2025-12-01 20:13
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    """
+    Create the ``AdvisoryDetectionRule`` model: a detection rule (YARA, Sigma
+    or ClamAV) with its text, an optional source URL and a required foreign
+    key to ``AdvisoryV2``.
+    """
+
+    # NOTE(review): the model declared in vulnerabilities/models.py by this same
+    # change is named ``DetectionRule`` and declares a ``rule_metadata`` field,
+    # extra rule types and different ``max_length`` values -- confirm whether
+    # the migrations need to be regenerated to match it.
+
+    dependencies = [
+        ("vulnerabilities", "0103_codecommit_impactedpackage_affecting_commits_and_more"),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name="AdvisoryDetectionRule",
+            fields=[
+                (
+                    "id",
+                    models.AutoField(
+                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+                    ),
+                ),
+                (
+                    "rule_text",
+                    models.TextField(
+                        help_text="Full text of the detection rule, script, or signature."
+                    ),
+                ),
+                (
+                    "rule_type",
+                    models.CharField(
+                        blank=True,
+                        choices=[
+                            ("yara", "YARA"),
+                            ("sigma", "Sigma Detection Rule"),
+                            ("clamav", "ClamAV Signature"),
+                        ],
+                        max_length=100,
+                    ),
+                ),
+                (
+                    "source_url",
+                    models.URLField(
+                        blank=True,
+                        help_text="URL or reference to the source of the rule (vendor feed, GitHub repo, etc.).",
+                        null=True,
+                    ),
+                ),
+                (
+                    # Deleting the advisory deletes its rules (CASCADE).
+                    "advisory",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE,
+                        related_name="detection_rules",
+                        to="vulnerabilities.advisoryv2",
+                    ),
+                ),
+            ],
+        ),
+    ]
diff --git a/vulnerabilities/migrations/0105_alter_advisorydetectionrule_advisory.py b/vulnerabilities/migrations/0105_alter_advisorydetectionrule_advisory.py
@@ -0,0 +1,25 @@
+# Generated by Django 4.2.25 on 2025-12-01 21:52
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    """
+    Make ``AdvisoryDetectionRule.advisory`` optional: the foreign key becomes
+    nullable and uses ``SET_NULL`` on delete.
+    """
+
+    dependencies = [
+        ("vulnerabilities", "0104_advisorydetectionrule"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="advisorydetectionrule",
+            name="advisory",
+            # With SET_NULL the rule row is kept, unlinked, when its advisory is deleted.
+            field=models.ForeignKey(
+                blank=True,
+                null=True,
+                on_delete=django.db.models.deletion.SET_NULL,
+                related_name="detection_rules",
+                to="vulnerabilities.advisoryv2",
+            ),
+        ),
+    ]
diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py
@@ -3414,3 +3414,45 @@ class CodeCommit(models.Model):
 
     class Meta:
         unique_together = ("commit_hash", "vcs_url")
+
+
+class DetectionRuleTypes(models.TextChoices):
+    """
+    Defines the supported formats for security detection rules.
+
+    Used as the ``choices`` of ``DetectionRule.rule_type``.
+    """
+
+    # Each member is declared as: stored value, human-readable label.
+    YARA = "yara", "Yara"
+    YARA_X = "yara-x", "Yara-X"
+    SIGMA = "sigma", "Sigma"
+    CLAMAV = "clamav", "CLAMAV"
+    SURICATA = "suricata", "Suricata"
+
+
+class DetectionRule(models.Model):
+    """
+    A Detection Rule is code used to identify malicious activity or security threats.
+
+    A rule records its type, the URL it came from, optional structured
+    metadata, its text and an optional link to an ``AdvisoryV2``.
+    """
+
+    # NOTE(review): the migrations added alongside this model create a model
+    # named ``AdvisoryDetectionRule`` without ``rule_metadata`` -- confirm the
+    # migrations match this definition.
+
+    # Restricted to the values declared in DetectionRuleTypes.
+    rule_type = models.CharField(
+        max_length=50,
+        choices=DetectionRuleTypes.choices,
+        help_text="The type of the detection rule content (e.g., YARA, Sigma).",
+    )
+
+    source_url = models.URLField(
+        max_length=1024, help_text="URL to the original source or reference for this rule."
+    )
+
+    # Optional: may be NULL in the database and left blank in forms.
+    rule_metadata = models.JSONField(
+        null=True,
+        blank=True,
+        help_text="Additional structured data such as tags, or author information.",
+    )
+
+    rule_text = models.TextField(help_text="The content of the detection signature.")
+
+    # Optional link to an advisory, reachable from the advisory side as
+    # ``detection_rules``. SET_NULL keeps the rule when the advisory is deleted.
+    advisory = models.ForeignKey(
+        AdvisoryV2,
+        related_name="detection_rules",
+        on_delete=models.SET_NULL,
+        null=True,
+        blank=True,
+    )
diff --git a/vulnerabilities/pipelines/v2_improvers/clamav_rules.py b/vulnerabilities/pipelines/v2_improvers/clamav_rules.py
@@ -0,0 +1,201 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import gzip
+import io
+import os
+import shutil
+import tarfile
+import tempfile
+from pathlib import Path
+from typing import List
+
+import requests
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import DetectionRule
+from vulnerabilities.models import DetectionRuleTypes
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.utils import find_all_cve
+
+
+def extract_cvd(cvd_path, output_dir):
+    """
+    Extract the CVD file at ``cvd_path`` into ``output_dir`` and return the
+    output directory as a ``Path``.
+
+    A CVD file is a 512-byte header followed by a gzipped tar archive. The
+    archive is decompressed as a stream, so the whole database is never held
+    in memory at once.
+
+    Raise ``tarfile.ExtractError`` if an archive member would be written
+    outside of ``output_dir``: the archive is downloaded content and is not
+    trusted to contain only safe paths.
+    """
+    header_size = 512
+    output_path = Path(output_dir)
+    output_path.mkdir(parents=True, exist_ok=True)
+    destination = output_path.resolve()
+
+    # Extraction filters only exist on Python versions that ship them; pass the
+    # "data" filter when it is available as extra hardening.
+    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
+
+    with open(cvd_path, "rb") as cvd_file:
+        cvd_file.seek(header_size)  # Skip header
+        # Stream mode ("r|gz") reads sequentially from the current position,
+        # which is right after the CVD header.
+        with tarfile.open(fileobj=cvd_file, mode="r|gz") as tar:
+            for member in tar:
+                target = (destination / member.name).resolve()
+                if target != destination and destination not in target.parents:
+                    raise tarfile.ExtractError(
+                        f"Refusing to extract {member.name!r} outside of {destination}"
+                    )
+                # Skip links and special files: only regular files are read
+                # from the extracted database.
+                if not (member.isfile() or member.isdir()):
+                    continue
+                tar.extract(member, path=destination, **extract_kwargs)
+
+    for extracted in output_path.rglob("*"):
+        if extracted.is_file():
+            extracted.chmod(0o644)  # rw-r--r--
+    return output_path
+
+
+def parse_ndb_file(ndb_path: Path) -> List[dict]:
+    """
+    Return the extended signatures found in the .ndb file at ``ndb_path``.
+
+    Each signature is a dict with the first four colon-separated fields of its
+    line ("name", "target_type", "offset", "hex_signature") plus the 1-based
+    "line_num". Blank lines, "#" comments and lines with fewer than four
+    fields are skipped.
+    """
+    field_names = ("name", "target_type", "offset", "hex_signature")
+    records = []
+    with ndb_path.open("r", encoding="utf-8", errors="ignore") as handle:
+        for number, raw_line in enumerate(handle, 1):
+            text = raw_line.strip()
+            if text == "" or text[0] == "#":
+                continue
+
+            fields = text.split(":")
+            if len(fields) < len(field_names):
+                continue
+
+            # zip() stops after the named fields, so extra trailing fields are dropped.
+            record = dict(zip(field_names, fields))
+            record["line_num"] = number
+            records.append(record)
+    return records
+
+
+def parse_hdb_file(hdb_path: Path) -> List[dict]:
+    """
+    Return the MD5 hash signatures found in the .hdb file at ``hdb_path``.
+
+    Each signature is a dict with the first three colon-separated fields of
+    its line ("hash", "file_size", "name") plus the 1-based "line_num". Blank
+    lines, "#" comments and lines with fewer than three fields are skipped.
+    """
+    field_names = ("hash", "file_size", "name")
+    records = []
+    with hdb_path.open("r", encoding="utf-8", errors="ignore") as handle:
+        for number, raw_line in enumerate(handle, 1):
+            text = raw_line.strip()
+            if text == "" or text[0] == "#":
+                continue
+
+            fields = text.split(":")
+            if len(fields) < len(field_names):
+                continue
+
+            # zip() stops after the named fields, so extra trailing fields are dropped.
+            record = dict(zip(field_names, fields))
+            record["line_num"] = number
+            records.append(record)
+    return records
+
+
+def extract_cve_id(name: str):
+    """
+    Return the first CVE ID found in ``name``, upper-cased, or None.
+
+    Underscores are replaced with dashes before searching.
+    """
+    first_match = next(iter(find_all_cve(name.replace("_", "-"))), None)
+    if first_match is None:
+        return None
+    return first_match.upper()
+
+
+class ClamVRulesImproverPipeline(VulnerableCodeBaseImporterPipelineV2):
+    """
+    Pipeline that downloads the ClamAV database (main.cvd), extracts it, parses
+    the .ndb and .hdb signature files and saves each signature as a ClamAV
+    detection rule. A rule is linked to the advisories whose alias matches the
+    CVE ID found in the signature name; otherwise it is stored unlinked.
+    """
+
+    pipeline_id = "clamv_rules"
+    MAIN_DATABASE_URL = "https://database.clamav.net/main.cvd"
+    license_url = "https://github.com/Cisco-Talos/clamav/blob/c73755d3fc130b0c60ccf4e8f8d28c62fc58c95b/README.md#licensing"
+    # NOTE(review): this is not an SPDX license expression -- confirm the intended value.
+    license_expression = "GNU GENERAL PUBLIC LICENSE"
+
+    @classmethod
+    def steps(cls):
+        """Return the pipeline steps in execution order."""
+        return (
+            cls.download_database,
+            cls.extract_database,
+            cls.collect_and_store_advisories,
+            cls.clean_downloads,
+        )
+
+    def download_database(self):
+        """
+        Download main.cvd into a fresh temporary directory stored in ``self.db_dir``.
+
+        Raise ``requests.HTTPError`` when the server answers with an error status.
+        """
+
+        self.log("Downloading ClamAV database…")
+        # The temporary directory itself is the working directory, so that
+        # ``clean_downloads`` removes everything created by this pipeline.
+        self.db_dir = Path(tempfile.mkdtemp(prefix="clamav_db_"))
+
+        database_url = f"{self.MAIN_DATABASE_URL}?api-version=1"
+        headers = {
+            "User-Agent": "ClamAV-Client/1.0 (https://github.com/aboutcode-org/vulnerablecode)",
+            "Accept": "*/*",
+        }
+
+        filename = self.db_dir / "main.cvd"
+        self.log(f"Downloading {database_url} → {filename}")
+
+        # The context manager releases the streamed connection, even on error.
+        with requests.get(database_url, headers=headers, stream=True, timeout=30) as resp:
+            resp.raise_for_status()
+
+            with filename.open("wb") as f:
+                for chunk in resp.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+
+        self.log("ClamAV DB file downloaded successfully.")
+
+    def extract_database(self):
+        """Extract the downloaded CVD into ``self.db_dir / "extracted"``."""
+        out_dir = self.db_dir / "extracted"
+        self.extract_cvd_dir = extract_cvd(self.db_dir / "main.cvd", out_dir)
+        self.log(f"Extracted CVD to {self.extract_cvd_dir}")
+
+    def collect_and_store_advisories(self):
+        """Parse the .hdb and .ndb files and store every signature as a detection rule."""
+
+        rule_entries = parse_hdb_file(self.extract_cvd_dir / "main.hdb") + parse_ndb_file(
+            self.extract_cvd_dir / "main.ndb"
+        )
+        for rule_entry in rule_entries:
+            cve_id = extract_cve_id(rule_entry.get("name", ""))
+            found_advisories = set()
+
+            if cve_id:
+                # ``filter`` copes with zero, one or several alias rows, where
+                # ``get`` would raise if more than one row matched.
+                for alias in AdvisoryAlias.objects.filter(alias=cve_id):
+                    found_advisories.update(alias.advisories.all())
+                if not found_advisories:
+                    self.log(f"Advisory {cve_id} not found.")
+
+            # A signature without a matching advisory is still stored, unlinked.
+            for adv in found_advisories or [None]:
+                DetectionRule.objects.update_or_create(
+                    rule_text=str(rule_entry),
+                    rule_type=DetectionRuleTypes.CLAMAV,
+                    advisory=adv,
+                    defaults={
+                        "source_url": self.MAIN_DATABASE_URL,
+                    },
+                )
+
+    def clean_downloads(self):
+        """Remove the download directory, if one was created."""
+        db_dir = getattr(self, "db_dir", None)
+        if db_dir and os.path.exists(db_dir):
+            shutil.rmtree(db_dir, ignore_errors=True)
+            self.log("Cleaned up downloaded files.")
+
+    def on_failure(self):
+        """Ensure cleanup on failure."""
+        self.clean_downloads()