Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions vulnerabilities/improvers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from vulnerabilities.pipelines import flag_ghost_packages
from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
from vulnerabilities.pipelines import remove_duplicate_advisories
from vulnerabilities.pipelines.v2_improvers import clamav_rules
from vulnerabilities.pipelines.v2_improvers import compute_advisory_todo as compute_advisory_todo_v2
from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
from vulnerabilities.pipelines.v2_improvers import (
Expand Down Expand Up @@ -70,5 +71,6 @@
compute_advisory_todo_v2.ComputeToDo,
unfurl_version_range_v2.UnfurlVersionRangePipeline,
compute_advisory_todo.ComputeToDo,
clamav_rules.ClamVRulesImproverPipeline,
]
)
59 changes: 59 additions & 0 deletions vulnerabilities/migrations/0104_advisorydetectionrule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Generated by Django 4.2.25 on 2025-12-01 20:13

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """
    Create the ``AdvisoryDetectionRule`` table.

    Each row stores the text of one detection rule, its type (one of the
    ``yara``, ``sigma`` or ``clamav`` choices), an optional source URL and a
    required foreign key to ``AdvisoryV2`` that cascades on delete.
    """

    # NOTE(review): the model visible in models.py is named ``DetectionRule``
    # and declares different fields (e.g. ``rule_metadata``, ``max_length=50``)
    # — confirm that a later migration brings the schema in line with it.

    # Must be applied after migration 0103 of the same app.
    dependencies = [
        ("vulnerabilities", "0103_codecommit_impactedpackage_affecting_commits_and_more"),
    ]

    operations = [
        migrations.CreateModel(
            name="AdvisoryDetectionRule",
            fields=[
                # Default auto-incrementing primary key.
                (
                    "id",
                    models.AutoField(
                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
                    ),
                ),
                # Raw rule content; required.
                (
                    "rule_text",
                    models.TextField(
                        help_text="Full text of the detection rule, script, or signature."
                    ),
                ),
                # Rule format; may be left blank in forms (blank=True).
                (
                    "rule_type",
                    models.CharField(
                        blank=True,
                        choices=[
                            ("yara", "YARA"),
                            ("sigma", "Sigma Detection Rule"),
                            ("clamav", "ClamAV Signature"),
                        ],
                        max_length=100,
                    ),
                ),
                # Optional origin of the rule; stored as NULL when unknown.
                (
                    "source_url",
                    models.URLField(
                        blank=True,
                        help_text="URL or reference to the source of the rule (vendor feed, GitHub repo, etc.).",
                        null=True,
                    ),
                ),
                # Owning advisory; deleting the advisory deletes its rules (CASCADE).
                # Reverse accessor on AdvisoryV2 is ``detection_rules``.
                (
                    "advisory",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="detection_rules",
                        to="vulnerabilities.advisoryv2",
                    ),
                ),
            ],
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 4.2.25 on 2025-12-01 21:52

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """
    Make ``AdvisoryDetectionRule.advisory`` optional.

    The foreign key becomes nullable and switches from CASCADE to SET_NULL, so
    a rule can exist without an advisory and survives the deletion of the
    advisory it was linked to.
    """

    # Must be applied after the migration that created the table.
    dependencies = [
        ("vulnerabilities", "0104_advisorydetectionrule"),
    ]

    operations = [
        migrations.AlterField(
            model_name="advisorydetectionrule",
            name="advisory",
            field=models.ForeignKey(
                # blank/null allow rules that are not linked to any advisory.
                blank=True,
                null=True,
                # Keep the rule and clear the link when the advisory is deleted.
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="detection_rules",
                to="vulnerabilities.advisoryv2",
            ),
        ),
    ]
42 changes: 42 additions & 0 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3414,3 +3414,45 @@ class CodeCommit(models.Model):

class Meta:
unique_together = ("commit_hash", "vcs_url")


class DetectionRuleTypes(models.TextChoices):
    """
    Supported formats for security detection rules.

    Each member is a ``(stored value, human-readable label)`` pair; the stored
    value is what ends up in ``DetectionRule.rule_type``.
    """

    YARA = "yara", "Yara"
    YARA_X = "yara-x", "Yara-X"
    SIGMA = "sigma", "Sigma"
    # NOTE(review): label casing ("CLAMAV") differs from the other labels;
    # changing it alters the model's choices, so confirm before touching it.
    CLAMAV = "clamav", "CLAMAV"
    SURICATA = "suricata", "Suricata"


class DetectionRule(models.Model):
    """
    A Detection Rule is code used to identify malicious activity or security threats.

    A rule stores its raw text, its format (see ``DetectionRuleTypes``), the URL
    it came from, optional structured metadata, and an optional link to the
    ``AdvisoryV2`` it relates to.
    """

    # Format of ``rule_text``; restricted to the DetectionRuleTypes choices.
    rule_type = models.CharField(
        max_length=50,
        choices=DetectionRuleTypes.choices,
        help_text="The type of the detection rule content (e.g., YARA, Sigma).",
    )

    # Required: neither blank nor null is allowed for this field.
    source_url = models.URLField(
        max_length=1024, help_text="URL to the original source or reference for this rule."
    )

    # Optional free-form JSON; NULL when no metadata is available.
    rule_metadata = models.JSONField(
        null=True,
        blank=True,
        help_text="Additional structured data such as tags, or author information.",
    )

    # Raw content of the rule or signature.
    rule_text = models.TextField(help_text="The content of the detection signature.")

    # Optional link to an advisory. SET_NULL keeps the rule when the advisory
    # is deleted; the reverse accessor on AdvisoryV2 is ``detection_rules``.
    advisory = models.ForeignKey(
        AdvisoryV2,
        related_name="detection_rules",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )
201 changes: 201 additions & 0 deletions vulnerabilities/pipelines/v2_improvers/clamav_rules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import gzip
import io
import os
import shutil
import tarfile
import tempfile
from pathlib import Path
from typing import List

import requests

from vulnerabilities.models import AdvisoryAlias
from vulnerabilities.models import DetectionRule
from vulnerabilities.models import DetectionRuleTypes
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
from vulnerabilities.utils import find_all_cve


def extract_cvd(cvd_path, output_dir):
    """
    Extract the ClamAV CVD file at ``cvd_path`` into ``output_dir`` and return
    the output directory as a Path. ``output_dir`` is created when missing.

    A CVD file is a 512-byte header followed by a gzipped tar archive. The
    archive is decompressed as a stream, so neither the compressed nor the
    decompressed database is loaded in memory as a whole.

    The archive is downloaded content and is treated as untrusted: only regular
    files and directories are extracted, and a ``tarfile.TarError`` is raised
    if a member would be written outside of ``output_dir``. A file that is not
    a valid CVD raises ``tarfile.ReadError`` (a ``TarError`` subclass).
    """
    header_size = 512
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    extraction_root = output_path.resolve()

    # Use the stdlib "data" extraction filter as a second line of defense when
    # this Python version provides it.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

    with open(cvd_path, "rb") as cvd_file:
        cvd_file.seek(header_size)  # Skip header
        # "r|gz" reads the gzip stream sequentially from the current position.
        with tarfile.open(fileobj=cvd_file, mode="r|gz") as tar:
            for member in tar:
                # Links, devices and FIFOs have no place in a signature
                # database and are not extracted.
                if not (member.isfile() or member.isdir()):
                    continue

                destination = (extraction_root / member.name).resolve()
                is_inside_root = (
                    destination == extraction_root or extraction_root in destination.parents
                )
                if not is_inside_root:
                    raise tarfile.TarError(
                        f"Unsafe path in CVD archive, refusing to extract: {member.name!r}"
                    )

                tar.extract(member, path=output_path, **extract_kwargs)

    # Normalize permissions of everything that was extracted.
    for file in output_path.rglob("*"):
        if file.is_file():
            file.chmod(0o644)  # rw-r--r--
    return output_path


def parse_ndb_file(ndb_path: Path) -> List[dict]:
    """
    Return a list of signature dicts parsed from the ``.ndb`` (extended
    signatures) file at ``ndb_path``.

    Each non-empty, non-comment line is split on ":"; lines with fewer than
    four fields are skipped. The first four fields are stored under the keys
    ``name``, ``target_type``, ``offset`` and ``hex_signature``, followed by
    the 1-based ``line_num`` of the line in the file.
    """
    field_names = ("name", "target_type", "offset", "hex_signature")
    records = []

    with ndb_path.open("r", encoding="utf-8", errors="ignore") as handle:
        for number, raw_line in enumerate(handle, 1):
            entry = raw_line.strip()
            # Blank lines and comments carry no signature.
            if entry == "" or entry.startswith("#"):
                continue

            fields = entry.split(":")
            if len(fields) < len(field_names):
                continue

            # zip() stops after the named fields; extra trailing fields are dropped.
            record = dict(zip(field_names, fields))
            record["line_num"] = number
            records.append(record)

    return records


def parse_hdb_file(hdb_path: Path) -> List[dict]:
    """
    Return a list of signature dicts parsed from the ``.hdb`` (MD5 hash
    signatures) file at ``hdb_path``.

    Each non-empty, non-comment line is split on ":"; lines with fewer than
    three fields are skipped. The first three fields are stored under the keys
    ``hash``, ``file_size`` and ``name``, followed by the 1-based ``line_num``
    of the line in the file.
    """
    field_names = ("hash", "file_size", "name")
    records = []

    with hdb_path.open("r", encoding="utf-8", errors="ignore") as handle:
        for number, raw_line in enumerate(handle, 1):
            entry = raw_line.strip()
            # Blank lines and comments carry no signature.
            if entry == "" or entry.startswith("#"):
                continue

            fields = entry.split(":")
            if len(fields) < len(field_names):
                continue

            # zip() stops after the named fields; extra trailing fields are dropped.
            record = dict(zip(field_names, fields))
            record["line_num"] = number
            records.append(record)

    return records


def extract_cve_id(name: str):
    """
    Return the first CVE id found in ``name`` in upper case, or None.

    Underscores are replaced with dashes before searching, so a signature name
    written as ``CVE_2020_1234`` is searched as ``CVE-2020-1234``.
    """
    with_dashes = name.replace("_", "-")
    for candidate in find_all_cve(with_dashes):
        # Only the first match is of interest.
        return candidate.upper()
    return None


class ClamVRulesImproverPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Pipeline that downloads the ClamAV database (main.cvd), extracts it, parses
    its ``main.hdb`` and ``main.ndb`` signature files and saves each signature
    as a ``DetectionRule``.

    A rule is linked to every advisory whose alias matches the first CVE id
    found in the signature name; a rule with no matching advisory is saved
    without an advisory.
    """

    pipeline_id = "clamv_rules"
    MAIN_DATABASE_URL = "https://database.clamav.net/main.cvd"
    license_url = "https://github.com/Cisco-Talos/clamav/blob/c73755d3fc130b0c60ccf4e8f8d28c62fc58c95b/README.md#licensing"
    # NOTE(review): this is a license name rather than an SPDX license
    # expression — confirm the value expected by the base pipeline.
    license_expression = "GNU GENERAL PUBLIC LICENSE"

    @classmethod
    def steps(cls):
        """Return the pipeline steps in the order they are run."""
        return (
            cls.download_database,
            cls.extract_database,
            cls.collect_and_store_advisories,
            cls.clean_downloads,
        )

    def download_database(self):
        """
        Download main.cvd into a new temporary directory stored as
        ``self.db_dir``.

        Raise ``requests.HTTPError`` on a non-success HTTP status.
        """
        self.log("Downloading ClamAV database…")
        # The temporary directory is itself the working directory, so that
        # clean_downloads() removes everything created here and leaves no
        # empty parent directory behind.
        self.db_dir = Path(tempfile.mkdtemp(prefix="clamav_db_"))

        database_url = f"{self.MAIN_DATABASE_URL}?api-version=1"
        # NOTE(review): the User-Agent still carries a placeholder project URL;
        # confirm which value the ClamAV mirror accepts before changing it.
        headers = {
            "User-Agent": "ClamAV-Client/1.0 (https://github.com/yourproject)",
            "Accept": "*/*",
        }

        filename = self.db_dir / "main.cvd"
        self.log(f"Downloading {database_url} → {filename}")

        resp = requests.get(database_url, headers=headers, stream=True, timeout=30)
        try:
            resp.raise_for_status()
            with filename.open("wb") as f:
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        finally:
            # A streamed response holds its connection until it is closed.
            resp.close()

        self.log("ClamAV DB file downloaded successfully.")

    def extract_database(self):
        """
        Extract the downloaded CVD into ``<db_dir>/extracted`` and store that
        directory as ``self.extract_cvd_dir``.
        """
        out_dir = self.db_dir / "extracted"
        self.extract_cvd_dir = extract_cvd(self.db_dir / "main.cvd", out_dir)
        self.log(f"Extracted CVD to {self.extract_cvd_dir}")

    def collect_and_store_advisories(self):
        """Parse the main.hdb and main.ndb files and store their rules in the DB."""
        # Both files are parsed before anything is written, so a missing or
        # unreadable file fails the step before any rule is stored.
        rule_entries = parse_hdb_file(self.extract_cvd_dir / "main.hdb") + parse_ndb_file(
            self.extract_cvd_dir / "main.ndb"
        )

        for rule_entry in rule_entries:
            cve_id = extract_cve_id(rule_entry.get("name", ""))
            advisories = self._find_advisories(cve_id) if cve_id else []

            # NOTE(review): the stored text is the repr of the parsed dict,
            # including "line_num"; a signature that moves to another line in a
            # later database release is therefore stored as a new rule.
            rule_text = str(rule_entry)

            # A rule without any matching advisory is stored once, unlinked.
            for advisory in advisories or [None]:
                DetectionRule.objects.update_or_create(
                    rule_text=rule_text,
                    rule_type=DetectionRuleTypes.CLAMAV,
                    advisory=advisory,
                    defaults={
                        "source_url": self.MAIN_DATABASE_URL,
                    },
                )

    def _find_advisories(self, cve_id):
        """
        Return the list of advisories attached to the alias ``cve_id``, or an
        empty list (after logging) when no such alias exists.
        """
        try:
            alias = AdvisoryAlias.objects.get(alias=cve_id)
        except AdvisoryAlias.DoesNotExist:
            self.log(f"Advisory {cve_id} not found.")
            return []
        return list(alias.advisories.all())

    def clean_downloads(self):
        """Remove the download directory, if any, ignoring removal errors."""
        if getattr(self, "db_dir", None) and os.path.exists(self.db_dir):
            shutil.rmtree(self.db_dir, ignore_errors=True)
        self.log("Cleaned up downloaded files.")

    def on_failure(self):
        """Ensure cleanup on failure."""
        self.clean_downloads()
Loading
Loading