Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions vulnerabilities/improvers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
enhance_with_metasploit as enhance_with_metasploit_v2,
)
from vulnerabilities.pipelines.v2_improvers import flag_ghost_packages as flag_ghost_packages_v2
from vulnerabilities.pipelines.v2_improvers import unfurl_version_range as unfurl_version_range_v2
from vulnerabilities.utils import create_registry

IMPROVERS_REGISTRY = create_registry(
Expand Down Expand Up @@ -67,6 +68,7 @@
compute_package_risk_v2.ComputePackageRiskPipeline,
compute_version_rank_v2.ComputeVersionRankPipeline,
compute_advisory_todo_v2.ComputeToDo,
unfurl_version_range_v2.UnfurlVersionRangePipeline,
compute_advisory_todo.ComputeToDo,
]
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 4.2.22 on 2025-09-03 09:45

from django.db import migrations, models


class Migration(migrations.Migration):
    """
    Alter three ``ImpactedPackage`` fields.

    ``affecting_vers`` and ``fixed_vers`` are declared as optional, nullable
    text fields, and ``base_purl`` is declared as a 500-character field
    without ``blank=True``.
    """

    # Applies on top of migration 0101 of the "vulnerabilities" app.
    dependencies = [
        ("vulnerabilities", "0101_advisorytodov2_todorelatedadvisoryv2_and_more"),
    ]

    operations = [
        # The affected range may now be absent (NULL) for an impact.
        migrations.AlterField(
            model_name="impactedpackage",
            name="affecting_vers",
            field=models.TextField(
                blank=True,
                help_text="VersionRange expression for package vulnerable to this impact.",
                null=True,
            ),
        ),
        # No ``blank`` argument here, so Django's default (blank=False) applies.
        migrations.AlterField(
            model_name="impactedpackage",
            name="base_purl",
            field=models.CharField(
                help_text="Version less PURL related to impacted range.", max_length=500
            ),
        ),
        # The fixed range may now be absent (NULL) for an impact.
        migrations.AlterField(
            model_name="impactedpackage",
            name="fixed_vers",
            field=models.TextField(
                blank=True,
                help_text="VersionRange expression for packages fixing the vulnerable package in this impact.",
                null=True,
            ),
        ),
    ]
41 changes: 40 additions & 1 deletion vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from functools import cached_property
from itertools import groupby
from operator import attrgetter
from traceback import format_exc as traceback_format_exc
from typing import List
from typing import Union
from urllib.parse import urljoin

Expand Down Expand Up @@ -2927,17 +2929,19 @@ class ImpactedPackage(models.Model):

base_purl = models.CharField(
max_length=500,
blank=True,
blank=False,
help_text="Version less PURL related to impacted range.",
)

affecting_vers = models.TextField(
blank=True,
null=True,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, we should have a check that at least one of affecting_vers or fixed_vers should exist while creating/saving an object

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We’re being a bit flexible in what we accept to make sure we at least store the base purl, even if we don't get affecting or fixed vers. And if there are no affecting or fixed vers, we will not attempt to unfurl, so no harm is done.

help_text="VersionRange expression for package vulnerable to this impact.",
)

fixed_vers = models.TextField(
blank=True,
null=True,
help_text="VersionRange expression for packages fixing the vulnerable package in this impact.",
)

Expand Down Expand Up @@ -3065,6 +3069,41 @@ def get_or_create_from_purl(self, purl: Union[PackageURL, str]):

return package, is_created

def bulk_get_or_create_from_purls(self, purls: List[Union[PackageURL, str]]):
    """
    Return new or existing Packages given ``purls`` list of PackageURL object or PURL string.

    Each PURL is normalized first; the normalized string is used both to look
    up existing ``PackageV2`` rows and as the ``package_url`` of rows created
    here. Duplicate inputs collapse to a single package.

    Return a list of ``PackageV2``. If the bulk insert fails, the error is
    logged and only the packages that already existed are returned.
    """
    # NOTE(review): a reviewer asked for a boolean marking an impacted package
    # as already unfurled so it is not unfurled again. This was declined for
    # now: a run may not get the complete list of versions (timeout, network
    # issue) and such impacts must not be skipped on the next pass.

    # Map normalized PURL string -> model field values, de-duplicating inputs.
    fields_by_package_url = {}
    for purl in purls:
        # purl_to_dict accepts both PURL strings and PackageURL objects.
        normalized = normalize_purl(purl=PackageURL(**purl_to_dict(purl)))
        package_url = str(normalized)
        if package_url in fields_by_package_url:
            continue

        # Build the fields from the normalized PURL so that they agree
        # with the stored ``package_url``.
        purl_dict = purl_to_dict(normalized)
        purl_dict["package_url"] = package_url
        purl_dict["plain_package_url"] = str(utils.plain_purl(normalized))
        fields_by_package_url[package_url] = purl_dict

    if not fields_by_package_url:
        return []

    all_packages = list(PackageV2.objects.filter(package_url__in=list(fields_by_package_url)))
    existing_purls = {package.package_url for package in all_packages}

    packages_to_create = [
        PackageV2(**purl_dict)
        for package_url, purl_dict in fields_by_package_url.items()
        if package_url not in existing_purls
    ]
    if not packages_to_create:
        return all_packages

    try:
        new_packages = PackageV2.objects.bulk_create(packages_to_create)
    except Exception as e:
        logging.error(f"Error creating PackageV2: {e} \n {traceback_format_exc()}")
        # Existing packages are still valid results; do not throw them away.
        return all_packages

    all_packages.extend(new_packages)
    return all_packages

def only_vulnerable(self):
return self._vulnerable(True)

Expand Down
130 changes: 130 additions & 0 deletions vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging
from traceback import format_exc as traceback_format_exc

from aboutcode.pipeline import LoopProgress
from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS
from packageurl import PackageURL
from univers.version_range import RANGE_CLASS_BY_SCHEMES
from univers.version_range import VersionRange

from vulnerabilities.models import ImpactedPackage
from vulnerabilities.models import PackageV2
from vulnerabilities.pipelines import VulnerableCodePipeline
from vulnerabilities.pipes.fetchcode_utils import get_versions
from vulnerabilities.utils import update_purl_version


class UnfurlVersionRangePipeline(VulnerableCodePipeline):
    """
    Expand the affected version range of each ImpactedPackage into concrete
    packages and relate them to the impact through ``affecting_packages``.
    """

    pipeline_id = "unfurl_version_range_v2"

    @classmethod
    def steps(cls):
        return (cls.unfurl_version_range,)

    def unfurl_version_range(self):
        """
        For every ImpactedPackage with a range-style ``affecting_vers``, fetch
        the known versions of its base PURL, keep the ones inside the range and
        create the matching PackageV2 rows and impact-package relations.
        """
        impacted_packages = ImpactedPackage.objects.all().order_by("-created_at")
        impacted_packages_count = impacted_packages.count()

        processed_impacted_packages_count = 0
        processed_affected_packages_count = 0
        # Versions fetched during this run, keyed by PURL, to avoid refetching.
        cached_versions = {}
        self.log(f"Unfurl affected vers range for {impacted_packages_count:,d} ImpactedPackage.")
        progress = LoopProgress(total_iterations=impacted_packages_count, logger=self.log)
        # Stream rows instead of loading every ImpactedPackage in memory.
        for impact in progress.iter(impacted_packages.iterator(chunk_size=2000)):
            affecting_vers = impact.affecting_vers
            # Only range expressions need unfurling. Per review discussion,
            # affecting_vers is a vers string, so checking for ">", "<" and "!"
            # is enough to detect a range comparator.
            if not affecting_vers or not any(c in affecting_vers for c in ("<", ">", "!")):
                continue

            try:
                purl = PackageURL.from_string(impact.base_purl)
            except ValueError as e:
                # One malformed base PURL must not abort the whole run.
                self.log(
                    f"Invalid base_purl {impact.base_purl!r}: {e!r} \n {traceback_format_exc()}",
                    level=logging.ERROR,
                )
                continue

            if purl.type not in FETCHCODE_SUPPORTED_ECOSYSTEMS:
                continue
            if purl.type not in RANGE_CLASS_BY_SCHEMES:
                continue

            versions = get_purl_versions(purl, cached_versions)
            if not versions:
                # Nothing known upstream for this package; nothing to unfurl.
                continue

            affected_purls = get_affected_purls(
                versions=versions,
                affecting_vers=affecting_vers,
                base_purl=purl,
                logger=self.log,
            )
            if not affected_purls:
                continue

            processed_affected_packages_count += bulk_create_with_m2m(
                purls=affected_purls,
                impact=impact,
                relation=ImpactedPackage.affecting_packages.through,
                logger=self.log,
            )
            processed_impacted_packages_count += 1

        self.log(f"Successfully processed {processed_impacted_packages_count:,d} ImpactedPackage.")
        self.log(f"{processed_affected_packages_count:,d} new Impact-Package relation created.")


def get_affected_purls(versions, affecting_vers, base_purl, logger):
    """
    Return a list of PURLs, one per version of ``versions`` that is contained
    in the ``affecting_vers`` range, built by setting that version on
    ``base_purl``.

    ``versions`` is an iterable of version strings and ``affecting_vers`` a
    vers string. ``logger`` is called with a message and a ``level`` keyword
    for every error.

    Always return a list. It is empty when there are no versions, when
    ``affecting_vers`` cannot be parsed, or when a version cannot be parsed
    with the version class of the range. A failure while checking a single
    version against the range is logged and that version is skipped.
    """
    if not versions:
        return []

    try:
        affecting_version_range = VersionRange.from_string(affecting_vers)
    except Exception as e:
        # A bad vers string on one impact must not crash the caller's loop.
        logger(
            f"Error while parsing vers {affecting_vers!s} for {base_purl!s}: {e!r} \n {traceback_format_exc()}",
            level=logging.ERROR,
        )
        return []

    version_class = affecting_version_range.version_class

    try:
        parsed_versions = [version_class(v) for v in versions]
    except Exception as e:
        logger(
            f"Error while parsing versions for {base_purl!s}: {e!r} \n {traceback_format_exc()}",
            level=logging.ERROR,
        )
        # Per review discussion: the caller skips the current impact when no
        # purls come back. An empty list keeps the return type consistent.
        return []

    affected_purls = []
    for version in parsed_versions:
        try:
            if version in affecting_version_range:
                affected_purls.append(
                    update_purl_version(
                        purl=base_purl,
                        version=str(version),
                    )
                )
        except Exception as e:
            logger(
                f"Error while checking {version!s} in {affecting_version_range!s}: {e!r} \n {traceback_format_exc()}",
                level=logging.ERROR,
            )
    return affected_purls


def get_purl_versions(purl, cached_versions=None):
    """
    Return the known versions for ``purl``, using ``cached_versions`` as a cache.

    ``cached_versions`` is a mapping of purl to versions that is updated in
    place on a cache miss. When it is not provided, a fresh mapping is used
    for this call, so nothing is shared between unrelated calls.
    """
    # A ``{}`` default would be created once and silently shared by every
    # call for the lifetime of the process.
    if cached_versions is None:
        cached_versions = {}

    if purl not in cached_versions:
        cached_versions[purl] = get_versions(purl)
    return cached_versions[purl]


def bulk_create_with_m2m(purls, impact, relation, logger):
    """
    Bulk create PackageV2 and also bulk populate M2M Impact and Package relationships.

    ``relation`` is the through model linking ``impact`` to packages and
    ``logger`` is called with a message and a ``level`` keyword on error.

    Return the number of relations submitted for creation, excluding the ones
    that already existed for ``impact``. Return 0 when there is nothing to do
    or when an error occurred.
    """
    if not purls:
        return 0

    affected_packages_v2 = PackageV2.objects.bulk_get_or_create_from_purls(purls=purls)
    if not affected_packages_v2:
        return 0

    try:
        # Leave out relations that already exist so the returned count only
        # covers new ones.
        package_ids = [package.pk for package in affected_packages_v2]
        already_related_ids = set(
            relation.objects.filter(
                impactedpackage=impact,
                packagev2_id__in=package_ids,
            ).values_list("packagev2_id", flat=True)
        )
        relations = [
            relation(impactedpackage=impact, packagev2=package)
            for package in affected_packages_v2
            if package.pk not in already_related_ids
        ]
        # Per review discussion: ignore_conflicts=True guarantees that no
        # duplicate relation is created, even if one appears concurrently.
        relation.objects.bulk_create(relations, ignore_conflicts=True)
    except Exception as e:
        logger(
            f"Error creating ImpactedPackage {relation}: {e!r} \n {traceback_format_exc()}",
            level=logging.ERROR,
        )
        return 0

    return len(relations)
8 changes: 6 additions & 2 deletions vulnerabilities/pipes/advisory.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,12 @@ def insert_advisory_v2(
impact = ImpactedPackage.objects.create(
advisory=advisory_obj,
base_purl=str(affected_pkg.package),
affecting_vers=str(affected_pkg.affected_version_range),
fixed_vers=str(affected_pkg.fixed_version_range),
affecting_vers=str(affected_pkg.affected_version_range)
if affected_pkg.affected_version_range
else None,
fixed_vers=str(affected_pkg.fixed_version_range)
if affected_pkg.fixed_version_range
else None,
)
package_affected_purls, package_fixed_purls = get_exact_purls_v2(
affected_package=affected_pkg,
Expand Down
6 changes: 5 additions & 1 deletion vulnerabilities/pipes/fetchcode_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,18 @@
import logging
from traceback import format_exc as traceback_format_exc
from typing import Callable
from typing import Union

from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS
from fetchcode.package_versions import versions
from packageurl import PackageURL


def get_versions(purl: PackageURL, logger: Callable = None):
def get_versions(purl: Union[PackageURL, str], logger: Callable = None):
"""Return set of known versions for the given purl."""
if isinstance(purl, str):
purl = PackageURL.from_string(purl)

if purl.type not in FETCHCODE_SUPPORTED_ECOSYSTEMS:
return

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
import os

from pathlib import Path
from unittest.mock import Mock
from unittest.mock import patch

from django.test import TestCase
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#


from unittest.mock import patch

from django.test import TestCase

from vulnerabilities.models import AdvisoryV2
from vulnerabilities.models import ImpactedPackage
from vulnerabilities.models import PackageV2
from vulnerabilities.pipelines.v2_improvers.unfurl_version_range import UnfurlVersionRangePipeline


class TestUnfurlVersionRangePipeline(TestCase):
    """Run UnfurlVersionRangePipeline against a single npm impact."""

    def setUp(self):
        """Create one advisory and one impact carrying only an affected range."""
        advisory_fields = {
            "datasource_id": "ghsa",
            "advisory_id": "GHSA-1234",
            "avid": "ghsa/GHSA-1234",
            "unique_content_id": "f" * 64,
            "url": "https://example.com/advisory",
            "date_collected": "2025-07-01T00:00:00Z",
        }
        self.advisory1 = AdvisoryV2.objects.create(**advisory_fields)

        impact_fields = {
            "advisory": self.advisory1,
            "base_purl": "pkg:npm/foobar",
            "affecting_vers": "vers:npm/>3.2.1|<4.0.0",
            "fixed_vers": None,
        }
        self.impact1 = ImpactedPackage.objects.create(**impact_fields)

    def test_affecting_version_range_unfurl(self):
        """Expect two packages created and related to the impact for the mocked versions."""
        patch_target = (
            "vulnerabilities.pipelines.v2_improvers.unfurl_version_range.get_purl_versions"
        )
        known_versions = {"3.4.1", "3.9.0", "2.1.0", "4.0.0", "4.1.0"}

        # Replace the version lookup for the whole test so no network is used.
        with patch(patch_target) as mock_fetch:
            self.assertEqual(0, PackageV2.objects.count())
            mock_fetch.return_value = known_versions

            UnfurlVersionRangePipeline().execute()

            package_count = PackageV2.objects.count()
            related_count = self.impact1.affecting_packages.count()
            self.assertEqual(2, package_count)
            self.assertEqual(2, related_count)