Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@
from vulnerabilities.pipelines.v2_importers import fireeye_importer_v2
from vulnerabilities.pipelines.v2_importers import gentoo_importer as gentoo_importer_v2
from vulnerabilities.pipelines.v2_importers import github_osv_importer as github_osv_importer_v2
from vulnerabilities.pipelines.v2_importers import (
github_osv_live_importer as github_osv_live_importer_v2,
)
from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2
from vulnerabilities.pipelines.v2_importers import istio_importer as istio_importer_v2
from vulnerabilities.pipelines.v2_importers import mattermost_importer as mattermost_importer_v2
Expand Down Expand Up @@ -189,3 +192,9 @@
collect_fix_commits_v2.CollectGitlabFixCommitsPipeline,
]
)

# Registry of "live" importer pipelines, kept separate from the other registries
# built in this module. Only the GitHub OSV live importer is registered so far.
LIVE_IMPORTERS_REGISTRY = create_registry(
    [github_osv_live_importer_v2.GithubOSVLiveImporterPipeline]
)
162 changes: 162 additions & 0 deletions vulnerabilities/pipelines/v2_importers/github_osv_live_importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import json

import dateparser
import requests
from packageurl import PackageURL
from univers.version_range import RANGE_CLASS_BY_SCHEMES

from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
from vulnerabilities.pipes.osv_v2 import parse_advisory_data_v3
from vulnerabilities.utils import fetch_response

# Maps a Package URL type to the ecosystem name sent in the OSV query body.
# A PURL type missing from this mapping has no ecosystem to query.
ECOSYSTEM_BY_PURL_TYPE = {
    # Types whose ecosystem name differs from the PURL type.
    "pypi": "PyPI",
    # npm is spelled the same on both sides.
    "npm": "npm",
    "maven": "Maven",
    "composer": "Packagist",
    "hex": "Hex",
    "gem": "RubyGems",
    "nuget": "NuGet",
    "cargo": "crates.io",
}


class GithubOSVLiveImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    GithubOSV Live Importer Pipeline

    Collect advisories from GitHub Advisory Database for a single PURL.

    The PURL is read from the pipeline inputs under the ``purl`` key. It must have one of
    the ``supported_types`` and carry a version. Candidate advisories are discovered through
    the OSV query API and their JSON is then fetched from the GitHub advisory-database
    repository.
    """

    pipeline_id = "github_osv_live_importer_v2"
    spdx_license_expression = "CC-BY-4.0"
    license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md"
    supported_types = ["pypi", "npm", "maven", "composer", "hex", "gem", "nuget", "cargo"]

    @classmethod
    def steps(cls):
        """Return the ordered tuple of step methods for this pipeline."""
        return (
            cls.get_purl_inputs,
            cls.get_osv_advisories_urls,
            cls.collect_and_store_advisories,
        )

    def get_purl_inputs(self):
        """
        Validate the ``purl`` pipeline input and store it as ``self.purl``.

        Accept either a PURL string or a ``PackageURL`` instance. Raise ``ValueError`` when
        the input is missing, is not a PackageURL, has an unsupported type, or has no version.
        """
        purl = self.inputs.get("purl")
        if not purl:
            raise ValueError("PURL is required for GithubOSVLiveImporterPipeline")

        if isinstance(purl, str):
            purl = PackageURL.from_string(purl)

        if not isinstance(purl, PackageURL):
            raise ValueError(f"Object of type {type(purl)} {purl!r} is not a PackageURL instance")

        if purl.type not in self.supported_types:
            raise ValueError(
                f"PURL: {purl!s} is not among the supported package types {self.supported_types!r}"
            )

        if not purl.version:
            raise ValueError(f"PURL: {purl!s} is expected to have a version")

        self.purl = purl

    def advisories_count(self):
        """Return the number of advisory URLs collected by ``get_osv_advisories_urls``."""
        return len(self.advisory_urls)

    def collect_advisories(self):
        """
        Fetch and parse advisory data from the collected GitHub Advisory Database URLs.

        Yield each advisory that has at least one affected package matching the exact type,
        name, and namespace of the target PURL and whose affected or fixed version range
        contains the target version. Each advisory is yielded at most once.
        """
        range_class = RANGE_CLASS_BY_SCHEMES.get(self.purl.type)
        if not range_class:
            # Without a range class the target version cannot be compared at all.
            self.log(f"No version range class available for PURL type {self.purl.type!r}")
            return

        target_version = range_class.version_class(self.purl.version)
        target_namespace = self.purl.namespace or ""

        # Sorted so that advisories are processed in a reproducible order; the URLs are
        # stored in a set whose iteration order is not stable between runs.
        for advisory_url in sorted(self.advisory_urls):
            response = fetch_response(advisory_url)
            raw_data = json.loads(response.content)

            advisory = parse_advisory_data_v3(
                raw_data=raw_data,
                supported_ecosystems=self.supported_types,
                advisory_url=advisory_url,
                advisory_text=json.dumps(raw_data, ensure_ascii=False),
            )

            for affected_package in advisory.affected_packages:
                package = affected_package.package
                same_package = (
                    package
                    and package.type == self.purl.type
                    and package.name == self.purl.name
                    and (package.namespace or "") == target_namespace
                )
                if not same_package:
                    continue

                affected_range = affected_package.affected_version_range
                fixed_range = affected_package.fixed_version_range
                if (affected_range and target_version in affected_range) or (
                    fixed_range and target_version in fixed_range
                ):
                    yield advisory
                    # One matching package is enough: stop here so the same advisory is
                    # not yielded again for every other matching range.
                    break

    def get_osv_advisories_urls(self):
        """
        Query the OSV API for the advisories of the target PURL package and store the
        GitHub advisory-database URLs of the GitHub advisories (GHSA-*) among them in
        ``self.advisory_urls``.

        ``self.advisory_urls`` is always set, and stays empty when the PURL type has no OSV
        ecosystem or the OSV API does not answer with HTTP 200.
        """
        # Assigned before any early return so that later steps can always read it.
        self.advisory_urls = set()

        ecosystem = ECOSYSTEM_BY_PURL_TYPE.get(self.purl.type)
        if not ecosystem:
            return []

        # Query by package to get all advisories for that package; we filter GHSA below.
        # NOTE(review): the OSV query API can paginate its results (next_page_token);
        # only the first page is read here -- confirm this is acceptable.
        body = {"package": {"ecosystem": ecosystem, "name": _osv_package_name(self.purl)}}
        resp = requests.post("https://api.osv.dev/v1/query", json=body, timeout=30)
        if resp.status_code != 200:
            self.log(f"OSV API query failed with HTTP status {resp.status_code}")
            return []

        data = resp.json() or {}
        for advisory in data.get("vulns") or []:
            published_date = advisory.get("published")
            candidate_ids = [advisory.get("id") or "", *(advisory.get("aliases") or [])]
            for ghsa_id in candidate_ids:
                if not ghsa_id.startswith("GHSA-"):
                    continue

                if not published_date:
                    # The repository path is keyed on the publication year and month.
                    self.log(f"Cannot build URL for {ghsa_id}: missing published date")
                    continue

                # NOTE(review): for a GHSA id found only in the aliases, the date used is
                # that of the aliasing advisory and may differ from the GHSA's own
                # publication month -- verify against real data.
                advisory_url = build_github_repo_advisory_url(
                    published_date, ghsa_id, logger=self.log
                )
                if advisory_url:
                    self.advisory_urls.add(advisory_url)


def build_github_repo_advisory_url(published_date, advisory_id, logger):
    """
    Return the advisory JSON URL in the GitHub advisory-database repo, using the GHSA path:
    advisories/github-reviewed/YYYY/MM/GHSA-ID/GHSA-ID.json

    ``published_date`` is a date string whose year and month select the directory, and
    ``logger`` is a callable accepting a single message string.

    Return None, after reporting the reason through ``logger``, when ``published_date`` is
    missing or cannot be parsed as a date.
    """
    if not published_date:
        logger(f"Cannot build URL for {advisory_id}: Missing published date")
        return None

    parsed_date = dateparser.parse(date_string=published_date)
    if not parsed_date:
        # dateparser returns None rather than raising when it cannot parse the string.
        logger(
            f"Cannot build URL for {advisory_id}: Unparseable published date {published_date!r}"
        )
        return None

    year = parsed_date.strftime("%Y")
    month = parsed_date.strftime("%m")
    return (
        "https://raw.githubusercontent.com/github/advisory-database/refs/heads/main/"
        f"advisories/github-reviewed/{year}/{month}/{advisory_id}/{advisory_id}.json"
    )


def _osv_package_name(purl: PackageURL) -> str:
    """
    Return the package name of ``purl`` in the form used for the OSV query.

    A PURL without a namespace yields its bare name. Otherwise the namespace is joined to
    the name with ":" for maven (groupId:artifactId) and with "/" for every other type.
    """
    namespace = purl.namespace
    if not namespace:
        return purl.name

    separator = ":" if purl.type == "maven" else "/"
    return f"{namespace}{separator}{purl.name}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import json
from pathlib import Path
from unittest import mock

import pytest
from packageurl import PackageURL

from vulnerabilities.models import AdvisoryV2
from vulnerabilities.pipelines.v2_importers.github_osv_live_importer import (
GithubOSVLiveImporterPipeline,
)
from vulnerabilities.pipelines.v2_importers.github_osv_live_importer import (
build_github_repo_advisory_url,
)
from vulnerabilities.tests import util_tests

# Fixture directory for these tests: "test_data/live_github_osv", three levels above this file.
TEST_DATA = Path(__file__).parent.parent.parent.joinpath("test_data", "live_github_osv")


@pytest.mark.django_db
@mock.patch("vulnerabilities.pipelines.v2_importers.github_osv_live_importer.fetch_response")
@mock.patch("vulnerabilities.pipelines.v2_importers.github_osv_live_importer.requests.post")
def test_github_osv_live_importer(mocker_osv, mock_github_osv):
    """
    Run the live importer for pypi django 1.4.2 with both network calls mocked and compare
    the stored advisories against the expected JSON fixture.
    """
    osv_api_payload = json.loads((TEST_DATA / "fetch_osv_api.json").read_text(encoding="utf-8"))
    raw_advisory_list = json.loads(
        (TEST_DATA / "fetch_github_osv.json").read_text(encoding="utf-8")
    )

    # The OSV API query is answered from the fixture.
    mocker_osv.return_value.status_code = 200
    mocker_osv.return_value.json.return_value = osv_api_payload

    def fake_fetch_response(url):
        # Serve the fixture advisory whose id appears in the requested URL.
        matching_advisory = next(adv for adv in raw_advisory_list if adv.get("id") in url)
        return mock.Mock(content=json.dumps(matching_advisory))

    mock_github_osv.side_effect = fake_fetch_response

    target_purl = PackageURL(type="pypi", name="django", version="1.4.2")
    GithubOSVLiveImporterPipeline(purl=target_purl).execute()

    stored_advisories = [adv.to_advisory_data().to_dict() for adv in AdvisoryV2.objects.all()]
    util_tests.check_results_against_json(stored_advisories, TEST_DATA / "expected-advisories.json")


@pytest.mark.parametrize(
    "published_date, advisory_id, expected_url",
    [
        (
            "2022-05-17T05:10:31Z",
            "GHSA-2655-q453-22f9",
            "https://raw.githubusercontent.com/github/advisory-database/refs/heads/main/advisories/github-reviewed/2022/05/GHSA-2655-q453-22f9/GHSA-2655-q453-22f9.json",
        ),
        (
            "2017-10-24T18:33:37Z",
            "GHSA-4936-rj25-6wm6",
            "https://raw.githubusercontent.com/github/advisory-database/refs/heads/main/advisories/github-reviewed/2017/10/GHSA-4936-rj25-6wm6/GHSA-4936-rj25-6wm6.json",
        ),
    ],
)
def test_build_github_repo_advisory_url(published_date, advisory_id, expected_url):
    """Check that the year and month of the published date select the advisory directory."""
    actual_url = build_github_repo_advisory_url(published_date, advisory_id, logger=print)
    assert actual_url == expected_url
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
[
{
"advisory_id": "GHSA-296w-6qhq-gf92",
"aliases": [
"CVE-2014-0481"
],
"summary": "Django denial of service via file upload naming\nThe default configuration for the file upload handling system in Django before 1.4.14, 1.5.x before 1.5.9, 1.6.x before 1.6.6, and 1.7 before release candidate 3 uses a sequential file name generation process when a file with a conflicting name is uploaded, which allows remote attackers to cause a denial of service (CPU consumption) by unloading a multiple files with the same name.",
"affected_packages": [
{
"package": {
"type": "pypi",
"namespace": "",
"name": "django",
"version": "",
"qualifiers": "",
"subpath": ""
},
"affected_version_range": "vers:pypi/<1.4.14",
"fixed_version_range": "vers:pypi/1.4.14",
"introduced_by_commit_patches": [],
"fixed_by_commit_patches": []
},
{
"package": {
"type": "pypi",
"namespace": "",
"name": "django",
"version": "",
"qualifiers": "",
"subpath": ""
},
"affected_version_range": "vers:pypi/>=1.5|<1.5.9",
"fixed_version_range": "vers:pypi/1.5.9",
"introduced_by_commit_patches": [],
"fixed_by_commit_patches": []
},
{
"package": {
"type": "pypi",
"namespace": "",
"name": "django",
"version": "",
"qualifiers": "",
"subpath": ""
},
"affected_version_range": "vers:pypi/>=1.6|<1.6.6",
"fixed_version_range": "vers:pypi/1.6.6",
"introduced_by_commit_patches": [],
"fixed_by_commit_patches": []
}
],
"references": [
{
"reference_id": "",
"reference_type": "",
"url": "http://lists.opensuse.org/opensuse-updates/2014-09/msg00023.html"
},
{
"reference_id": "",
"reference_type": "",
"url": "http://www.debian.org/security/2014/dsa-3010"
},
{
"reference_id": "",
"reference_type": "",
"url": "https://github.com/django/django"
},
{
"reference_id": "",
"reference_type": "",
"url": "https://github.com/django/django/commit/26cd48e166ac4d84317c8ee6d63ac52a87e8da99"
},
{
"reference_id": "",
"reference_type": "",
"url": "https://github.com/django/django/commit/30042d475bf084c6723c6217a21598d9247a9c41"
},
{
"reference_id": "",
"reference_type": "",
"url": "https://github.com/django/django/commit/dd0c3f4ee1a30c1a1e6055061c6ba6e58c6b54d1"
},
{
"reference_id": "",
"reference_type": "",
"url": "https://github.com/pypa/advisory-database/tree/main/vulns/django/PYSEC-2014-5.yaml"
},
{
"reference_id": "",
"reference_type": "",
"url": "https://nvd.nist.gov/vuln/detail/CVE-2014-0481"
},
{
"reference_id": "",
"reference_type": "",
"url": "https://www.djangoproject.com/weblog/2014/aug/20/security"
}
],
"patches": [],
"severities": [
{
"system": "cvssv3.1",
"value": "7.5",
"scoring_elements": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H"
},
{
"system": "cvssv4",
"value": "8.7",
"scoring_elements": "CVSS:4.0/AV:N/AC:L/AT:N/PR:N/UI:N/VC:N/VI:N/VA:H/SC:N/SI:N/SA:N"
},
{
"system": "generic_textual",
"value": "HIGH",
"scoring_elements": ""
}
],
"date_published": "2022-05-14T02:05:08+00:00",
"weaknesses": [
400
],
"url": "https://raw.githubusercontent.com/github/advisory-database/refs/heads/main/advisories/github-reviewed/2022/05/GHSA-296w-6qhq-gf92/GHSA-296w-6qhq-gf92.json"
}
]
Loading
Loading