Skip to content
Closed

WIP #15135

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions docs/content/releases/os_upgrading/3.2.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
---
title: 'Upgrading to DefectDojo Version 3.2.x'
toc_hide: true
weight: -20260701
description: Findings can now carry multiple CWEs via a new Finding_CWE relationship; vulnerability ids gain an autodetected type. A migration creates the CWE table, adds the type column, de-duplicates vulnerability-id rows, and adds a uniqueness constraint. Existing hash codes are unaffected.
---

## Multiple CWEs per finding

A finding could previously store only one CWE (the integer `cwe` field). This release adds a
dedicated `Finding_CWE` relationship so a finding can carry **multiple CWEs**. The primary CWE
stays on `Finding.cwe` (unchanged — legacy deduplication and hash codes still use it); additional
CWEs live in the relationship.

CWE is modeled separately from vulnerability identifiers (CVE, GHSA, …) on purpose: a CWE is a
weakness *class*, not a vulnerability *instance* identifier, so it must not participate in
`hash_code`, vulnerability-id deduplication, or the `cve` field. Because of this separation,
**existing hash codes and deduplication are unaffected** by this change.

CWEs are populated automatically on import and when a finding is created or edited (from the
finding's CWE field, plus any additional CWEs a parser supplies). The finding exposes them via
`finding.cwes` (primary first, deduplicated).

## Vulnerability id type

Each `Vulnerability_Id` gains an autodetected `vulnerability_id_type` — the identifier's leading
prefix (`CVE-2024-1234` → `CVE`, `GHSA-…` → `GHSA`, `RUSTSEC-…` → `RUSTSEC`). The type is derived
structurally from the identifier itself (no registry) and stored in an indexed column, so
identifiers can be filtered and grouped by type efficiently. It is `NULL` when the identifier has
no non-numeric prefix.

## Database migration

A migration (`0273_finding_cwe_and_vulnerability_id_type`) runs automatically on upgrade and:

- creates the `Finding_CWE` table (unique per `(finding, cwe)`);
- adds the indexed `vulnerability_id_type` column to the vulnerability-id table;
- removes duplicate `(finding, vulnerability_id)` rows, keeping the earliest row (lowest `id`) of
  each pair — such duplicates are unintended, and consolidating them allows a uniqueness
  constraint to be added;
- adds a unique constraint on `(finding, vulnerability_id)`.

### What you need to do

The migration is applied automatically. New and edited findings populate their CWE relationship
automatically. To backfill `Finding_CWE` rows for **existing** findings, run the following
command after upgrading:

```
manage.py migrate_cwe
```

The command is idempotent (safe to re-run).

For more information, check the [Release Notes](https://github.com/DefectDojo/django-DefectDojo/releases/tag/3.2.0).
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import django.db.models.deletion
from django.db import migrations, models


def dedupe_vulnerability_ids(apps, schema_editor):
    """
    Collapse duplicate (finding, vulnerability_id) rows to a single row each.

    For every duplicated pair the row with the lowest id is kept and the rest are
    deleted, so the unique constraint added later in this migration can be created.
    The statement relies on ``DELETE ... USING`` and is therefore Postgres-only.
    """
    delete_duplicates_sql = """
        DELETE FROM dojo_vulnerability_id a
        USING dojo_vulnerability_id b
        WHERE a.finding_id = b.finding_id
        AND a.vulnerability_id = b.vulnerability_id
        AND a.id > b.id
        """
    schema_editor.execute(delete_duplicates_sql)


class Migration(migrations.Migration):

    # Adds the Finding_CWE table (several CWEs per finding), adds the
    # vulnerability_id_type column to Vulnerability_Id, removes duplicate
    # (finding, vulnerability_id) rows and then enforces uniqueness on that pair.
    #
    # NOTE(review): Django's migration docs advise against combining RunPython data
    # changes with schema changes in one migration on PostgreSQL ("pending trigger
    # events" errors) — confirm this runs cleanly against a database that actually
    # contains duplicate rows, or split the data step into its own migration.

    dependencies = [
        ("dojo", "0272_reencrypt_tool_config_credentials_aes_gcm"),
    ]

    operations = [
        # New relation table: one row per (finding, cwe); rows are removed with
        # their finding (CASCADE).
        migrations.CreateModel(
            name="Finding_CWE",
            fields=[
                ("id", models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
                ("cwe", models.IntegerField(db_index=True)),
                ("finding", models.ForeignKey(editable=False, on_delete=django.db.models.deletion.CASCADE, to="dojo.finding")),
            ],
        ),
        # A finding may list a given CWE only once.
        migrations.AddConstraint(
            model_name="finding_cwe",
            constraint=models.UniqueConstraint(fields=("finding", "cwe"), name="unique_finding_cwe"),
        ),
        # Nullable, indexed type column. No operation in this migration populates it,
        # so rows that already exist keep NULL here.
        migrations.AddField(
            model_name="vulnerability_id",
            name="vulnerability_id_type",
            field=models.CharField(blank=True, db_index=True, editable=False, max_length=20, null=True),
        ),
        # Must run before the unique constraint below, which would otherwise fail on
        # existing duplicates. The reverse is a no-op: deleted duplicates are not restored.
        migrations.RunPython(dedupe_vulnerability_ids, migrations.RunPython.noop),
        migrations.AddConstraint(
            model_name="vulnerability_id",
            constraint=models.UniqueConstraint(fields=("finding", "vulnerability_id"), name="unique_finding_vulnerability_id"),
        ),
    ]
53 changes: 53 additions & 0 deletions dojo/finding/api/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@
from dojo.authorization.authorization import user_has_permission
from dojo.celery_dispatch import dojo_dispatch_task
from dojo.finding.helper import (
save_cwes,
save_endpoints_template,
save_vulnerability_ids,
save_vulnerability_ids_template,
)
from dojo.finding.models import BurpRawRequestResponse
from dojo.finding.vulnerability_id import cwe_number
from dojo.jira import services as jira_services
from dojo.jira.api.serializers import JIRAIssueSerializer
from dojo.location.models import LocationFindingReference
Expand All @@ -32,6 +34,7 @@
Endpoint,
Engagement,
Finding,
Finding_CWE,
Finding_Group,
Finding_Template,
Note_Type,
Expand Down Expand Up @@ -298,6 +301,29 @@ class Meta:
fields = ["vulnerability_id"]


class CweField(serializers.Field):

    """Render a CWE as its canonical ``CWE-<n>`` string; on input take ``"CWE-79"`` or ``"79"``."""

    def to_representation(self, value):
        # Outbound: prefix the stored value with "CWE-".
        canonical = f"CWE-{value}"
        return canonical

    def to_internal_value(self, data):
        # Inbound: cwe_number() does the parsing; None signals an unparseable value.
        parsed = cwe_number(data)
        if parsed is not None:
            return parsed
        msg = "Enter a CWE number, e.g. 89 or CWE-89."
        raise serializers.ValidationError(msg)


class FindingCweSerializer(serializers.ModelSerializer):
    # Nested serializer for one Finding_CWE row; exposes only the ``cwe`` field.
    # CweField renders the value as "CWE-<n>" and, on write, returns the number
    # produced by cwe_number() or raises a validation error.
    cwe = CweField()

    class Meta:
        model = Finding_CWE
        fields = ["cwe"]


class FindingSerializer(serializers.ModelSerializer):
mitigated = serializers.DateTimeField(required=False, allow_null=True)
mitigated_by = serializers.PrimaryKeyRelatedField(required=False, allow_null=True, queryset=User.objects.all())
Expand All @@ -321,6 +347,9 @@ class FindingSerializer(serializers.ModelSerializer):
vulnerability_ids = VulnerabilityIdSerializer(
source="vulnerability_id_set", many=True, required=False,
)
cwes = FindingCweSerializer(
source="finding_cwe_set", many=True, required=False,
)
reporter = serializers.PrimaryKeyRelatedField(
required=False, queryset=User.objects.all(),
)
Expand Down Expand Up @@ -417,6 +446,13 @@ def update(self, instance, validated_data):
logger.debug("SETTING CVE FROM VULNERABILITY_ID_SET: %s", parsed_vulnerability_ids[0])
validated_data["cve"] = parsed_vulnerability_ids[0]

# CWEs (mirror vulnerability_ids): the first entry is the primary Finding.cwe; the rest
# become Finding_CWE rows via save_cwes() below.
parsed_cwes = None
if (cwes := validated_data.pop("finding_cwe_set", None)) is not None:
parsed_cwes = [entry["cwe"] for entry in cwes]
validated_data["cwe"] = parsed_cwes[0] if parsed_cwes else 0

# Save the reporter on the finding
if reporter_id := validated_data.get("reporter"):
instance.reporter = reporter_id
Expand Down Expand Up @@ -445,6 +481,11 @@ def update(self, instance, validated_data):
instance, validated_data,
)

# Sync the CWE relation (separate from vulnerability ids) after the new cwe is applied.
if parsed_cwes is not None:
instance.unsaved_cwes = parsed_cwes[1:]
save_cwes(instance)

if settings.V3_FEATURE_LOCATIONS and locations is not None:
for location_ref in instance.locations.all():
location_ref.location.disassociate_from_finding(instance)
Expand Down Expand Up @@ -561,6 +602,9 @@ class FindingCreateSerializer(serializers.ModelSerializer):
vulnerability_ids = VulnerabilityIdSerializer(
source="vulnerability_id_set", many=True, required=False,
)
cwes = FindingCweSerializer(
source="finding_cwe_set", many=True, required=False,
)
reporter = serializers.PrimaryKeyRelatedField(
required=False, queryset=User.objects.all(),
)
Expand Down Expand Up @@ -601,6 +645,12 @@ def create(self, validated_data):
validated_data["cve"] = parsed_vulnerability_ids[0]
# validated_data["unsaved_vulnerability_ids"] = parsed_vulnerability_ids

# CWEs (mirror vulnerability_ids): first entry is the primary cwe, the rest are extras.
parsed_cwes = None
if (cwes := validated_data.pop("finding_cwe_set", None)) is not None:
parsed_cwes = [entry["cwe"] for entry in cwes]
validated_data["cwe"] = parsed_cwes[0] if parsed_cwes else 0

# super.create() doesn't accept unsaved_vulnerability_ids or dedupe_option=False, so call save directly.
new_finding = Finding(**validated_data)
new_finding.unsaved_vulnerability_ids = parsed_vulnerability_ids or []
Expand All @@ -617,6 +667,9 @@ def create(self, validated_data):
new_finding.reviewers.set(reviewers)
if parsed_vulnerability_ids:
save_vulnerability_ids(new_finding, parsed_vulnerability_ids)
if parsed_cwes is not None:
new_finding.unsaved_cwes = parsed_cwes[1:]
save_cwes(new_finding)

if push_to_jira:
jira_services.push(new_finding)
Expand Down
4 changes: 2 additions & 2 deletions dojo/finding/deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,11 +340,11 @@ def build_candidate_scope_queryset(test, mode="deduplication", service=None):
queryset = Finding.objects.filter(scope_q)

if settings.V3_FEATURE_LOCATIONS:
prefetch_list = ["locations__location__url", "vulnerability_id_set", "found_by"]
prefetch_list = ["locations__location__url", "vulnerability_id_set", "finding_cwe_set", "found_by"]
else:
# TODO: Delete this after the move to Locations
# Base prefetches for both modes
prefetch_list = ["endpoints", "vulnerability_id_set", "found_by"]
prefetch_list = ["endpoints", "vulnerability_id_set", "finding_cwe_set", "found_by"]

# Prefetch all endpoint statuses with their endpoint for reimport mode.
# The non-special filtering (excluding false_positive, out_of_scope, risk_accepted)
Expand Down
26 changes: 25 additions & 1 deletion dojo/finding/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
do_false_positive_history_batch,
get_finding_models_for_deduplication,
)
from dojo.finding.vulnerability_id import resolve_vulnerability_id_type
from dojo.jira import services as jira_services
from dojo.location.models import Location
from dojo.location.status import FindingLocationStatus
Expand All @@ -36,6 +37,7 @@
Engagement,
FileUpload,
Finding,
Finding_CWE,
Finding_Group,
JIRA_Instance,
Notes,
Expand Down Expand Up @@ -1004,7 +1006,7 @@ def save_vulnerability_ids(finding, vulnerability_ids, *, delete_existing: bool
Vulnerability_Id.objects.filter(finding=finding).delete()

Vulnerability_Id.objects.bulk_create([
Vulnerability_Id(finding=finding, vulnerability_id=vid)
Vulnerability_Id(finding=finding, vulnerability_id=vid, vulnerability_id_type=resolve_vulnerability_id_type(vid))
for vid in vulnerability_ids
])

Expand All @@ -1015,6 +1017,28 @@ def save_vulnerability_ids(finding, vulnerability_ids, *, delete_existing: bool
finding.cve = None


def save_cwes(finding, *, delete_existing: bool = True):
    """
    Write the finding's CWE numbers to the Finding_CWE table.

    The numbers are the primary ``finding.cwe`` followed by any ``finding.unsaved_cwes``;
    falsy and non-positive values are dropped and duplicates are removed while keeping
    first-seen order. When ``delete_existing`` is true the finding's current Finding_CWE
    rows are deleted before the new rows are inserted. CWE is a weakness class and is
    kept separate from vulnerability ids.
    """
    extra_numbers = getattr(finding, "unsaved_cwes", None) or []
    candidates = [finding.cwe, *extra_numbers]
    # dict.fromkeys() de-duplicates while preserving order, so the primary CWE stays first.
    unique_numbers = list(dict.fromkeys(number for number in candidates if number and number > 0))

    if delete_existing:
        Finding_CWE.objects.filter(finding=finding).delete()

    new_rows = [Finding_CWE(finding=finding, cwe=number) for number in unique_numbers]
    # ignore_conflicts: a (finding, cwe) row that already exists is skipped instead of raising.
    Finding_CWE.objects.bulk_create(new_rows, ignore_conflicts=True)


def save_vulnerability_ids_template(finding_template, vulnerability_ids):
"""Save vulnerability IDs as newline-separated string in TextField."""
# Remove duplicates and empty strings
Expand Down
51 changes: 51 additions & 0 deletions dojo/finding/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from titlecase import titlecase

from dojo.base_models.base import BaseModel
from dojo.finding.vulnerability_id import resolve_vulnerability_id_type

# get_current_date/tomorrow/copy_model_util are defined early in dojo.models, before the
# re-export that loads this module — so this resolves despite the partial circular load, and
Expand Down Expand Up @@ -525,6 +526,9 @@ def __init__(self, *args, **kwargs):
self.unsaved_tags = None
self.unsaved_files = None
self.unsaved_vulnerability_ids = None
# Extra CWE numbers a parser wants to attach in addition to the primary self.cwe.
# Persisted as Finding_CWE rows (multiple CWEs per finding). None = none supplied.
self.unsaved_cwes = None

def __str__(self):
return self.title
Expand Down Expand Up @@ -680,6 +684,11 @@ def copy(self, test=None):
copy.found_by.set(old_found_by)
# Assign any tags
copy.tags.set(old_tags)
# Copy the vulnerability ids and CWEs (relation rows aren't copied by copy_model_util)
for vulnerability_id in self.vulnerability_id_set.all():
Vulnerability_Id.objects.create(finding=copy, vulnerability_id=vulnerability_id.vulnerability_id)
for finding_cwe in self.finding_cwe_set.all():
Finding_CWE.objects.create(finding=copy, cwe=finding_cwe.cwe)

return copy

Expand Down Expand Up @@ -1334,6 +1343,15 @@ def vulnerability_ids(self):
# Remove duplicates
return list(dict.fromkeys(vulnerability_ids))

@cached_property
def cwes(self):
    # Every CWE of this finding as "CWE-<n>" strings: the primary self.cwe (when it is a
    # positive number) comes first, followed by the Finding_CWE rows; repeated numbers
    # are collapsed to their first occurrence.
    primary = self.cwe
    ordered_numbers = [primary] if primary and primary > 0 else []
    ordered_numbers.extend(row.cwe for row in self.finding_cwe_set.all())
    return [f"CWE-{number}" for number in dict.fromkeys(ordered_numbers)]

@property
def violates_sla(self):
return (self.sla_expiration_date and self.sla_expiration_date < timezone.now().date())
Expand All @@ -1357,10 +1375,43 @@ def set_hash_code(self, dedupe_option):
class Vulnerability_Id(models.Model):
    # One vulnerability identifier string attached to a finding. A given
    # (finding, vulnerability_id) pair may exist only once (see Meta.constraints).
    finding = models.ForeignKey("dojo.Finding", editable=False, on_delete=models.CASCADE)
    vulnerability_id = models.TextField(max_length=50, blank=False, null=False)
    # Autodetected from the id prefix (CVE, GHSA, ...); NULL when there is no non-numeric
    # prefix. Denormalized/indexed so type-scoped queries (e.g. GROUP BY type) stay cheap.
    vulnerability_id_type = models.CharField(max_length=20, null=True, blank=True, editable=False, db_index=True)

    class Meta:
        constraints = [
            models.UniqueConstraint(fields=["finding", "vulnerability_id"], name="unique_finding_vulnerability_id"),
        ]

    def __str__(self):
        return self.vulnerability_id

    def save(self, *args, **kwargs):
        """
        Recompute ``vulnerability_id_type`` from ``vulnerability_id`` and save the row.

        bulk_create paths set the type at construction; this covers save()/get_or_create.
        When the caller restricts the write with ``update_fields`` and that list includes
        ``vulnerability_id``, the derived type is added to it so the stored type cannot
        go stale relative to the identifier.
        """
        self.vulnerability_id_type = resolve_vulnerability_id_type(self.vulnerability_id)
        update_fields = kwargs.get("update_fields")
        if update_fields is not None and "vulnerability_id" in update_fields:
            # Without this, a partial save would persist the new id but not its type.
            kwargs["update_fields"] = {"vulnerability_id_type", *update_fields}
        super().save(*args, **kwargs)

    def get_absolute_url(self):
        # A vulnerability id has no page of its own; link to the owning finding.
        return reverse("view_finding", args=[str(self.finding.id)])


class Finding_CWE(models.Model):
    # One CWE weakness linked to a finding. This is deliberately a separate relation from
    # Vulnerability_Id: a CWE names a weakness class rather than a vulnerability instance,
    # so it must stay out of hash_code, vulnerability-id deduplication and the cve field.
    # Finding.cwe continues to hold the primary CWE; these rows allow several CWEs per finding.
    finding = models.ForeignKey("dojo.Finding", editable=False, on_delete=models.CASCADE)
    cwe = models.IntegerField(db_index=True)

    class Meta:
        constraints = [
            models.UniqueConstraint(fields=["finding", "cwe"], name="unique_finding_cwe"),
        ]

    def __str__(self):
        number = self.cwe
        return f"CWE-{number}"

    def get_absolute_url(self):
        # Resolves to the page of the finding this CWE belongs to.
        finding_pk = str(self.finding.id)
        return reverse("view_finding", args=[finding_pk])

Expand Down
Loading
Loading