From 4ec75adb6577fdee113595abc56fc94f4437615d Mon Sep 17 00:00:00 2001 From: skywalke34 Date: Wed, 4 Feb 2026 10:06:36 -0700 Subject: [PATCH 01/13] test: add Orca Security parser sample scan files Authored by T. Walker - DefectDojo --- unittests/scans/orca_security/many_vulns.csv | 6 ++ unittests/scans/orca_security/many_vulns.json | 62 +++++++++++++++++++ unittests/scans/orca_security/no_vuln.csv | 1 + unittests/scans/orca_security/no_vuln.json | 1 + unittests/scans/orca_security/one_vuln.csv | 2 + unittests/scans/orca_security/one_vuln.json | 14 +++++ 6 files changed, 86 insertions(+) create mode 100644 unittests/scans/orca_security/many_vulns.csv create mode 100644 unittests/scans/orca_security/many_vulns.json create mode 100644 unittests/scans/orca_security/no_vuln.csv create mode 100644 unittests/scans/orca_security/no_vuln.json create mode 100644 unittests/scans/orca_security/one_vuln.csv create mode 100644 unittests/scans/orca_security/one_vuln.json diff --git a/unittests/scans/orca_security/many_vulns.csv b/unittests/scans/orca_security/many_vulns.csv new file mode 100644 index 00000000000..d5267b8e760 --- /dev/null +++ b/unittests/scans/orca_security/many_vulns.csv @@ -0,0 +1,6 @@ +OrcaScore,Title,Category,Inventory,Inventory.Name,CloudAccount,CloudAccount.Name,Source,Status,CreatedAt,LastSeen,Labels +2.0,Low severity test finding,Best practices,1,ResourceA,1,account-dev,ResourceA,open,2025-01-01T08:00:00+00:00,2025-02-01T08:00:00+00:00,"[""CSPM""]" +5.1,Unused role with policy found,IAM misconfigurations,1,TestRole_abc123,1,account-test,TestRole_abc123,open,2025-01-15T10:30:00+00:00,2025-02-01T12:00:00+00:00,"[""CSPM"",""source: Orca Scan""]" +7.5,Public S3 bucket detected,Data at risk,1,my-public-bucket,1,account-prod,my-public-bucket,open,2025-02-01T14:00:00+00:00,2025-02-03T09:00:00+00:00,"[""CSPM"",""mitre: initial access""]" +9.5,Critical IAM root access key active,IAM 
misconfigurations,1,root,1,account-prod,root,open,2025-02-02T16:00:00+00:00,2025-02-03T16:00:00+00:00,"[""CSPM"",""critical""]" +0,Informational security note,Best practices,1,InfoResource,1,account-dev,InfoResource,closed,2024-12-01T00:00:00+00:00,2025-01-01T00:00:00+00:00,"[]" diff --git a/unittests/scans/orca_security/many_vulns.json b/unittests/scans/orca_security/many_vulns.json new file mode 100644 index 00000000000..85f75c5479e --- /dev/null +++ b/unittests/scans/orca_security/many_vulns.json @@ -0,0 +1,62 @@ +[ + { + "Title": "Low severity test finding", + "Labels": ["CSPM"], + "CreatedAt": "2025-01-01T08:00:00+00:00", + "Status": "open", + "Category": "Best practices", + "OrcaScore": 2.0, + "Source": "ResourceA", + "LastSeen": "2025-02-01T08:00:00+00:00", + "CloudAccount": {"Name": "account-dev"}, + "Inventory": {"Name": "ResourceA"} + }, + { + "Title": "Unused role with policy found", + "Labels": ["CSPM", "source: Orca Scan"], + "CreatedAt": "2025-01-15T10:30:00+00:00", + "Status": "open", + "Category": "IAM misconfigurations", + "OrcaScore": 5.1, + "Source": "TestRole_abc123", + "LastSeen": "2025-02-01T12:00:00+00:00", + "CloudAccount": {"Name": "account-test"}, + "Inventory": {"Name": "TestRole_abc123"} + }, + { + "Title": "Public S3 bucket detected", + "Labels": ["CSPM", "mitre: initial access"], + "CreatedAt": "2025-02-01T14:00:00+00:00", + "Status": "open", + "Category": "Data at risk", + "OrcaScore": 7.5, + "Source": "my-public-bucket", + "LastSeen": "2025-02-03T09:00:00+00:00", + "CloudAccount": {"Name": "account-prod"}, + "Inventory": {"Name": "my-public-bucket"} + }, + { + "Title": "Critical IAM root access key active", + "Labels": ["CSPM", "critical"], + "CreatedAt": "2025-02-02T16:00:00+00:00", + "Status": "open", + "Category": "IAM misconfigurations", + "OrcaScore": 9.5, + "Source": "root", + "LastSeen": "2025-02-03T16:00:00+00:00", + "CloudAccount": {"Name": "account-prod"}, + "Inventory": {"Name": "root"} + }, + { + "Title": "Informational 
security note", + "Labels": [], + "CreatedAt": "2024-12-01T00:00:00+00:00", + "Status": "closed", + "Category": "Best practices", + "OrcaScore": 0, + "Source": "InfoResource", + "LastSeen": "2025-01-01T00:00:00+00:00", + "CloudAccount": {"Name": "account-dev"}, + "Inventory": {"Name": "InfoResource"} + } +] diff --git a/unittests/scans/orca_security/no_vuln.csv b/unittests/scans/orca_security/no_vuln.csv new file mode 100644 index 00000000000..c00978d384e --- /dev/null +++ b/unittests/scans/orca_security/no_vuln.csv @@ -0,0 +1 @@ +OrcaScore,Title,Category,Inventory,Inventory.Name,CloudAccount,CloudAccount.Name,Source,Status,CreatedAt,LastSeen,Labels \ No newline at end of file diff --git a/unittests/scans/orca_security/no_vuln.json b/unittests/scans/orca_security/no_vuln.json new file mode 100644 index 00000000000..fe51488c706 --- /dev/null +++ b/unittests/scans/orca_security/no_vuln.json @@ -0,0 +1 @@ +[] diff --git a/unittests/scans/orca_security/one_vuln.csv b/unittests/scans/orca_security/one_vuln.csv new file mode 100644 index 00000000000..93e3ebf5705 --- /dev/null +++ b/unittests/scans/orca_security/one_vuln.csv @@ -0,0 +1,2 @@ +OrcaScore,Title,Category,Inventory,Inventory.Name,CloudAccount,CloudAccount.Name,Source,Status,CreatedAt,LastSeen,Labels +5.1,Unused role with policy found,IAM misconfigurations,1,TestRole_abc123,1,test-account,TestRole_abc123,open,2025-01-15T10:30:00+00:00,2025-02-01T12:00:00+00:00,"[""CSPM"",""source: Orca Scan""]" diff --git a/unittests/scans/orca_security/one_vuln.json b/unittests/scans/orca_security/one_vuln.json new file mode 100644 index 00000000000..9e134fe52af --- /dev/null +++ b/unittests/scans/orca_security/one_vuln.json @@ -0,0 +1,14 @@ +[ + { + "Title": "Unused role with policy found", + "Labels": ["CSPM", "source: Orca Scan"], + "CreatedAt": "2025-01-15T10:30:00+00:00", + "Status": "open", + "Category": "IAM misconfigurations", + "OrcaScore": 5.1, + "Source": "TestRole_abc123", + "LastSeen": "2025-02-01T12:00:00+00:00", 
+ "CloudAccount": {"Name": "test-account"}, + "Inventory": {"Name": "TestRole_abc123"} + } +] From 87ad00f412d85b942de6073503b3912b6c36f776 Mon Sep 17 00:00:00 2001 From: skywalke34 Date: Wed, 4 Feb 2026 10:07:36 -0700 Subject: [PATCH 02/13] test: add Orca Security parser unit tests (failing, TDD) Authored by T. Walker - DefectDojo --- unittests/tools/test_orca_security_parser.py | 113 +++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 unittests/tools/test_orca_security_parser.py diff --git a/unittests/tools/test_orca_security_parser.py b/unittests/tools/test_orca_security_parser.py new file mode 100644 index 00000000000..ca98a0693db --- /dev/null +++ b/unittests/tools/test_orca_security_parser.py @@ -0,0 +1,113 @@ +from dojo.models import Test +from dojo.tools.orca_security.parser import OrcaSecurityParser +from unittests.dojo_test_case import DojoTestCase, get_unit_tests_scans_path + + +class TestOrcaSecurityParser(DojoTestCase): + + # --- CSV Tests --- + + def test_parse_csv_no_findings(self): + with (get_unit_tests_scans_path("orca_security") / "no_vuln.csv").open(encoding="utf-8") as testfile: + parser = OrcaSecurityParser() + findings = parser.get_findings(testfile, Test()) + self.assertEqual(0, len(findings)) + + def test_parse_csv_one_finding(self): + with (get_unit_tests_scans_path("orca_security") / "one_vuln.csv").open(encoding="utf-8") as testfile: + parser = OrcaSecurityParser() + findings = parser.get_findings(testfile, Test()) + self.assertEqual(1, len(findings)) + finding = findings[0] + self.assertEqual("Unused role with policy found", finding.title) + self.assertEqual("Medium", finding.severity) + self.assertTrue(finding.active) + self.assertTrue(finding.static_finding) + self.assertFalse(finding.dynamic_finding) + self.assertEqual("TestRole_abc123", finding.component_name) + self.assertIsNotNone(finding.unique_id_from_tool) + self.assertIn("IAM misconfigurations", finding.description) + + def 
test_parse_csv_many_findings(self): + with (get_unit_tests_scans_path("orca_security") / "many_vulns.csv").open(encoding="utf-8") as testfile: + parser = OrcaSecurityParser() + findings = parser.get_findings(testfile, Test()) + self.assertEqual(5, len(findings)) + + # Check severity mapping across all levels + severities = [f.severity for f in findings] + self.assertIn("Low", severities) + self.assertIn("Medium", severities) + self.assertIn("High", severities) + self.assertIn("Critical", severities) + self.assertIn("Info", severities) + + # Check inactive finding (last one, status=closed) + closed_finding = findings[4] + self.assertFalse(closed_finding.active) + self.assertEqual("Info", closed_finding.severity) + + # --- JSON Tests --- + + def test_parse_json_no_findings(self): + with (get_unit_tests_scans_path("orca_security") / "no_vuln.json").open(encoding="utf-8") as testfile: + parser = OrcaSecurityParser() + findings = parser.get_findings(testfile, Test()) + self.assertEqual(0, len(findings)) + + def test_parse_json_one_finding(self): + with (get_unit_tests_scans_path("orca_security") / "one_vuln.json").open(encoding="utf-8") as testfile: + parser = OrcaSecurityParser() + findings = parser.get_findings(testfile, Test()) + self.assertEqual(1, len(findings)) + finding = findings[0] + self.assertEqual("Unused role with policy found", finding.title) + self.assertEqual("Medium", finding.severity) + self.assertTrue(finding.active) + self.assertTrue(finding.static_finding) + self.assertFalse(finding.dynamic_finding) + self.assertEqual("TestRole_abc123", finding.component_name) + self.assertIsNotNone(finding.unique_id_from_tool) + self.assertIn("IAM misconfigurations", finding.description) + + def test_parse_json_many_findings(self): + with (get_unit_tests_scans_path("orca_security") / "many_vulns.json").open(encoding="utf-8") as testfile: + parser = OrcaSecurityParser() + findings = parser.get_findings(testfile, Test()) + self.assertEqual(5, len(findings)) + + # 
Check severity mapping across all levels + severities = [f.severity for f in findings] + self.assertIn("Low", severities) + self.assertIn("Medium", severities) + self.assertIn("High", severities) + self.assertIn("Critical", severities) + self.assertIn("Info", severities) + + # Check inactive finding (last one, status=closed) + closed_finding = findings[4] + self.assertFalse(closed_finding.active) + self.assertEqual("Info", closed_finding.severity) + + # --- Cross-format consistency tests --- + + def test_unique_id_from_tool_is_consistent(self): + """Same alert data in CSV and JSON should produce the same unique_id_from_tool.""" + with (get_unit_tests_scans_path("orca_security") / "one_vuln.csv").open(encoding="utf-8") as csv_file: + csv_findings = OrcaSecurityParser().get_findings(csv_file, Test()) + + with (get_unit_tests_scans_path("orca_security") / "one_vuln.json").open(encoding="utf-8") as json_file: + json_findings = OrcaSecurityParser().get_findings(json_file, Test()) + + self.assertEqual(csv_findings[0].unique_id_from_tool, json_findings[0].unique_id_from_tool) + + def test_date_is_parsed(self): + """CreatedAt should be parsed into a date object.""" + with (get_unit_tests_scans_path("orca_security") / "one_vuln.json").open(encoding="utf-8") as testfile: + parser = OrcaSecurityParser() + findings = parser.get_findings(testfile, Test()) + finding = findings[0] + self.assertIsNotNone(finding.date) + self.assertEqual(2025, finding.date.year) + self.assertEqual(1, finding.date.month) + self.assertEqual(15, finding.date.day) From 526add90531665ac0fc225f5b5814005ed5b6a3d Mon Sep 17 00:00:00 2001 From: skywalke34 Date: Wed, 4 Feb 2026 10:08:48 -0700 Subject: [PATCH 03/13] feat: add Orca Security parser stubs and shared utility functions Authored by T. 
Walker - DefectDojo --- dojo/tools/orca_security/__init__.py | 0 dojo/tools/orca_security/csv_parser.py | 3 + dojo/tools/orca_security/json_parser.py | 3 + dojo/tools/orca_security/parser.py | 104 ++++++++++++++++++++++++ 4 files changed, 110 insertions(+) create mode 100644 dojo/tools/orca_security/__init__.py create mode 100644 dojo/tools/orca_security/csv_parser.py create mode 100644 dojo/tools/orca_security/json_parser.py create mode 100644 dojo/tools/orca_security/parser.py diff --git a/dojo/tools/orca_security/__init__.py b/dojo/tools/orca_security/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dojo/tools/orca_security/csv_parser.py b/dojo/tools/orca_security/csv_parser.py new file mode 100644 index 00000000000..d09fc396172 --- /dev/null +++ b/dojo/tools/orca_security/csv_parser.py @@ -0,0 +1,3 @@ +class OrcaSecurityCSVParser: + def parse(self, content): + raise NotImplementedError diff --git a/dojo/tools/orca_security/json_parser.py b/dojo/tools/orca_security/json_parser.py new file mode 100644 index 00000000000..1ccb03e729e --- /dev/null +++ b/dojo/tools/orca_security/json_parser.py @@ -0,0 +1,3 @@ +class OrcaSecurityJSONParser: + def parse(self, content): + raise NotImplementedError diff --git a/dojo/tools/orca_security/parser.py b/dojo/tools/orca_security/parser.py new file mode 100644 index 00000000000..bf46328deec --- /dev/null +++ b/dojo/tools/orca_security/parser.py @@ -0,0 +1,104 @@ +import hashlib + +from dateutil import parser as dateutil_parser + +from dojo.tools.orca_security.csv_parser import OrcaSecurityCSVParser +from dojo.tools.orca_security.json_parser import OrcaSecurityJSONParser + + +def map_orca_severity(score): + """Map OrcaScore (float 0-10) to DefectDojo severity string.""" + try: + score = float(score) + except (TypeError, ValueError): + return "Info" + if score <= 0: + return "Info" + if score < 4.0: + return "Low" + if score < 7.0: + return "Medium" + if score < 9.0: + return "High" + return "Critical" 
+ + +def build_unique_id(title, source, cloud_account_name): + """SHA-256 hash of title|source|cloud_account_name for deduplication.""" + raw = f"{title}|{source}|{cloud_account_name}" + return hashlib.sha256(raw.encode("utf-8")).hexdigest() + + +def parse_date(date_string): + """Parse ISO 8601 date string, return date object or None.""" + if not date_string: + return None + try: + return dateutil_parser.parse(date_string).date() + except (ValueError, TypeError): + return None + + +def truncate_title(title, max_length=150): + """Truncate title to max_length, appending '...' if truncated.""" + if not title: + return "Orca Security Alert" + if len(title) <= max_length: + return title + return title[: max_length - 3] + "..." + + +def build_description(title, category, source, inventory_name, cloud_account_name, + orca_score, status, created_at, last_seen, labels): + """Build structured markdown description from alert fields.""" + parts = [] + if title: + parts.append(f"**Title:** {title}") + if category: + parts.append(f"**Category:** {category}") + if source: + parts.append(f"**Source:** {source}") + if inventory_name: + parts.append(f"**Inventory:** {inventory_name}") + if cloud_account_name: + parts.append(f"**Cloud Account:** {cloud_account_name}") + if orca_score is not None: + parts.append(f"**Orca Score:** {orca_score}") + if status: + parts.append(f"**Status:** {status}") + if created_at: + parts.append(f"**Created:** {created_at}") + if last_seen: + parts.append(f"**Last Seen:** {last_seen}") + if labels: + if isinstance(labels, list): + labels_str = ", ".join(str(lbl) for lbl in labels) + else: + labels_str = str(labels) + if labels_str: + parts.append(f"**Labels:** {labels_str}") + return "\n\n".join(parts) if parts else "No details available." 
+ + +class OrcaSecurityParser: + """Parser for Orca Security alert exports (CSV and JSON).""" + + ID = "Orca Security Alerts" + + def get_scan_types(self): + return [self.ID] + + def get_label_for_scan_types(self, scan_type): + return scan_type + + def get_description_for_scan_types(self, scan_type): + return "Import Orca Security alerts (CSV or JSON export)." + + def get_findings(self, filename, test): + content = filename.read() + if isinstance(content, bytes): + content = content.decode("utf-8", errors="replace") + content_strip = content.strip() + if content_strip.startswith("["): + return OrcaSecurityJSONParser().parse(content_strip) + return OrcaSecurityCSVParser().parse(content_strip) From 739bf20b43656e58cf1d87718a650b2216de9575 Mon Sep 17 00:00:00 2001 From: skywalke34 Date: Wed, 4 Feb 2026 10:11:08 -0700 Subject: [PATCH 04/13] feat: implement Orca Security CSV parser Extract shared helper functions into helpers.py to avoid circular imports between parser.py and csv_parser.py. The CSV parser reads Orca Security alert exports and maps fields to DefectDojo findings. Authored by T. 
Walker - DefectDojo --- dojo/tools/orca_security/csv_parser.py | 61 ++++++++++++++++- dojo/tools/orca_security/helpers.py | 77 +++++++++++++++++++++ dojo/tools/orca_security/parser.py | 93 +++++--------------------- 3 files changed, 153 insertions(+), 78 deletions(-) create mode 100644 dojo/tools/orca_security/helpers.py diff --git a/dojo/tools/orca_security/csv_parser.py b/dojo/tools/orca_security/csv_parser.py index d09fc396172..01e51384a67 100644 --- a/dojo/tools/orca_security/csv_parser.py +++ b/dojo/tools/orca_security/csv_parser.py @@ -1,3 +1,62 @@ +import csv +import io +import json + +from dojo.models import Finding +from dojo.tools.orca_security.helpers import ( + build_description, + build_unique_id, + map_orca_severity, + parse_date, + truncate_title, +) + + class OrcaSecurityCSVParser: + """Parse Orca Security CSV alert exports.""" + def parse(self, content): - raise NotImplementedError + reader = csv.DictReader(io.StringIO(content), delimiter=",", quotechar='"') + findings = [] + for row in reader: + title_raw = (row.get("Title") or "").strip() + category = (row.get("Category") or "").strip() + source = (row.get("Source") or "").strip() + inventory_name = (row.get("Inventory.Name") or "").strip() + cloud_account_name = (row.get("CloudAccount.Name") or "").strip() + orca_score_raw = (row.get("OrcaScore") or "").strip() + status = (row.get("Status") or "").strip() + created_at = (row.get("CreatedAt") or "").strip() + last_seen = (row.get("LastSeen") or "").strip() + labels_raw = (row.get("Labels") or "").strip() + + # Parse labels from JSON string + labels = [] + if labels_raw: + try: + labels = json.loads(labels_raw) + except (json.JSONDecodeError, TypeError): + labels = [labels_raw] + + title = truncate_title(title_raw) + severity = map_orca_severity(orca_score_raw) + + description = build_description( + title_raw, category, source, inventory_name, cloud_account_name, + orca_score_raw, status, created_at, last_seen, labels, + ) + + finding = Finding( + 
title=title, + severity=severity, + description=description, + static_finding=True, + dynamic_finding=False, + component_name=inventory_name or None, + unique_id_from_tool=build_unique_id(title_raw, source, cloud_account_name), + date=parse_date(created_at), + ) + finding.active = status.lower() == "open" if status else True + + findings.append(finding) + return findings diff --git a/dojo/tools/orca_security/helpers.py b/dojo/tools/orca_security/helpers.py new file mode 100644 index 00000000000..8468fb69e9b --- /dev/null +++ b/dojo/tools/orca_security/helpers.py @@ -0,0 +1,77 @@ +import hashlib + +from dateutil import parser as dateutil_parser + + +def map_orca_severity(score): + """Map OrcaScore (float 0-10) to DefectDojo severity string.""" + try: + score = float(score) + except (TypeError, ValueError): + return "Info" + if score <= 0: + return "Info" + if score < 4.0: + return "Low" + if score < 7.0: + return "Medium" + if score < 9.0: + return "High" + return "Critical" + + +def build_unique_id(title, source, cloud_account_name): + """SHA-256 hash of title|source|cloud_account_name for deduplication.""" + raw = f"{title}|{source}|{cloud_account_name}" + return hashlib.sha256(raw.encode("utf-8")).hexdigest() + + +def parse_date(date_string): + """Parse ISO 8601 date string, return date object or None.""" + if not date_string: + return None + try: + return dateutil_parser.parse(date_string).date() + except (ValueError, TypeError): + return None + + +def truncate_title(title, max_length=150): + """Truncate title to max_length, appending '...' if truncated.""" + if not title: + return "Orca Security Alert" + if len(title) <= max_length: + return title + return title[: max_length - 3] + "..." 
+ + +def build_description(title, category, source, inventory_name, cloud_account_name, + orca_score, status, created_at, last_seen, labels): + """Build structured markdown description from alert fields.""" + parts = [] + if title: + parts.append(f"**Title:** {title}") + if category: + parts.append(f"**Category:** {category}") + if source: + parts.append(f"**Source:** {source}") + if inventory_name: + parts.append(f"**Inventory:** {inventory_name}") + if cloud_account_name: + parts.append(f"**Cloud Account:** {cloud_account_name}") + if orca_score is not None: + parts.append(f"**Orca Score:** {orca_score}") + if status: + parts.append(f"**Status:** {status}") + if created_at: + parts.append(f"**Created:** {created_at}") + if last_seen: + parts.append(f"**Last Seen:** {last_seen}") + if labels: + if isinstance(labels, list): + labels_str = ", ".join(str(lbl) for lbl in labels) + else: + labels_str = str(labels) + if labels_str: + parts.append(f"**Labels:** {labels_str}") + return "\n\n".join(parts) if parts else "No details available." 
diff --git a/dojo/tools/orca_security/parser.py b/dojo/tools/orca_security/parser.py index bf46328deec..7dffb28173f 100644 --- a/dojo/tools/orca_security/parser.py +++ b/dojo/tools/orca_security/parser.py @@ -1,83 +1,22 @@ -import hashlib - -from dateutil import parser as dateutil_parser - from dojo.tools.orca_security.csv_parser import OrcaSecurityCSVParser +from dojo.tools.orca_security.helpers import ( + build_description, + build_unique_id, + map_orca_severity, + parse_date, + truncate_title, +) from dojo.tools.orca_security.json_parser import OrcaSecurityJSONParser - -def map_orca_severity(score): - """Map OrcaScore (float 0-10) to DefectDojo severity string.""" - try: - score = float(score) - except (TypeError, ValueError): - return "Info" - if score <= 0: - return "Info" - if score < 4.0: - return "Low" - if score < 7.0: - return "Medium" - if score < 9.0: - return "High" - return "Critical" - - -def build_unique_id(title, source, cloud_account_name): - """SHA-256 hash of title|source|cloud_account_name for deduplication.""" - raw = f"{title}|{source}|{cloud_account_name}" - return hashlib.sha256(raw.encode("utf-8")).hexdigest() - - -def parse_date(date_string): - """Parse ISO 8601 date string, return date object or None.""" - if not date_string: - return None - try: - return dateutil_parser.parse(date_string).date() - except (ValueError, TypeError): - return None - - -def truncate_title(title, max_length=150): - """Truncate title to max_length, appending '...' if truncated.""" - if not title: - return "Orca Security Alert" - if len(title) <= max_length: - return title - return title[: max_length - 3] + "..." 
- - -def build_description(title, category, source, inventory_name, cloud_account_name, - orca_score, status, created_at, last_seen, labels): - """Build structured markdown description from alert fields.""" - parts = [] - if title: - parts.append(f"**Title:** {title}") - if category: - parts.append(f"**Category:** {category}") - if source: - parts.append(f"**Source:** {source}") - if inventory_name: - parts.append(f"**Inventory:** {inventory_name}") - if cloud_account_name: - parts.append(f"**Cloud Account:** {cloud_account_name}") - if orca_score is not None: - parts.append(f"**Orca Score:** {orca_score}") - if status: - parts.append(f"**Status:** {status}") - if created_at: - parts.append(f"**Created:** {created_at}") - if last_seen: - parts.append(f"**Last Seen:** {last_seen}") - if labels: - if isinstance(labels, list): - labels_str = ", ".join(str(lbl) for lbl in labels) - else: - labels_str = str(labels) - if labels_str: - parts.append(f"**Labels:** {labels_str}") - return "\n\n".join(parts) if parts else "No details available." +# Re-export helpers so existing imports from this module still work +__all__ = [ + "build_description", + "build_unique_id", + "map_orca_severity", + "parse_date", + "truncate_title", + "OrcaSecurityParser", +] class OrcaSecurityParser: From b3e1121d07ec81db8d87de503d4145cf018e8d5c Mon Sep 17 00:00:00 2001 From: skywalke34 Date: Wed, 4 Feb 2026 10:12:26 -0700 Subject: [PATCH 05/13] feat: implement Orca Security JSON parser Authored by T. 
Walker - DefectDojo --- dojo/tools/orca_security/json_parser.py | 55 ++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/dojo/tools/orca_security/json_parser.py b/dojo/tools/orca_security/json_parser.py index 1ccb03e729e..8d56f168d54 100644 --- a/dojo/tools/orca_security/json_parser.py +++ b/dojo/tools/orca_security/json_parser.py @@ -1,3 +1,56 @@ +import json + +from dojo.models import Finding +from dojo.tools.orca_security.helpers import ( + build_description, + build_unique_id, + map_orca_severity, + parse_date, + truncate_title, +) + + class OrcaSecurityJSONParser: + """Parse Orca Security JSON alert exports.""" + def parse(self, content): - raise NotImplementedError + data = json.loads(content) + findings = [] + for item in data: + title_raw = (item.get("Title") or "").strip() + category = (item.get("Category") or "").strip() + source = (item.get("Source") or "").strip() + status = (item.get("Status") or "").strip() + created_at = (item.get("CreatedAt") or "").strip() + last_seen = (item.get("LastSeen") or "").strip() + orca_score = item.get("OrcaScore") + labels = item.get("Labels") or [] + + cloud_account = item.get("CloudAccount") or {} + cloud_account_name = (cloud_account.get("Name") or "").strip() + + inventory = item.get("Inventory") or {} + inventory_name = (inventory.get("Name") or "").strip() + + title = truncate_title(title_raw) + severity = map_orca_severity(orca_score) + + description = build_description( + title_raw, category, source, inventory_name, cloud_account_name, + orca_score, status, created_at, last_seen, labels, + ) + + finding = Finding( + title=title, + severity=severity, + description=description, + static_finding=True, + dynamic_finding=False, + component_name=inventory_name or None, + unique_id_from_tool=build_unique_id(title_raw, source, cloud_account_name), + date=parse_date(created_at), + ) + finding.active = status.lower() == "open" if status else True + + findings.append(finding) + return 
findings From b8129557c94584bfe96663196fc7a1ecb758d826 Mon Sep 17 00:00:00 2001 From: skywalke34 Date: Wed, 4 Feb 2026 10:13:35 -0700 Subject: [PATCH 06/13] style: fix linting issues in Orca Security parser Authored by T. Walker - DefectDojo --- dojo/tools/orca_security/csv_parser.py | 1 + dojo/tools/orca_security/helpers.py | 5 +---- dojo/tools/orca_security/json_parser.py | 1 + dojo/tools/orca_security/parser.py | 3 ++- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dojo/tools/orca_security/csv_parser.py b/dojo/tools/orca_security/csv_parser.py index 01e51384a67..bca87dd4051 100644 --- a/dojo/tools/orca_security/csv_parser.py +++ b/dojo/tools/orca_security/csv_parser.py @@ -13,6 +13,7 @@ class OrcaSecurityCSVParser: + """Parse Orca Security CSV alert exports.""" def parse(self, content): diff --git a/dojo/tools/orca_security/helpers.py b/dojo/tools/orca_security/helpers.py index 8468fb69e9b..8bf77953258 100644 --- a/dojo/tools/orca_security/helpers.py +++ b/dojo/tools/orca_security/helpers.py @@ -68,10 +68,7 @@ def build_description(title, category, source, inventory_name, cloud_account_nam if last_seen: parts.append(f"**Last Seen:** {last_seen}") if labels: - if isinstance(labels, list): - labels_str = ", ".join(str(lbl) for lbl in labels) - else: - labels_str = str(labels) + labels_str = ", ".join(str(lbl) for lbl in labels) if isinstance(labels, list) else str(labels) if labels_str: parts.append(f"**Labels:** {labels_str}") return "\n\n".join(parts) if parts else "No details available." 
diff --git a/dojo/tools/orca_security/json_parser.py b/dojo/tools/orca_security/json_parser.py index 8d56f168d54..d797e3b2a23 100644 --- a/dojo/tools/orca_security/json_parser.py +++ b/dojo/tools/orca_security/json_parser.py @@ -11,6 +11,7 @@ class OrcaSecurityJSONParser: + """Parse Orca Security JSON alert exports.""" def parse(self, content): diff --git a/dojo/tools/orca_security/parser.py b/dojo/tools/orca_security/parser.py index 7dffb28173f..6b1c9457c82 100644 --- a/dojo/tools/orca_security/parser.py +++ b/dojo/tools/orca_security/parser.py @@ -10,16 +10,17 @@ # Re-export helpers so existing imports from this module still work __all__ = [ + "OrcaSecurityParser", "build_description", "build_unique_id", "map_orca_severity", "parse_date", "truncate_title", - "OrcaSecurityParser", ] class OrcaSecurityParser: + """Parser for Orca Security alert exports (CSV and JSON).""" ID = "Orca Security Alerts" From 9c7cf0b65ded9d4a07bb1fe65d3b7d657d9f56b4 Mon Sep 17 00:00:00 2001 From: skywalke34 Date: Wed, 4 Feb 2026 10:15:39 -0700 Subject: [PATCH 07/13] docs: add Orca Security parser documentation Authored by T. Walker - DefectDojo --- .../parsers/file/orca_security.md | 158 ++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 docs/content/supported_tools/parsers/file/orca_security.md diff --git a/docs/content/supported_tools/parsers/file/orca_security.md b/docs/content/supported_tools/parsers/file/orca_security.md new file mode 100644 index 00000000000..e09f4cbcd8a --- /dev/null +++ b/docs/content/supported_tools/parsers/file/orca_security.md @@ -0,0 +1,158 @@ +--- +title: "Orca Security Alerts" +toc_hide: true +--- + +The [Orca Security](https://orca.security/) parser for DefectDojo supports imports from CSV and JSON formats. This document details the parsing of Orca Security alert exports into DefectDojo field mappings, unmapped fields, and location of each field's parsing code for easier troubleshooting and analysis. 
+ +## Supported File Types + +The Orca Security parser accepts CSV and JSON file formats. To generate these files from Orca Security: + +1. Log into the Orca Security console +2. Navigate to the Alerts page +3. Apply desired filters (scope, severity, status) +4. Click "Export" and select either CSV or JSON format +5. Save the exported file +6. Upload to DefectDojo using the "Orca Security Alerts" scan type + +The parser auto-detects the format: files starting with `[` are treated as JSON, otherwise CSV. + +## Default Deduplication Hashcode Fields + +By default, DefectDojo identifies duplicate Findings using the [unique_id_from_tool](https://docs.defectdojo.com/en/working_with_findings/finding_deduplication/about_deduplication/) field, which is a SHA-256 hash of: + +- Title +- Source +- CloudAccount.Name + +### Sample Scan Data + +Sample Orca Security scans can be found in the [sample scan data folder](https://github.com/DefectDojo/django-DefectDojo/tree/master/unittests/scans/orca_security). + +## Link To Tool + +- [Orca Security](https://orca.security/) +- [Orca Security Documentation](https://docs.orcasecurity.io/) + +## CSV Format + +### Total Fields in CSV + +- Total data fields: 12 +- Total data fields parsed: 10 +- Total data fields NOT parsed: 2 (Inventory, CloudAccount — bare ID columns, not used; only their `.Name` columns are read) + +### CSV Format Field Mapping Details + +
+Click to expand Field Mapping Table + +| Source Field | DefectDojo Field | Parser File & Line # | Notes | +| ------------ | ---------------- | -------------------- | ----- | +| Title | title | csv_parser.py:23, helpers.py:39 | Truncated to 150 characters total, ending in "..." when truncated | +| OrcaScore | severity | csv_parser.py:28, helpers.py:6 | Float mapped to severity string (see Status Conversion) | +| Category | description | csv_parser.py:24, helpers.py:55 | Included in structured markdown description | +| Inventory.Name | component_name | csv_parser.py:26, csv_parser.py:56 | Cloud resource name | +| CloudAccount.Name | description | csv_parser.py:27, helpers.py:61 | Included in description and used for dedup hash | +| Source | description | csv_parser.py:25, helpers.py:57 | Included in description and used for dedup hash | +| Status | active | csv_parser.py:29, csv_parser.py:60 | "open" (case-insensitive) or empty = active, all else = inactive | +| CreatedAt | date | csv_parser.py:30, helpers.py:29 | ISO 8601 parsed to date object | +| LastSeen | description | csv_parser.py:31, helpers.py:69 | Included in description | +| Labels | description | csv_parser.py:32, csv_parser.py:38 | JSON-encoded array parsed and included as comma-separated list | +| Title+Source+CloudAccount.Name | unique_id_from_tool | csv_parser.py:57, helpers.py:23 | SHA-256 hash for deduplication | + +
+ +### Additional Finding Field Settings (CSV Format) + +
+Click to expand Additional Settings Table + +| Finding Field | Default Value | Parser File & Line # | Notes | +|---------------|---------------|----------------------|-------| +| static_finding | True | csv_parser.py:54 | CSPM scan data is static analysis | +| dynamic_finding | False | csv_parser.py:55 | Not a dynamic/runtime scan | +| active | Varies | csv_parser.py:60 | Based on Status field ("open" = True) | +| mitigation | Not set | — | Orca exports do not include remediation text | + +
+ +## JSON Format + +### Total Fields in JSON + +- Total data fields: 10 +- Total data fields parsed: 10 +- Total data fields NOT parsed: 0 + +### JSON Format Field Mapping Details + +
+Click to expand Field Mapping Table + +| Source Field | DefectDojo Field | Parser File & Line # | Notes | +| ------------ | ---------------- | -------------------- | ----- | +| Title | title | json_parser.py:21, helpers.py:39 | Truncated at 150 characters with "..." suffix | +| OrcaScore | severity | json_parser.py:27, helpers.py:6 | Float mapped to severity string (see Status Conversion) | +| Category | description | json_parser.py:22, helpers.py:55 | Included in structured markdown description | +| Inventory.Name | component_name | json_parser.py:33-34, json_parser.py:50 | Nested object, cloud resource name | +| CloudAccount.Name | description | json_parser.py:30-31, helpers.py:61 | Nested object, included in description and dedup hash | +| Source | description | json_parser.py:23, helpers.py:57 | Included in description and used for dedup hash | +| Status | active | json_parser.py:24, json_parser.py:54 | "open" = active, all else = inactive | +| CreatedAt | date | json_parser.py:25, helpers.py:29 | ISO 8601 parsed to date object | +| LastSeen | description | json_parser.py:26, helpers.py:69 | Included in description | +| Labels | description | json_parser.py:28, helpers.py:70 | Array of strings included as comma-separated list | + +
+ +### Additional Finding Field Settings (JSON Format) + +
+Click to expand Additional Settings Table + +| Finding Field | Default Value | Parser File & Line # | Notes | +|---------------|---------------|----------------------|-------| +| static_finding | True | json_parser.py:48 | CSPM scan data is static analysis | +| dynamic_finding | False | json_parser.py:49 | Not a dynamic/runtime scan | +| active | Varies | json_parser.py:54 | Based on Status field ("open" = True) | +| mitigation | Not set | — | Orca exports do not include remediation text | + +
+ +## Special Processing Notes + +### Date Processing + +The parser uses `dateutil.parser.parse()` to handle ISO 8601 date formats from Orca Security exports (helpers.py:29-36). The datetime is converted to a date object using `.date()`. Invalid or missing date strings return `None`. + +### Status Conversion + +OrcaScore (float 0-10) is converted to DefectDojo severity levels (helpers.py:6-20): +- `0` or missing → Info +- `0.1 - 3.9` → Low +- `4.0 - 6.9` → Medium +- `7.0 - 8.9` → High +- `9.0 - 10.0` → Critical + +The conversion uses `float()` with error handling — non-numeric values default to Info severity. + +### Description Construction + +The parser builds a structured markdown description from all available alert fields (helpers.py:48-74). Each field is formatted as a bold label followed by its value, separated by double newlines. Fields with empty values are omitted. The description includes: Title, Category, Source, Inventory name, Cloud Account name, Orca Score, Status, Created date, Last Seen date, and Labels. + +### Title Format + +Finding titles use the alert's Title field directly (helpers.py:39-45). Titles longer than 150 characters are truncated with a "..." suffix. Alerts with no title receive the default "Orca Security Alert". + +### Mitigation Construction + +Orca Security CSV and JSON exports do not include remediation or mitigation text. The mitigation field is not populated by this parser. + +### Deduplication + +The `unique_id_from_tool` field is populated with a SHA-256 hex digest of the concatenation `Title|Source|CloudAccount.Name` (helpers.py:23-26). This ensures consistent deduplication across both CSV and JSON imports — the same alert produces the same unique ID regardless of import format. Each row/item in the export becomes one Finding with no internal deduplication. + +### Labels Handling + +In CSV format, the Labels column contains a JSON-encoded array of strings (csv_parser.py:34-40). 
The parser uses `json.loads()` to parse this embedded JSON. If parsing fails, the raw string is used as a single label. In JSON format, Labels is a native array of strings (json_parser.py:28). In both formats, labels are joined with commas and included in the description. From 7226e828cccc39b3b9f1e2858807225d73dd892d Mon Sep 17 00:00:00 2001 From: skywalke34 Date: Wed, 4 Feb 2026 20:40:56 -0700 Subject: [PATCH 08/13] feat: improve Orca Security parser field mappings - Source now populates 'service' field - OrcaScore populates severity_justification as 'OrcaScore: X.X' - Labels now populate finding tags (unsaved_tags) - unique_id_from_tool changed to hash(CloudAccount.Name|Inventory.Name|Title) Authored by T. Walker - DefectDojo --- dojo/tools/orca_security/csv_parser.py | 7 ++++++- dojo/tools/orca_security/helpers.py | 18 +++++++++++++++--- dojo/tools/orca_security/json_parser.py | 7 ++++++- unittests/tools/test_orca_security_parser.py | 6 ++++++ 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/dojo/tools/orca_security/csv_parser.py b/dojo/tools/orca_security/csv_parser.py index bca87dd4051..faf971e80b0 100644 --- a/dojo/tools/orca_security/csv_parser.py +++ b/dojo/tools/orca_security/csv_parser.py @@ -5,6 +5,7 @@ from dojo.models import Finding from dojo.tools.orca_security.helpers import ( build_description, + build_severity_justification, build_unique_id, map_orca_severity, parse_date, @@ -51,13 +52,17 @@ def parse(self, content): title=title, severity=severity, description=description, + severity_justification=build_severity_justification(orca_score_raw), static_finding=True, dynamic_finding=False, + service=source or None, component_name=inventory_name or None, - unique_id_from_tool=build_unique_id(title_raw, source, cloud_account_name), + unique_id_from_tool=build_unique_id(cloud_account_name, inventory_name, title_raw), date=parse_date(created_at), ) finding.active = status.lower() == "open" if status else True + if labels: + 
finding.unsaved_tags = labels findings.append(finding) return findings diff --git a/dojo/tools/orca_security/helpers.py b/dojo/tools/orca_security/helpers.py index 8bf77953258..6ec7d87dc5c 100644 --- a/dojo/tools/orca_security/helpers.py +++ b/dojo/tools/orca_security/helpers.py @@ -20,12 +20,24 @@ def map_orca_severity(score): return "Critical" -def build_unique_id(title, source, cloud_account_name): - """SHA-256 hash of title|source|cloud_account_name for deduplication.""" - raw = f"{title}|{source}|{cloud_account_name}" +def build_unique_id(cloud_account_name, inventory_name, title): + """SHA-256 hash of cloud_account_name|inventory_name|title for deduplication.""" + raw = f"{cloud_account_name}|{inventory_name}|{title}" return hashlib.sha256(raw.encode("utf-8")).hexdigest() +def build_severity_justification(orca_score): + """Build severity justification string from OrcaScore.""" + if orca_score is None: + return None + try: + score = float(orca_score) + except (TypeError, ValueError): + return None + else: + return f"OrcaScore: {score}" + + def parse_date(date_string): """Parse ISO 8601 date string, return date object or None.""" if not date_string: diff --git a/dojo/tools/orca_security/json_parser.py b/dojo/tools/orca_security/json_parser.py index d797e3b2a23..fca9a56b19b 100644 --- a/dojo/tools/orca_security/json_parser.py +++ b/dojo/tools/orca_security/json_parser.py @@ -3,6 +3,7 @@ from dojo.models import Finding from dojo.tools.orca_security.helpers import ( build_description, + build_severity_justification, build_unique_id, map_orca_severity, parse_date, @@ -45,13 +46,17 @@ def parse(self, content): title=title, severity=severity, description=description, + severity_justification=build_severity_justification(orca_score), static_finding=True, dynamic_finding=False, + service=source or None, component_name=inventory_name or None, - unique_id_from_tool=build_unique_id(title_raw, source, cloud_account_name), + 
unique_id_from_tool=build_unique_id(cloud_account_name, inventory_name, title_raw), date=parse_date(created_at), ) finding.active = status.lower() == "open" if status else True + if labels: + finding.unsaved_tags = labels findings.append(finding) return findings diff --git a/unittests/tools/test_orca_security_parser.py b/unittests/tools/test_orca_security_parser.py index ca98a0693db..f1e33f24f56 100644 --- a/unittests/tools/test_orca_security_parser.py +++ b/unittests/tools/test_orca_security_parser.py @@ -25,8 +25,11 @@ def test_parse_csv_one_finding(self): self.assertTrue(finding.static_finding) self.assertFalse(finding.dynamic_finding) self.assertEqual("TestRole_abc123", finding.component_name) + self.assertEqual("TestRole_abc123", finding.service) + self.assertEqual("OrcaScore: 5.1", finding.severity_justification) self.assertIsNotNone(finding.unique_id_from_tool) self.assertIn("IAM misconfigurations", finding.description) + self.assertEqual(["CSPM", "source: Orca Scan"], finding.unsaved_tags) def test_parse_csv_many_findings(self): with (get_unit_tests_scans_path("orca_security") / "many_vulns.csv").open(encoding="utf-8") as testfile: @@ -67,8 +70,11 @@ def test_parse_json_one_finding(self): self.assertTrue(finding.static_finding) self.assertFalse(finding.dynamic_finding) self.assertEqual("TestRole_abc123", finding.component_name) + self.assertEqual("TestRole_abc123", finding.service) + self.assertEqual("OrcaScore: 5.1", finding.severity_justification) self.assertIsNotNone(finding.unique_id_from_tool) self.assertIn("IAM misconfigurations", finding.description) + self.assertEqual(["CSPM", "source: Orca Scan"], finding.unsaved_tags) def test_parse_json_many_findings(self): with (get_unit_tests_scans_path("orca_security") / "many_vulns.json").open(encoding="utf-8") as testfile: From e23c4968ea10e3b947c9b047b8da7b9fb13c44f0 Mon Sep 17 00:00:00 2001 From: skywalke34 Date: Wed, 4 Feb 2026 20:44:00 -0700 Subject: [PATCH 09/13] docs: update Orca Security parser 
documentation for improved mappings - Updated deduplication hash fields (CloudAccount.Name|Inventory.Name|Title) - Added service field mapping from Source - Added severity_justification field mapping from OrcaScore - Added tags field mapping from Labels - Updated line number references throughout - Added new Special Processing Notes sections Authored by T. Walker - DefectDojo --- .../parsers/file/orca_security.md | 91 +++++++++++-------- 1 file changed, 53 insertions(+), 38 deletions(-) diff --git a/docs/content/supported_tools/parsers/file/orca_security.md b/docs/content/supported_tools/parsers/file/orca_security.md index e09f4cbcd8a..9990f015973 100644 --- a/docs/content/supported_tools/parsers/file/orca_security.md +++ b/docs/content/supported_tools/parsers/file/orca_security.md @@ -22,9 +22,9 @@ The parser auto-detects the format: files starting with `[` are treated as JSON, By default, DefectDojo identifies duplicate Findings using the [unique_id_from_tool](https://docs.defectdojo.com/en/working_with_findings/finding_deduplication/about_deduplication/) field, which is a SHA-256 hash of: -- Title -- Source - CloudAccount.Name +- Inventory.Name +- Title ### Sample Scan Data @@ -40,8 +40,8 @@ Sample Orca Security scans can be found in the [sample scan data folder](https:/ ### Total Fields in CSV - Total data fields: 12 -- Total data fields parsed: 11 -- Total data fields NOT parsed: 1 (Inventory — numeric ID, not used) +- Total data fields parsed: 10 +- Total data fields NOT parsed: 2 (Inventory and CloudAccount — numeric IDs, not used) ### CSV Format Field Mapping Details @@ -50,17 +50,19 @@ Sample Orca Security scans can be found in the [sample scan data folder](https:/ | Source Field | DefectDojo Field | Parser File & Line # | Notes | | ------------ | ---------------- | -------------------- | ----- | -| Title | title | csv_parser.py:23, helpers.py:39 | Truncated at 150 characters with "..."
suffix | -| OrcaScore | severity | csv_parser.py:28, helpers.py:6 | Float mapped to severity string (see Status Conversion) | -| Category | description | csv_parser.py:24, helpers.py:55 | Included in structured markdown description | -| Inventory.Name | component_name | csv_parser.py:26, csv_parser.py:56 | Cloud resource name | -| CloudAccount.Name | description | csv_parser.py:27, helpers.py:61 | Included in description and used for dedup hash | -| Source | description | csv_parser.py:25, helpers.py:57 | Included in description and used for dedup hash | -| Status | active | csv_parser.py:29, csv_parser.py:60 | "open" = active, all else = inactive | -| CreatedAt | date | csv_parser.py:30, helpers.py:29 | ISO 8601 parsed to date object | -| LastSeen | description | csv_parser.py:31, helpers.py:69 | Included in description | -| Labels | description | csv_parser.py:32, csv_parser.py:38 | JSON-encoded array parsed and included as comma-separated list | -| Title+Source+CloudAccount.Name | unique_id_from_tool | csv_parser.py:57, helpers.py:23 | SHA-256 hash for deduplication | +| Title | title | csv_parser.py:24, helpers.py:51 | Truncated at 150 characters with "..." 
suffix | +| OrcaScore | severity | csv_parser.py:29, helpers.py:6 | Float mapped to severity string (see Severity Conversion) | +| OrcaScore | severity_justification | csv_parser.py:55, helpers.py:29 | Stored as "OrcaScore: X.X" | +| Category | description | csv_parser.py:25, helpers.py:67 | Included in structured markdown description | +| Inventory.Name | component_name | csv_parser.py:27, csv_parser.py:59 | Cloud resource name | +| CloudAccount.Name | description | csv_parser.py:28, helpers.py:73 | Included in description and used for dedup hash | +| Source | service | csv_parser.py:26, csv_parser.py:58 | Orca resource identifier populates service field | +| Source | description | csv_parser.py:26, helpers.py:69 | Also included in description | +| Status | active | csv_parser.py:30, csv_parser.py:63 | "open" = active, all else = inactive | +| CreatedAt | date | csv_parser.py:31, helpers.py:41 | ISO 8601 parsed to date object | +| LastSeen | description | csv_parser.py:32, helpers.py:81 | Included in description | +| Labels | tags | csv_parser.py:33, csv_parser.py:64-65 | JSON-encoded array parsed and stored as finding tags | +| CloudAccount.Name+Inventory.Name+Title | unique_id_from_tool | csv_parser.py:60, helpers.py:23 | SHA-256 hash for deduplication | @@ -71,9 +73,9 @@ Sample Orca Security scans can be found in the [sample scan data folder](https:/ | Finding Field | Default Value | Parser File & Line # | Notes | |---------------|---------------|----------------------|-------| -| static_finding | True | csv_parser.py:54 | CSPM scan data is static analysis | -| dynamic_finding | False | csv_parser.py:55 | Not a dynamic/runtime scan | -| active | Varies | csv_parser.py:60 | Based on Status field ("open" = True) | +| static_finding | True | csv_parser.py:56 | CSPM scan data is static analysis | +| dynamic_finding | False | csv_parser.py:57 | Not a dynamic/runtime scan | +| active | Varies | csv_parser.py:63 | Based on Status field ("open" = True) | | mitigation | 
Not set | — | Orca exports do not include remediation text | @@ -93,16 +95,19 @@ Sample Orca Security scans can be found in the [sample scan data folder](https:/ | Source Field | DefectDojo Field | Parser File & Line # | Notes | | ------------ | ---------------- | -------------------- | ----- | -| Title | title | json_parser.py:21, helpers.py:39 | Truncated at 150 characters with "..." suffix | -| OrcaScore | severity | json_parser.py:27, helpers.py:6 | Float mapped to severity string (see Status Conversion) | -| Category | description | json_parser.py:22, helpers.py:55 | Included in structured markdown description | -| Inventory.Name | component_name | json_parser.py:33-34, json_parser.py:50 | Nested object, cloud resource name | -| CloudAccount.Name | description | json_parser.py:30-31, helpers.py:61 | Nested object, included in description and dedup hash | -| Source | description | json_parser.py:23, helpers.py:57 | Included in description and used for dedup hash | -| Status | active | json_parser.py:24, json_parser.py:54 | "open" = active, all else = inactive | -| CreatedAt | date | json_parser.py:25, helpers.py:29 | ISO 8601 parsed to date object | -| LastSeen | description | json_parser.py:26, helpers.py:69 | Included in description | -| Labels | description | json_parser.py:28, helpers.py:70 | Array of strings included as comma-separated list | +| Title | title | json_parser.py:22, helpers.py:51 | Truncated at 150 characters with "..." 
suffix | +| OrcaScore | severity | json_parser.py:28, helpers.py:6 | Float mapped to severity string (see Severity Conversion) | +| OrcaScore | severity_justification | json_parser.py:49, helpers.py:29 | Stored as "OrcaScore: X.X" | +| Category | description | json_parser.py:23, helpers.py:67 | Included in structured markdown description | +| Inventory.Name | component_name | json_parser.py:34-35, json_parser.py:53 | Nested object, cloud resource name | +| CloudAccount.Name | description | json_parser.py:31-32, helpers.py:73 | Nested object, included in description and dedup hash | +| Source | service | json_parser.py:24, json_parser.py:52 | Orca resource identifier populates service field | +| Source | description | json_parser.py:24, helpers.py:69 | Also included in description | +| Status | active | json_parser.py:25, json_parser.py:57 | "open" = active, all else = inactive | +| CreatedAt | date | json_parser.py:26, helpers.py:41 | ISO 8601 parsed to date object | +| LastSeen | description | json_parser.py:27, helpers.py:81 | Included in description | +| Labels | tags | json_parser.py:29, json_parser.py:58-59 | Array of strings stored as finding tags | +| CloudAccount.Name+Inventory.Name+Title | unique_id_from_tool | json_parser.py:54, helpers.py:23 | SHA-256 hash for deduplication | @@ -113,9 +118,9 @@ Sample Orca Security scans can be found in the [sample scan data folder](https:/ | Finding Field | Default Value | Parser File & Line # | Notes | |---------------|---------------|----------------------|-------| -| static_finding | True | json_parser.py:48 | CSPM scan data is static analysis | -| dynamic_finding | False | json_parser.py:49 | Not a dynamic/runtime scan | -| active | Varies | json_parser.py:54 | Based on Status field ("open" = True) | +| static_finding | True | json_parser.py:50 | CSPM scan data is static analysis | +| dynamic_finding | False | json_parser.py:51 | Not a dynamic/runtime scan | +| active | Varies | json_parser.py:57 | Based on Status 
field ("open" = True) | | mitigation | Not set | — | Orca exports do not include remediation text | @@ -124,9 +129,9 @@ Sample Orca Security scans can be found in the [sample scan data folder](https:/ ### Date Processing -The parser uses `dateutil.parser.parse()` to handle ISO 8601 date formats from Orca Security exports (helpers.py:29-36). The datetime is converted to a date object using `.date()`. Invalid or missing date strings return `None`. +The parser uses `dateutil.parser.parse()` to handle ISO 8601 date formats from Orca Security exports (helpers.py:41-48). The datetime is converted to a date object using `.date()`. Invalid or missing date strings return `None`. -### Status Conversion +### Severity Conversion OrcaScore (float 0-10) is converted to DefectDojo severity levels (helpers.py:6-20): - `0` or missing → Info @@ -137,13 +142,21 @@ OrcaScore (float 0-10) is converted to DefectDojo severity levels (helpers.py:6- The conversion uses `float()` with error handling — non-numeric values default to Info severity. +### Severity Justification + +The OrcaScore is also stored in the `severity_justification` field as "OrcaScore: X.X" (helpers.py:29-38). This preserves the original numeric score for reference while the severity field contains the mapped categorical value. + ### Description Construction -The parser builds a structured markdown description from all available alert fields (helpers.py:48-74). Each field is formatted as a bold label followed by its value, separated by double newlines. Fields with empty values are omitted. The description includes: Title, Category, Source, Inventory name, Cloud Account name, Orca Score, Status, Created date, Last Seen date, and Labels. +The parser builds a structured markdown description from all available alert fields (helpers.py:60-86). Each field is formatted as a bold label followed by its value, separated by double newlines. Fields with empty values are omitted. 
The description includes: Title, Category, Source, Inventory name, Cloud Account name, Orca Score, Status, Created date, Last Seen date, and Labels. ### Title Format -Finding titles use the alert's Title field directly (helpers.py:39-45). Titles longer than 150 characters are truncated with a "..." suffix. Alerts with no title receive the default "Orca Security Alert". +Finding titles use the alert's Title field directly (helpers.py:51-57). Titles longer than 150 characters are truncated with a "..." suffix. Alerts with no title receive the default "Orca Security Alert". + +### Service Field + +The Source field from Orca Security populates the DefectDojo `service` field (csv_parser.py:58, json_parser.py:52). This represents the cloud resource or service that generated the alert. ### Mitigation Construction @@ -151,8 +164,10 @@ Orca Security CSV and JSON exports do not include remediation or mitigation text ### Deduplication -The `unique_id_from_tool` field is populated with a SHA-256 hex digest of the concatenation `Title|Source|CloudAccount.Name` (helpers.py:23-26). This ensures consistent deduplication across both CSV and JSON imports — the same alert produces the same unique ID regardless of import format. Each row/item in the export becomes one Finding with no internal deduplication. +The `unique_id_from_tool` field is populated with a SHA-256 hex digest of the concatenation `CloudAccount.Name|Inventory.Name|Title` (helpers.py:23-26). This ensures consistent deduplication across both CSV and JSON imports — the same alert produces the same unique ID regardless of import format. Each row/item in the export becomes one Finding with no internal deduplication. + +### Tags Handling -### Labels Handling +Labels from Orca Security are stored as finding tags using the `unsaved_tags` field (csv_parser.py:64-65, json_parser.py:58-59). This makes labels searchable and filterable in DefectDojo. 
-In CSV format, the Labels column contains a JSON-encoded array of strings (csv_parser.py:34-40). The parser uses `json.loads()` to parse this embedded JSON. If parsing fails, the raw string is used as a single label. In JSON format, Labels is a native array of strings (json_parser.py:28). In both formats, labels are joined with commas and included in the description. +In CSV format, the Labels column contains a JSON-encoded array of strings (csv_parser.py:35-41). The parser uses `json.loads()` to parse this embedded JSON. If parsing fails, the raw string is used as a single tag. In JSON format, Labels is a native array of strings (json_parser.py:29). From 5745799a14978c8d39af686ff215075157f09fa4 Mon Sep 17 00:00:00 2001 From: skywalke34 Date: Wed, 4 Feb 2026 21:02:50 -0700 Subject: [PATCH 10/13] docs: add docstrings and comments to Orca Security parser Add comprehensive docstrings to helpers.py with function descriptions, parameter documentation, and return value documentation. Simplify class docstrings in parser files to match DefectDojo conventions. Authored by T. Walker - DefectDojo --- dojo/tools/orca_security/csv_parser.py | 45 +++++++++- dojo/tools/orca_security/helpers.py | 113 ++++++++++++++++++++++-- dojo/tools/orca_security/json_parser.py | 58 ++++++++++-- dojo/tools/orca_security/parser.py | 29 ++++++ 4 files changed, 230 insertions(+), 15 deletions(-) diff --git a/dojo/tools/orca_security/csv_parser.py b/dojo/tools/orca_security/csv_parser.py index faf971e80b0..77fb82d4c21 100644 --- a/dojo/tools/orca_security/csv_parser.py +++ b/dojo/tools/orca_security/csv_parser.py @@ -1,3 +1,15 @@ +""" +CSV parser for Orca Security alert exports. + +This module handles parsing of Orca Security alerts exported in CSV format. +The CSV export contains one row per alert with columns for all alert metadata. 
+ +Expected CSV columns: + OrcaScore, Title, Category, Inventory, Inventory.Name, CloudAccount, + CloudAccount.Name, Source, Status, CreatedAt, LastSeen, Labels + +Note: The Labels column contains a JSON-encoded array of strings within the CSV. +""" import csv import io import json @@ -18,9 +30,21 @@ class OrcaSecurityCSVParser: """Parse Orca Security CSV alert exports.""" def parse(self, content): + """ + Parse CSV content and return a list of Finding objects. + + Args: + content: String containing the CSV file content + + Returns: + list[Finding]: List of DefectDojo Finding objects + + """ reader = csv.DictReader(io.StringIO(content), delimiter=",", quotechar='"') findings = [] + for row in reader: + # Extract all fields from the CSV row title_raw = (row.get("Title") or "").strip() category = (row.get("Category") or "").strip() source = (row.get("Source") or "").strip() @@ -32,37 +56,50 @@ def parse(self, content): last_seen = (row.get("LastSeen") or "").strip() labels_raw = (row.get("Labels") or "").strip() - # Parse labels from JSON string + # Parse labels from JSON string embedded in CSV + # Orca exports labels as a JSON array within the CSV cell labels = [] if labels_raw: try: labels = json.loads(labels_raw) except (json.JSONDecodeError, TypeError): + # If JSON parsing fails, treat the raw string as a single label labels = [labels_raw] + # Transform fields for DefectDojo title = truncate_title(title_raw) severity = map_orca_severity(orca_score_raw) + # Build structured description with all alert metadata description = build_description( title_raw, category, source, inventory_name, cloud_account_name, orca_score_raw, status, created_at, last_seen, labels, ) + # Create the Finding object with all mapped fields finding = Finding( title=title, severity=severity, description=description, + # Preserve original OrcaScore in severity_justification severity_justification=build_severity_justification(orca_score_raw), - static_finding=True, + static_finding=True, # CSPM 
scan data is static analysis dynamic_finding=False, - service=source or None, - component_name=inventory_name or None, + service=source or None, # Source identifies the cloud resource/service + component_name=inventory_name or None, # Inventory is the specific resource + # Dedup hash uses cloud account + inventory + title for uniqueness unique_id_from_tool=build_unique_id(cloud_account_name, inventory_name, title_raw), date=parse_date(created_at), ) + + # Set active status based on Orca's status field + # "open" alerts are active, all other statuses (closed, resolved, etc.) are inactive finding.active = status.lower() == "open" if status else True + + # Store labels as tags for searchability in DefectDojo if labels: finding.unsaved_tags = labels findings.append(finding) + return findings diff --git a/dojo/tools/orca_security/helpers.py b/dojo/tools/orca_security/helpers.py index 6ec7d87dc5c..a870aaacb30 100644 --- a/dojo/tools/orca_security/helpers.py +++ b/dojo/tools/orca_security/helpers.py @@ -1,10 +1,35 @@ +""" +Shared helper functions for the Orca Security parser. + +This module contains utility functions used by both the CSV and JSON parsers +to ensure consistent behavior across input formats. +""" import hashlib from dateutil import parser as dateutil_parser def map_orca_severity(score): - """Map OrcaScore (float 0-10) to DefectDojo severity string.""" + """ + Map OrcaScore (float 0-10) to DefectDojo severity string. + + Orca Security uses a numeric score from 0-10 to indicate severity. + This function converts that to DefectDojo's categorical severity levels. 
+ + Mapping thresholds: + - 0 or invalid -> Info + - 0.1 - 3.9 -> Low + - 4.0 - 6.9 -> Medium + - 7.0 - 8.9 -> High + - 9.0 - 10.0 -> Critical + + Args: + score: The OrcaScore value (can be float, int, string, or None) + + Returns: + str: DefectDojo severity level ("Info", "Low", "Medium", "High", "Critical") + + """ try: score = float(score) except (TypeError, ValueError): @@ -21,13 +46,40 @@ def map_orca_severity(score): def build_unique_id(cloud_account_name, inventory_name, title): - """SHA-256 hash of cloud_account_name|inventory_name|title for deduplication.""" + """ + Generate a unique identifier for deduplication. + + Creates a SHA-256 hash from the combination of cloud account, inventory, + and title fields. This ensures the same alert produces the same ID + regardless of whether it's imported from CSV or JSON format. + + Args: + cloud_account_name: The name of the cloud account (e.g., "prod-aws-account") + inventory_name: The name of the inventory/resource (e.g., "my-s3-bucket") + title: The alert title (e.g., "Public S3 bucket detected") + + Returns: + str: 64-character hexadecimal SHA-256 hash + + """ raw = f"{cloud_account_name}|{inventory_name}|{title}" return hashlib.sha256(raw.encode("utf-8")).hexdigest() def build_severity_justification(orca_score): - """Build severity justification string from OrcaScore.""" + """ + Build severity justification string from OrcaScore. + + Preserves the original numeric score in the severity_justification field + so users can see the exact Orca score that determined the severity level. + + Args: + orca_score: The OrcaScore value (can be float, int, string, or None) + + Returns: + str or None: "OrcaScore: X.X" if valid score, None otherwise + + """ if orca_score is None: return None try: @@ -39,7 +91,19 @@ def build_severity_justification(orca_score): def parse_date(date_string): - """Parse ISO 8601 date string, return date object or None.""" + """ + Parse ISO 8601 date string into a Python date object. 
+ + Orca Security exports dates in ISO 8601 format (e.g., "2025-01-15T10:30:00+00:00"). + This function extracts just the date portion for the finding's date field. + + Args: + date_string: ISO 8601 formatted date string, or None/empty string + + Returns: + date or None: Python date object if parsing succeeds, None otherwise + + """ if not date_string: return None try: @@ -49,7 +113,21 @@ def parse_date(date_string): def truncate_title(title, max_length=150): - """Truncate title to max_length, appending '...' if truncated.""" + """ + Truncate title to maximum length with ellipsis suffix. + + DefectDojo has a limit on title length. This function ensures titles + fit within that limit while indicating truncation occurred. + + Args: + title: The original title string, or None/empty string + max_length: Maximum allowed length (default 150 characters) + + Returns: + str: Original title if within limit, truncated with "..." if over, + or "Orca Security Alert" if title is empty/None + + """ if not title: return "Orca Security Alert" if len(title) <= max_length: @@ -59,7 +137,29 @@ def truncate_title(title, max_length=150): def build_description(title, category, source, inventory_name, cloud_account_name, orca_score, status, created_at, last_seen, labels): - """Build structured markdown description from alert fields.""" + """ + Build a structured markdown description from alert fields. + + Creates a formatted description containing all relevant alert metadata. + Each field is displayed as a bold label followed by its value. + Empty/None fields are omitted from the output. 
+ + Args: + title: Alert title + category: Alert category (e.g., "IAM misconfigurations") + source: Source resource identifier + inventory_name: Name of the affected inventory/resource + cloud_account_name: Name of the cloud account + orca_score: Numeric OrcaScore (0-10) + status: Alert status (e.g., "open", "closed") + created_at: ISO 8601 creation timestamp + last_seen: ISO 8601 last seen timestamp + labels: List of label strings or single label string + + Returns: + str: Markdown-formatted description with all non-empty fields + + """ parts = [] if title: parts.append(f"**Title:** {title}") @@ -80,6 +180,7 @@ def build_description(title, category, source, inventory_name, cloud_account_nam if last_seen: parts.append(f"**Last Seen:** {last_seen}") if labels: + # Convert list to comma-separated string labels_str = ", ".join(str(lbl) for lbl in labels) if isinstance(labels, list) else str(labels) if labels_str: parts.append(f"**Labels:** {labels_str}") diff --git a/dojo/tools/orca_security/json_parser.py b/dojo/tools/orca_security/json_parser.py index fca9a56b19b..3c40b330b6e 100644 --- a/dojo/tools/orca_security/json_parser.py +++ b/dojo/tools/orca_security/json_parser.py @@ -1,3 +1,27 @@ +""" +JSON parser for Orca Security alert exports. + +This module handles parsing of Orca Security alerts exported in JSON format. +The JSON export is an array of alert objects with nested structures for +CloudAccount and Inventory fields. + +Expected JSON structure: + [ + { + "Title": "...", + "OrcaScore": 5.1, + "Category": "...", + "Source": "...", + "Status": "open", + "CreatedAt": "2025-01-15T10:30:00+00:00", + "LastSeen": "2025-02-01T12:00:00+00:00", + "Labels": ["label1", "label2"], + "CloudAccount": {"Name": "..."}, + "Inventory": {"Name": "..."} + }, + ... 
+ ] +""" import json from dojo.models import Finding @@ -16,47 +40,71 @@ class OrcaSecurityJSONParser: """Parse Orca Security JSON alert exports.""" def parse(self, content): + """ + Parse JSON content and return a list of Finding objects. + + Args: + content: String containing the JSON file content (array of alerts) + + Returns: + list[Finding]: List of DefectDojo Finding objects + + """ data = json.loads(content) findings = [] + for item in data: + # Extract top-level fields title_raw = (item.get("Title") or "").strip() category = (item.get("Category") or "").strip() source = (item.get("Source") or "").strip() status = (item.get("Status") or "").strip() created_at = (item.get("CreatedAt") or "").strip() last_seen = (item.get("LastSeen") or "").strip() - orca_score = item.get("OrcaScore") - labels = item.get("Labels") or [] + orca_score = item.get("OrcaScore") # Keep as numeric, not string + labels = item.get("Labels") or [] # Already a list in JSON + # Extract nested fields from CloudAccount and Inventory objects cloud_account = item.get("CloudAccount") or {} cloud_account_name = (cloud_account.get("Name") or "").strip() inventory = item.get("Inventory") or {} inventory_name = (inventory.get("Name") or "").strip() + # Transform fields for DefectDojo title = truncate_title(title_raw) severity = map_orca_severity(orca_score) + # Build structured description with all alert metadata description = build_description( title_raw, category, source, inventory_name, cloud_account_name, orca_score, status, created_at, last_seen, labels, ) + # Create the Finding object with all mapped fields finding = Finding( title=title, severity=severity, description=description, + # Preserve original OrcaScore in severity_justification severity_justification=build_severity_justification(orca_score), - static_finding=True, + static_finding=True, # CSPM scan data is static analysis dynamic_finding=False, - service=source or None, - component_name=inventory_name or None, + service=source or 
None, # Source identifies the cloud resource/service + component_name=inventory_name or None, # Inventory is the specific resource + # Dedup hash uses cloud account + inventory + title for uniqueness unique_id_from_tool=build_unique_id(cloud_account_name, inventory_name, title_raw), date=parse_date(created_at), ) + + # Set active status based on Orca's status field + # "open" alerts are active, all other statuses (closed, resolved, etc.) are inactive finding.active = status.lower() == "open" if status else True + + # Store labels as tags for searchability in DefectDojo if labels: finding.unsaved_tags = labels findings.append(finding) + return findings diff --git a/dojo/tools/orca_security/parser.py b/dojo/tools/orca_security/parser.py index 6b1c9457c82..d18d57a4616 100644 --- a/dojo/tools/orca_security/parser.py +++ b/dojo/tools/orca_security/parser.py @@ -1,3 +1,12 @@ +""" +Orca Security parser for DefectDojo. + +Orca Security is a cloud security platform that provides agentless security +and compliance for AWS, Azure, GCP, and Kubernetes environments. This parser +imports Orca Security alert exports in CSV or JSON format. + +For more information about Orca Security, see: https://orca.security/ +""" from dojo.tools.orca_security.csv_parser import OrcaSecurityCSVParser from dojo.tools.orca_security.helpers import ( build_description, @@ -26,19 +35,39 @@ class OrcaSecurityParser: ID = "Orca Security Alerts" def get_scan_types(self): + """Return the scan type identifier for this parser.""" return [self.ID] def get_label_for_scan_types(self, scan_type): + """Return the human-readable label for this scan type.""" return scan_type def get_description_for_scan_types(self, scan_type): + """Return the description shown in the DefectDojo UI.""" return "Import Orca Security alerts (CSV or JSON export)." def get_findings(self, filename, test): + """ + Parse an Orca Security export file and return findings. 
+ + This method auto-detects the file format (CSV vs JSON) by examining + the file content. JSON files start with '[' (array), while CSV files + start with the header row. + + Args: + filename: File-like object containing the Orca Security export + test: DefectDojo Test object to associate findings with + + Returns: + list[Finding]: List of DefectDojo Finding objects + + """ content = filename.read() if isinstance(content, bytes): content = content.decode("utf-8", errors="replace") content_strip = content.strip() + + # Auto-detect format: JSON arrays start with '[', CSV starts with headers if content_strip.startswith("["): return OrcaSecurityJSONParser().parse(content_strip) return OrcaSecurityCSVParser().parse(content_strip) From 4b904effb91a514645aebf02c5ae09119fec9cc4 Mon Sep 17 00:00:00 2001 From: skywalke34 Date: Thu, 5 Mar 2026 14:48:17 -0700 Subject: [PATCH 11/13] fix: add dedup config, increase title limit, remove doc line numbers - Add DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL entry for Orca Security Alerts in settings.dist.py - Increase title truncation from 150 to 500 characters - Remove line number references from documentation tables and prose - Clean up parser.py imports (remove unused helper re-exports) Authored by T. Walker - DefectDojo --- .../parsers/file/orca_security.md | 104 +++++++++--------- dojo/settings/settings.dist.py | 1 + dojo/tools/orca_security/helpers.py | 4 +- dojo/tools/orca_security/parser.py | 26 ----- 4 files changed, 55 insertions(+), 80 deletions(-) diff --git a/docs/content/supported_tools/parsers/file/orca_security.md b/docs/content/supported_tools/parsers/file/orca_security.md index 9990f015973..1cf49c284bd 100644 --- a/docs/content/supported_tools/parsers/file/orca_security.md +++ b/docs/content/supported_tools/parsers/file/orca_security.md @@ -3,7 +3,7 @@ title: "Orca Security Alerts" toc_hide: true --- -The [Orca Security](https://orca.security/) parser for DefectDojo supports imports from CSV and JSON formats. 
This document details the parsing of Orca Security alert exports into DefectDojo field mappings, unmapped fields, and location of each field's parsing code for easier troubleshooting and analysis. +The [Orca Security](https://orca.security/) parser for DefectDojo supports imports from CSV and JSON formats. This document details the parsing of Orca Security alert exports into DefectDojo field mappings and unmapped fields. ## Supported File Types @@ -48,21 +48,21 @@ Sample Orca Security scans can be found in the [sample scan data folder](https:/
Click to expand Field Mapping Table -| Source Field | DefectDojo Field | Parser File & Line # | Notes | -| ------------ | ---------------- | -------------------- | ----- | -| Title | title | csv_parser.py:24, helpers.py:51 | Truncated at 150 characters with "..." suffix | -| OrcaScore | severity | csv_parser.py:29, helpers.py:6 | Float mapped to severity string (see Severity Conversion) | -| OrcaScore | severity_justification | csv_parser.py:55, helpers.py:29 | Stored as "OrcaScore: X.X" | -| Category | description | csv_parser.py:25, helpers.py:67 | Included in structured markdown description | -| Inventory.Name | component_name | csv_parser.py:27, csv_parser.py:59 | Cloud resource name | -| CloudAccount.Name | description | csv_parser.py:28, helpers.py:73 | Included in description and used for dedup hash | -| Source | service | csv_parser.py:26, csv_parser.py:58 | Orca resource identifier populates service field | -| Source | description | csv_parser.py:26, helpers.py:69 | Also included in description | -| Status | active | csv_parser.py:30, csv_parser.py:63 | "open" = active, all else = inactive | -| CreatedAt | date | csv_parser.py:31, helpers.py:41 | ISO 8601 parsed to date object | -| LastSeen | description | csv_parser.py:32, helpers.py:81 | Included in description | -| Labels | tags | csv_parser.py:33, csv_parser.py:64-65 | JSON-encoded array parsed and stored as finding tags | -| CloudAccount.Name+Inventory.Name+Title | unique_id_from_tool | csv_parser.py:60, helpers.py:23 | SHA-256 hash for deduplication | +| Source Field | DefectDojo Field | Notes | +| ------------ | ---------------- | ----- | +| Title | title | Truncated at 500 characters with "..." 
suffix | +| OrcaScore | severity | Float mapped to severity string (see Severity Conversion) | +| OrcaScore | severity_justification | Stored as "OrcaScore: X.X" | +| Category | description | Included in structured markdown description | +| Inventory.Name | component_name | Cloud resource name | +| CloudAccount.Name | description | Included in description and used for dedup hash | +| Source | service | Orca resource identifier populates service field | +| Source | description | Also included in description | +| Status | active | "open" = active, all else = inactive | +| CreatedAt | date | ISO 8601 parsed to date object | +| LastSeen | description | Included in description | +| Labels | tags | JSON-encoded array parsed and stored as finding tags | +| CloudAccount.Name+Inventory.Name+Title | unique_id_from_tool | SHA-256 hash for deduplication |
@@ -71,12 +71,12 @@ Sample Orca Security scans can be found in the [sample scan data folder](https:/
Click to expand Additional Settings Table -| Finding Field | Default Value | Parser File & Line # | Notes | -|---------------|---------------|----------------------|-------| -| static_finding | True | csv_parser.py:56 | CSPM scan data is static analysis | -| dynamic_finding | False | csv_parser.py:57 | Not a dynamic/runtime scan | -| active | Varies | csv_parser.py:63 | Based on Status field ("open" = True) | -| mitigation | Not set | — | Orca exports do not include remediation text | +| Finding Field | Default Value | Notes | +|---------------|---------------|-------| +| static_finding | True | CSPM scan data is static analysis | +| dynamic_finding | False | Not a dynamic/runtime scan | +| active | Varies | Based on Status field ("open" = True) | +| mitigation | Not set | Orca exports do not include remediation text |
@@ -93,21 +93,21 @@ Sample Orca Security scans can be found in the [sample scan data folder](https:/
Click to expand Field Mapping Table -| Source Field | DefectDojo Field | Parser File & Line # | Notes | -| ------------ | ---------------- | -------------------- | ----- | -| Title | title | json_parser.py:22, helpers.py:51 | Truncated at 150 characters with "..." suffix | -| OrcaScore | severity | json_parser.py:28, helpers.py:6 | Float mapped to severity string (see Severity Conversion) | -| OrcaScore | severity_justification | json_parser.py:49, helpers.py:29 | Stored as "OrcaScore: X.X" | -| Category | description | json_parser.py:23, helpers.py:67 | Included in structured markdown description | -| Inventory.Name | component_name | json_parser.py:34-35, json_parser.py:53 | Nested object, cloud resource name | -| CloudAccount.Name | description | json_parser.py:31-32, helpers.py:73 | Nested object, included in description and dedup hash | -| Source | service | json_parser.py:24, json_parser.py:52 | Orca resource identifier populates service field | -| Source | description | json_parser.py:24, helpers.py:69 | Also included in description | -| Status | active | json_parser.py:25, json_parser.py:57 | "open" = active, all else = inactive | -| CreatedAt | date | json_parser.py:26, helpers.py:41 | ISO 8601 parsed to date object | -| LastSeen | description | json_parser.py:27, helpers.py:81 | Included in description | -| Labels | tags | json_parser.py:29, json_parser.py:58-59 | Array of strings stored as finding tags | -| CloudAccount.Name+Inventory.Name+Title | unique_id_from_tool | json_parser.py:54, helpers.py:23 | SHA-256 hash for deduplication | +| Source Field | DefectDojo Field | Notes | +| ------------ | ---------------- | ----- | +| Title | title | Truncated at 500 characters with "..." 
suffix | +| OrcaScore | severity | Float mapped to severity string (see Severity Conversion) | +| OrcaScore | severity_justification | Stored as "OrcaScore: X.X" | +| Category | description | Included in structured markdown description | +| Inventory.Name | component_name | Nested object, cloud resource name | +| CloudAccount.Name | description | Nested object, included in description and dedup hash | +| Source | service | Orca resource identifier populates service field | +| Source | description | Also included in description | +| Status | active | "open" = active, all else = inactive | +| CreatedAt | date | ISO 8601 parsed to date object | +| LastSeen | description | Included in description | +| Labels | tags | Array of strings stored as finding tags | +| CloudAccount.Name+Inventory.Name+Title | unique_id_from_tool | SHA-256 hash for deduplication |
@@ -116,12 +116,12 @@ Sample Orca Security scans can be found in the [sample scan data folder](https:/
Click to expand Additional Settings Table -| Finding Field | Default Value | Parser File & Line # | Notes | -|---------------|---------------|----------------------|-------| -| static_finding | True | json_parser.py:50 | CSPM scan data is static analysis | -| dynamic_finding | False | json_parser.py:51 | Not a dynamic/runtime scan | -| active | Varies | json_parser.py:57 | Based on Status field ("open" = True) | -| mitigation | Not set | — | Orca exports do not include remediation text | +| Finding Field | Default Value | Notes | +|---------------|---------------|-------| +| static_finding | True | CSPM scan data is static analysis | +| dynamic_finding | False | Not a dynamic/runtime scan | +| active | Varies | Based on Status field ("open" = True) | +| mitigation | Not set | Orca exports do not include remediation text |
@@ -129,11 +129,11 @@ Sample Orca Security scans can be found in the [sample scan data folder](https:/ ### Date Processing -The parser uses `dateutil.parser.parse()` to handle ISO 8601 date formats from Orca Security exports (helpers.py:41-48). The datetime is converted to a date object using `.date()`. Invalid or missing date strings return `None`. +The parser uses `dateutil.parser.parse()` to handle ISO 8601 date formats from Orca Security exports. The datetime is converted to a date object using `.date()`. Invalid or missing date strings return `None`. ### Severity Conversion -OrcaScore (float 0-10) is converted to DefectDojo severity levels (helpers.py:6-20): +OrcaScore (float 0-10) is converted to DefectDojo severity levels: - `0` or missing → Info - `0.1 - 3.9` → Low - `4.0 - 6.9` → Medium @@ -144,19 +144,19 @@ The conversion uses `float()` with error handling — non-numeric values default ### Severity Justification -The OrcaScore is also stored in the `severity_justification` field as "OrcaScore: X.X" (helpers.py:29-38). This preserves the original numeric score for reference while the severity field contains the mapped categorical value. +The OrcaScore is also stored in the `severity_justification` field as "OrcaScore: X.X". This preserves the original numeric score for reference while the severity field contains the mapped categorical value. ### Description Construction -The parser builds a structured markdown description from all available alert fields (helpers.py:60-86). Each field is formatted as a bold label followed by its value, separated by double newlines. Fields with empty values are omitted. The description includes: Title, Category, Source, Inventory name, Cloud Account name, Orca Score, Status, Created date, Last Seen date, and Labels. +The parser builds a structured markdown description from all available alert fields. Each field is formatted as a bold label followed by its value, separated by double newlines. 
Fields with empty values are omitted. The description includes: Title, Category, Source, Inventory name, Cloud Account name, Orca Score, Status, Created date, Last Seen date, and Labels. ### Title Format -Finding titles use the alert's Title field directly (helpers.py:51-57). Titles longer than 150 characters are truncated with a "..." suffix. Alerts with no title receive the default "Orca Security Alert". +Finding titles use the alert's Title field directly. Titles longer than 500 characters are truncated with a "..." suffix. Alerts with no title receive the default "Orca Security Alert". ### Service Field -The Source field from Orca Security populates the DefectDojo `service` field (csv_parser.py:58, json_parser.py:52). This represents the cloud resource or service that generated the alert. +The Source field from Orca Security populates the DefectDojo `service` field. This represents the cloud resource or service that generated the alert. ### Mitigation Construction @@ -164,10 +164,10 @@ Orca Security CSV and JSON exports do not include remediation or mitigation text ### Deduplication -The `unique_id_from_tool` field is populated with a SHA-256 hex digest of the concatenation `CloudAccount.Name|Inventory.Name|Title` (helpers.py:23-26). This ensures consistent deduplication across both CSV and JSON imports — the same alert produces the same unique ID regardless of import format. Each row/item in the export becomes one Finding with no internal deduplication. +The `unique_id_from_tool` field is populated with a SHA-256 hex digest of the concatenation `CloudAccount.Name|Inventory.Name|Title`. This ensures consistent deduplication across both CSV and JSON imports — the same alert produces the same unique ID regardless of import format. Each row/item in the export becomes one Finding with no internal deduplication. ### Tags Handling -Labels from Orca Security are stored as finding tags using the `unsaved_tags` field (csv_parser.py:64-65, json_parser.py:58-59). 
This makes labels searchable and filterable in DefectDojo. +Labels from Orca Security are stored as finding tags using the `unsaved_tags` field. This makes labels searchable and filterable in DefectDojo. -In CSV format, the Labels column contains a JSON-encoded array of strings (csv_parser.py:35-41). The parser uses `json.loads()` to parse this embedded JSON. If parsing fails, the raw string is used as a single tag. In JSON format, Labels is a native array of strings (json_parser.py:29). +In CSV format, the Labels column contains a JSON-encoded array of strings. The parser uses `json.loads()` to parse this embedded JSON. If parsing fails, the raw string is used as a single tag. In JSON format, Labels is a native array of strings. diff --git a/dojo/settings/settings.dist.py b/dojo/settings/settings.dist.py index 4bf0fbc651e..94cf7d20fd3 100644 --- a/dojo/settings/settings.dist.py +++ b/dojo/settings/settings.dist.py @@ -1752,6 +1752,7 @@ def saml2_attrib_map_format(din): "OpenVAS Parser v2": DEDUPE_ALGO_HASH_CODE, "Snyk Issue API Scan": DEDUPE_ALGO_HASH_CODE, "OpenReports": DEDUPE_ALGO_HASH_CODE, + "Orca Security Alerts": DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL, } # Override the hardcoded settings here via the env var diff --git a/dojo/tools/orca_security/helpers.py b/dojo/tools/orca_security/helpers.py index a870aaacb30..1c847517e5a 100644 --- a/dojo/tools/orca_security/helpers.py +++ b/dojo/tools/orca_security/helpers.py @@ -112,7 +112,7 @@ def parse_date(date_string): return None -def truncate_title(title, max_length=150): +def truncate_title(title, max_length=500): """ Truncate title to maximum length with ellipsis suffix. @@ -121,7 +121,7 @@ def truncate_title(title, max_length=150): Args: title: The original title string, or None/empty string - max_length: Maximum allowed length (default 150 characters) + max_length: Maximum allowed length (default 500 characters) Returns: str: Original title if within limit, truncated with "..." 
if over, diff --git a/dojo/tools/orca_security/parser.py b/dojo/tools/orca_security/parser.py index d18d57a4616..94d3beca11b 100644 --- a/dojo/tools/orca_security/parser.py +++ b/dojo/tools/orca_security/parser.py @@ -1,32 +1,6 @@ -""" -Orca Security parser for DefectDojo. - -Orca Security is a cloud security platform that provides agentless security -and compliance for AWS, Azure, GCP, and Kubernetes environments. This parser -imports Orca Security alert exports in CSV or JSON format. - -For more information about Orca Security, see: https://orca.security/ -""" from dojo.tools.orca_security.csv_parser import OrcaSecurityCSVParser -from dojo.tools.orca_security.helpers import ( - build_description, - build_unique_id, - map_orca_severity, - parse_date, - truncate_title, -) from dojo.tools.orca_security.json_parser import OrcaSecurityJSONParser -# Re-export helpers so existing imports from this module still work -__all__ = [ - "OrcaSecurityParser", - "build_description", - "build_unique_id", - "map_orca_severity", - "parse_date", - "truncate_title", -] - class OrcaSecurityParser: From 931c619dcc7ff43b59c4088ddf78e14a61b2125c Mon Sep 17 00:00:00 2001 From: skywalke34 Date: Thu, 5 Mar 2026 14:53:22 -0700 Subject: [PATCH 12/13] refactor: switch dedup from unique_id_from_tool to hashcode algorithm Reviewers have indicated parsers should not compute unique_id_from_tool. Switch to DEDUPE_ALGO_HASH_CODE using title + component_name fields configured in settings.dist.py. - Remove build_unique_id helper and hashlib import - Remove unique_id_from_tool from CSV and JSON parsers - Add HASHCODE_FIELDS_PER_SCANNER entry for Orca Security Alerts - Change DEDUPLICATION_ALGORITHM_PER_PARSER to DEDUPE_ALGO_HASH_CODE - Update tests and documentation to reflect new dedup approach Authored by T. 
Walker - DefectDojo --- .../parsers/file/orca_security.md | 11 ++++----- dojo/settings/settings.dist.py | 3 ++- dojo/tools/orca_security/csv_parser.py | 3 --- dojo/tools/orca_security/helpers.py | 23 ------------------- dojo/tools/orca_security/json_parser.py | 3 --- unittests/tools/test_orca_security_parser.py | 12 ---------- 6 files changed, 6 insertions(+), 49 deletions(-) diff --git a/docs/content/supported_tools/parsers/file/orca_security.md b/docs/content/supported_tools/parsers/file/orca_security.md index 1cf49c284bd..2d9c290b1be 100644 --- a/docs/content/supported_tools/parsers/file/orca_security.md +++ b/docs/content/supported_tools/parsers/file/orca_security.md @@ -20,11 +20,10 @@ The parser auto-detects the format: files starting with `[` are treated as JSON, ## Default Deduplication Hashcode Fields -By default, DefectDojo identifies duplicate Findings using the [unique_id_from_tool](https://docs.defectdojo.com/en/working_with_findings/finding_deduplication/about_deduplication/) field, which is a SHA-256 hash of: +By default, DefectDojo identifies duplicate Findings using the [hashcode deduplication algorithm](https://docs.defectdojo.com/en/working_with_findings/finding_deduplication/about_deduplication/) with the following fields: -- CloudAccount.Name -- Inventory.Name -- Title +- title +- component_name ### Sample Scan Data @@ -62,7 +61,6 @@ Sample Orca Security scans can be found in the [sample scan data folder](https:/ | CreatedAt | date | ISO 8601 parsed to date object | | LastSeen | description | Included in description | | Labels | tags | JSON-encoded array parsed and stored as finding tags | -| CloudAccount.Name+Inventory.Name+Title | unique_id_from_tool | SHA-256 hash for deduplication | @@ -107,7 +105,6 @@ Sample Orca Security scans can be found in the [sample scan data folder](https:/ | CreatedAt | date | ISO 8601 parsed to date object | | LastSeen | description | Included in description | | Labels | tags | Array of strings stored as finding 
tags | -| CloudAccount.Name+Inventory.Name+Title | unique_id_from_tool | SHA-256 hash for deduplication | @@ -164,7 +161,7 @@ Orca Security CSV and JSON exports do not include remediation or mitigation text ### Deduplication -The `unique_id_from_tool` field is populated with a SHA-256 hex digest of the concatenation `CloudAccount.Name|Inventory.Name|Title`. This ensures consistent deduplication across both CSV and JSON imports — the same alert produces the same unique ID regardless of import format. Each row/item in the export becomes one Finding with no internal deduplication. +Deduplication uses the hashcode algorithm configured in `settings.dist.py` with the fields `title` and `component_name`. This ensures findings with the same alert title on the same resource are deduplicated across reimports. Each row/item in the export becomes one Finding with no internal deduplication. ### Tags Handling diff --git a/dojo/settings/settings.dist.py b/dojo/settings/settings.dist.py index 94cf7d20fd3..b329d15c074 100644 --- a/dojo/settings/settings.dist.py +++ b/dojo/settings/settings.dist.py @@ -1488,6 +1488,7 @@ def saml2_attrib_map_format(din): "Snyk Issue API Scan": ["vuln_id_from_tool", "file_path"], "OpenReports": ["vulnerability_ids", "component_name", "component_version", "severity"], "n0s1 Scanner": ["description"], + "Orca Security Alerts": ["title", "component_name"], } # Override the hardcoded settings here via the env var @@ -1752,7 +1753,7 @@ def saml2_attrib_map_format(din): "OpenVAS Parser v2": DEDUPE_ALGO_HASH_CODE, "Snyk Issue API Scan": DEDUPE_ALGO_HASH_CODE, "OpenReports": DEDUPE_ALGO_HASH_CODE, - "Orca Security Alerts": DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL, + "Orca Security Alerts": DEDUPE_ALGO_HASH_CODE, } # Override the hardcoded settings here via the env var diff --git a/dojo/tools/orca_security/csv_parser.py b/dojo/tools/orca_security/csv_parser.py index 77fb82d4c21..6ca3c5790e2 100644 --- a/dojo/tools/orca_security/csv_parser.py +++ 
b/dojo/tools/orca_security/csv_parser.py @@ -18,7 +18,6 @@ from dojo.tools.orca_security.helpers import ( build_description, build_severity_justification, - build_unique_id, map_orca_severity, parse_date, truncate_title, @@ -87,8 +86,6 @@ def parse(self, content): dynamic_finding=False, service=source or None, # Source identifies the cloud resource/service component_name=inventory_name or None, # Inventory is the specific resource - # Dedup hash uses cloud account + inventory + title for uniqueness - unique_id_from_tool=build_unique_id(cloud_account_name, inventory_name, title_raw), date=parse_date(created_at), ) diff --git a/dojo/tools/orca_security/helpers.py b/dojo/tools/orca_security/helpers.py index 1c847517e5a..f3f2b4dc2a7 100644 --- a/dojo/tools/orca_security/helpers.py +++ b/dojo/tools/orca_security/helpers.py @@ -4,8 +4,6 @@ This module contains utility functions used by both the CSV and JSON parsers to ensure consistent behavior across input formats. """ -import hashlib - from dateutil import parser as dateutil_parser @@ -45,27 +43,6 @@ def map_orca_severity(score): return "Critical" -def build_unique_id(cloud_account_name, inventory_name, title): - """ - Generate a unique identifier for deduplication. - - Creates a SHA-256 hash from the combination of cloud account, inventory, - and title fields. This ensures the same alert produces the same ID - regardless of whether it's imported from CSV or JSON format. - - Args: - cloud_account_name: The name of the cloud account (e.g., "prod-aws-account") - inventory_name: The name of the inventory/resource (e.g., "my-s3-bucket") - title: The alert title (e.g., "Public S3 bucket detected") - - Returns: - str: 64-character hexadecimal SHA-256 hash - - """ - raw = f"{cloud_account_name}|{inventory_name}|{title}" - return hashlib.sha256(raw.encode("utf-8")).hexdigest() - - def build_severity_justification(orca_score): """ Build severity justification string from OrcaScore. 
diff --git a/dojo/tools/orca_security/json_parser.py b/dojo/tools/orca_security/json_parser.py index 3c40b330b6e..36b95362e9e 100644 --- a/dojo/tools/orca_security/json_parser.py +++ b/dojo/tools/orca_security/json_parser.py @@ -28,7 +28,6 @@ from dojo.tools.orca_security.helpers import ( build_description, build_severity_justification, - build_unique_id, map_orca_severity, parse_date, truncate_title, @@ -92,8 +91,6 @@ def parse(self, content): dynamic_finding=False, service=source or None, # Source identifies the cloud resource/service component_name=inventory_name or None, # Inventory is the specific resource - # Dedup hash uses cloud account + inventory + title for uniqueness - unique_id_from_tool=build_unique_id(cloud_account_name, inventory_name, title_raw), date=parse_date(created_at), ) diff --git a/unittests/tools/test_orca_security_parser.py b/unittests/tools/test_orca_security_parser.py index f1e33f24f56..95d3ac9c472 100644 --- a/unittests/tools/test_orca_security_parser.py +++ b/unittests/tools/test_orca_security_parser.py @@ -27,7 +27,6 @@ def test_parse_csv_one_finding(self): self.assertEqual("TestRole_abc123", finding.component_name) self.assertEqual("TestRole_abc123", finding.service) self.assertEqual("OrcaScore: 5.1", finding.severity_justification) - self.assertIsNotNone(finding.unique_id_from_tool) self.assertIn("IAM misconfigurations", finding.description) self.assertEqual(["CSPM", "source: Orca Scan"], finding.unsaved_tags) @@ -72,7 +71,6 @@ def test_parse_json_one_finding(self): self.assertEqual("TestRole_abc123", finding.component_name) self.assertEqual("TestRole_abc123", finding.service) self.assertEqual("OrcaScore: 5.1", finding.severity_justification) - self.assertIsNotNone(finding.unique_id_from_tool) self.assertIn("IAM misconfigurations", finding.description) self.assertEqual(["CSPM", "source: Orca Scan"], finding.unsaved_tags) @@ -97,16 +95,6 @@ def test_parse_json_many_findings(self): # --- Cross-format consistency tests --- - def 
test_unique_id_from_tool_is_consistent(self): - """Same alert data in CSV and JSON should produce the same unique_id_from_tool.""" - with (get_unit_tests_scans_path("orca_security") / "one_vuln.csv").open(encoding="utf-8") as csv_file: - csv_findings = OrcaSecurityParser().get_findings(csv_file, Test()) - - with (get_unit_tests_scans_path("orca_security") / "one_vuln.json").open(encoding="utf-8") as json_file: - json_findings = OrcaSecurityParser().get_findings(json_file, Test()) - - self.assertEqual(csv_findings[0].unique_id_from_tool, json_findings[0].unique_id_from_tool) - def test_date_is_parsed(self): """CreatedAt should be parsed into a date object.""" with (get_unit_tests_scans_path("orca_security") / "one_vuln.json").open(encoding="utf-8") as testfile: From 9765bc99d9812a7d838592b9a5f2b5de845a5bc4 Mon Sep 17 00:00:00 2001 From: skywalke34 Date: Thu, 5 Mar 2026 16:42:51 -0700 Subject: [PATCH 13/13] chore: retrigger CI (runner cancellation on arm64) Authored by T. Walker - DefectDojo