diff --git a/codesectools/datasets/BenchmarkJava/dataset.py b/codesectools/datasets/BenchmarkJava/dataset.py index 62078ca..77ff9a3 100644 --- a/codesectools/datasets/BenchmarkJava/dataset.py +++ b/codesectools/datasets/BenchmarkJava/dataset.py @@ -12,6 +12,7 @@ from codesectools.datasets.core.dataset import File, PrebuiltFileDataset from codesectools.shared.cwe import CWE, CWEs +from codesectools.utils import CPU_COUNT class TestCode(File): @@ -65,7 +66,7 @@ class BenchmarkJava(PrebuiltFileDataset): license = "GPL-2.0" license_url = "https://github.com/OWASP-Benchmark/BenchmarkJava/blob/master/LICENSE" - build_command = "mvn clean compile" + build_command = f"mvn clean compile -T {CPU_COUNT // 2}" prebuilt_expected = (Path("target/classes/org/owasp/benchmark/testcode"), "*.class") artifacts_arg = "." diff --git a/codesectools/sasts/all/parser.py b/codesectools/sasts/all/parser.py index 5def64d..b8d8296 100644 --- a/codesectools/sasts/all/parser.py +++ b/codesectools/sasts/all/parser.py @@ -151,10 +151,11 @@ def stats_by_scores(self) -> dict: ] ) - for line in defect.lines: - if not defect_locations.get(line): - defect_locations[line] = [] - defect_locations[line].append(defect) + if defect.lines: + for line in defect.lines: + if not defect_locations.get(line): + defect_locations[line] = [] + defect_locations[line].append(defect) same_location = 0 same_location_same_cwe = 0 diff --git a/codesectools/sasts/all/report.py b/codesectools/sasts/all/report.py index b53c5a8..a39b4de 100644 --- a/codesectools/sasts/all/report.py +++ b/codesectools/sasts/all/report.py @@ -130,7 +130,7 @@ def generate_single_defect(self, defect_file: dict) -> str: else "None" ) rows.append( - (float("inf"), "None", defect.sast, cwe_link, defect.message) + (float("inf"), "None", defect.sast_name, cwe_link, defect.message) ) for row in sorted(rows, key=lambda r: r[0]): @@ -176,7 +176,12 @@ def generate_single_defect(self, defect_file: dict) -> str: html_content = file_page.export_html(code_format=self.TEMPLATE) html_content = html_content.replace('href="HACK', 'id="') - html_content = html_content.replace("[name]", defect_file["source_path"]) + html_content = html_content.replace( + "[name]", + str( + Path(defect_file["source_path"]).relative_to(self.result.source_path) # ty:ignore[no-matching-overload] + ), + ) html_content = html_content.replace("[tippy_calls]", tippy_calls) return html_content diff --git a/codesectools/sasts/core/parser/__init__.py b/codesectools/sasts/core/parser/__init__.py index d9d62d9..0229ec9 100644 --- a/codesectools/sasts/core/parser/__init__.py +++ b/codesectools/sasts/core/parser/__init__.py @@ -52,8 +52,10 @@ def __init__( lines: A list of line numbers where the defect is located. """ + if not filepath.is_file(): + raise FileNotFoundError(filepath.resolve()) self.filepath = filepath - self.filepath_str = str(filepath) + self.filepath_str = str(filepath.resolve()) self.filename = filepath.name self.sast_name = sast_name self.checker = checker @@ -70,6 +72,7 @@ def __repr__(self) -> str: """ return f"""{self.__class__.__name__}( + sast: \t{self.sast_name} filepath: \t{self.filepath} checker: \t{self.checker} level: \t{self.level} diff --git a/codesectools/sasts/core/parser/format/SARIF/parser.py b/codesectools/sasts/core/parser/format/SARIF/parser.py index 90dd79d..3c08507 100644 --- a/codesectools/sasts/core/parser/format/SARIF/parser.py +++ b/codesectools/sasts/core/parser/format/SARIF/parser.py @@ -54,6 +54,9 @@ def __init__( for result in self.results: filepath, lines = self.get_location(result) + if not filepath: + continue + if rule_id := result.rule_id: rule = self.rules[rule_id] @@ -114,15 +117,16 @@ def get_rule_properties(self, rule_id: str) -> PropertyBag | None: return properties return None - def get_location(self, result: Result) -> tuple[Path, list[int] | None]: + def get_location(self, result: Result) -> tuple[Path | None, list[int]]: """Extract the file path and line numbers from a SARIF result.""" - lines = None + filepath = None + lines = [] if result.locations: if physical_location := result.locations[0].physical_location: if root := physical_location.root: if artifact_location := root.artifact_location: if uri := artifact_location.uri: - filepath = Path(uri) + filepath = self.source_path / uri if region := root.region: lines = [ diff --git a/codesectools/sasts/tools/SpotBugs/parser.py b/codesectools/sasts/tools/SpotBugs/parser.py index 9d53d30..4466b25 100644 --- a/codesectools/sasts/tools/SpotBugs/parser.py +++ b/codesectools/sasts/tools/SpotBugs/parser.py @@ -7,6 +7,7 @@ from __future__ import annotations +from collections import defaultdict from pathlib import Path from typing import TYPE_CHECKING, Any @@ -29,30 +30,27 @@ class SpotBugsAnalysisResult(SARIFAnalysisResult): # /home/user/mypackage/src/main/java/org/mypackage/... def patch_dict(self, sarif_dict: dict) -> dict: """Patch the SARIF dictionary to resolve relative Java class paths.""" - partial_parents = {} + file_index = defaultdict(list) + for file_path in self.source_path.rglob("*.java"): # SpotBugs only support Java + file_index[file_path.name].append(file_path) def recursive_patch(data: Any) -> None: # noqa: ANN401 if isinstance(data, dict): for key, value in data.items(): if key == "uri": partial_filepath = Path(value) - if partial_filepath.parent not in partial_parents: - if next( - self.source_path.rglob(str(partial_filepath)), None + candidates = file_index.get(partial_filepath.name, []) + + found = None + for candidate in candidates: + if ( + candidate.parts[-len(partial_filepath.parts) :] + == partial_filepath.parts ): - filepath = next( - self.source_path.rglob(str(partial_filepath)) - ).relative_to(self.source_path) - partial_parents[partial_filepath.parent] = ( - filepath.parent - ) - else: - filepath = ( - partial_parents[partial_filepath.parent] - / partial_filepath.name - ) + found = str(candidate.resolve()) + break - data[key] = str(filepath) + data[key] = found if found else None else: recursive_patch(value) diff --git a/codesectools/sasts/tools/SpotBugs/sast.py b/codesectools/sasts/tools/SpotBugs/sast.py index ff06e6e..f7372e2 100644 --- a/codesectools/sasts/tools/SpotBugs/sast.py +++ b/codesectools/sasts/tools/SpotBugs/sast.py @@ -61,6 +61,8 @@ class SpotBugsSAST(PrebuiltSAST): [ "spotbugs", "-textui", + "-maxHeap", + "4096", "-nested:true", "-progress", "-sarif=spotbugs.sarif", diff --git a/codesectools/utils.py b/codesectools/utils.py index f3aa49c..f111aa5 100644 --- a/codesectools/utils.py +++ b/codesectools/utils.py @@ -259,4 +259,4 @@ def shorten_path(path: str, max_len: int = 20) -> str: return str(shortened_path) -CPU_COUNT = os.cpu_count() +CPU_COUNT = os.cpu_count() or 2 diff --git a/pyproject.toml b/pyproject.toml index 5c931d8..2f8e35c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "CodeSecTools" -version = "0.15.1" +version = "0.15.2" description = "A framework for code security that provides abstractions for static analysis tools and datasets to support their integration, testing, and evaluation." readme = "README.md" license = "AGPL-3.0-only" diff --git a/tests/test_all_sasts.py b/tests/test_all_sasts.py index 0db0155..7998fc8 100644 --- a/tests/test_all_sasts.py +++ b/tests/test_all_sasts.py @@ -11,7 +11,7 @@ from codesectools.sasts import SASTS_ALL from codesectools.sasts.all.cli import build_cli from codesectools.sasts.all.sast import AllSAST -from codesectools.utils import run_command +from codesectools.utils import CPU_COUNT, run_command all_sast = AllSAST() @@ -46,7 +46,9 @@ def test_analyze(monkeypatch: pytest.MonkeyPatch) -> None: git.Repo.clone_from("https://github.com/appsecco/dvja.git", "/tmp/dvja") monkeypatch.chdir("/tmp/dvja") - retcode, stdout = run_command("mvn clean compile".split(" "), cwd=Path("/tmp/dvja")) + retcode, stdout = run_command( + f"mvn clean compile -T {CPU_COUNT // 2}".split(" "), cwd=Path("/tmp/dvja") + ) assert retcode == 0 result = runner.invoke( diff --git a/uv.lock b/uv.lock index 2913e35..d29c33b 100644 --- a/uv.lock +++ b/uv.lock @@ -239,7 +239,7 @@ wheels = [ [[package]] name = "codesectools" -version = "0.15.1" +version = "0.15.2" source = { editable = "." } dependencies = [ { name = "gitpython" },