Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion codesectools/datasets/BenchmarkJava/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from codesectools.datasets.core.dataset import File, PrebuiltFileDataset
from codesectools.shared.cwe import CWE, CWEs
from codesectools.utils import CPU_COUNT


class TestCode(File):
Expand Down Expand Up @@ -65,7 +66,7 @@ class BenchmarkJava(PrebuiltFileDataset):
license = "GPL-2.0"
license_url = "https://github.com/OWASP-Benchmark/BenchmarkJava/blob/master/LICENSE"

build_command = "mvn clean compile"
build_command = f"mvn clean compile -T {CPU_COUNT // 2}"
prebuilt_expected = (Path("target/classes/org/owasp/benchmark/testcode"), "*.class")
artifacts_arg = "."

Expand Down
9 changes: 5 additions & 4 deletions codesectools/sasts/all/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,10 +151,11 @@ def stats_by_scores(self) -> dict:
]
)

for line in defect.lines:
if not defect_locations.get(line):
defect_locations[line] = []
defect_locations[line].append(defect)
if defect.lines:
for line in defect.lines:
if not defect_locations.get(line):
defect_locations[line] = []
defect_locations[line].append(defect)

same_location = 0
same_location_same_cwe = 0
Expand Down
9 changes: 7 additions & 2 deletions codesectools/sasts/all/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def generate_single_defect(self, defect_file: dict) -> str:
else "None"
)
rows.append(
(float("inf"), "None", defect.sast, cwe_link, defect.message)
(float("inf"), "None", defect.sast_name, cwe_link, defect.message)
)

for row in sorted(rows, key=lambda r: r[0]):
Expand Down Expand Up @@ -176,7 +176,12 @@ def generate_single_defect(self, defect_file: dict) -> str:

html_content = file_page.export_html(code_format=self.TEMPLATE)
html_content = html_content.replace('href="HACK', 'id="')
html_content = html_content.replace("[name]", defect_file["source_path"])
html_content = html_content.replace(
"[name]",
str(
Path(defect_file["source_path"]).relative_to(self.result.source_path) # ty:ignore[no-matching-overload]
),
)
html_content = html_content.replace("[tippy_calls]", tippy_calls)

return html_content
Expand Down
5 changes: 4 additions & 1 deletion codesectools/sasts/core/parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,10 @@ def __init__(
lines: A list of line numbers where the defect is located.

"""
if not filepath.is_file():
raise FileNotFoundError(filepath.resolve())
self.filepath = filepath
self.filepath_str = str(filepath)
self.filepath_str = str(filepath.resolve())
self.filename = filepath.name
self.sast_name = sast_name
self.checker = checker
Expand All @@ -70,6 +72,7 @@ def __repr__(self) -> str:

"""
return f"""{self.__class__.__name__}(
sast: \t{self.sast_name}
filepath: \t{self.filepath}
checker: \t{self.checker}
level: \t{self.level}
Expand Down
10 changes: 7 additions & 3 deletions codesectools/sasts/core/parser/format/SARIF/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ def __init__(
for result in self.results:
filepath, lines = self.get_location(result)

if not filepath:
continue

if rule_id := result.rule_id:
rule = self.rules[rule_id]

Expand Down Expand Up @@ -114,15 +117,16 @@ def get_rule_properties(self, rule_id: str) -> PropertyBag | None:
return properties
return None

def get_location(self, result: Result) -> tuple[Path, list[int] | None]:
def get_location(self, result: Result) -> tuple[Path | None, list[int]]:
"""Extract the file path and line numbers from a SARIF result."""
lines = None
filepath = None
lines = []
if result.locations:
if physical_location := result.locations[0].physical_location:
if root := physical_location.root:
if artifact_location := root.artifact_location:
if uri := artifact_location.uri:
filepath = Path(uri)
filepath = self.source_path / uri

if region := root.region:
lines = [
Expand Down
30 changes: 14 additions & 16 deletions codesectools/sasts/tools/SpotBugs/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from __future__ import annotations

from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING, Any

Expand All @@ -29,30 +30,27 @@ class SpotBugsAnalysisResult(SARIFAnalysisResult):
# /home/user/mypackage/src/main/java/org/mypackage/...
def patch_dict(self, sarif_dict: dict) -> dict:
"""Patch the SARIF dictionary to resolve relative Java class paths."""
partial_parents = {}
file_index = defaultdict(list)
for file_path in self.source_path.rglob("*.java"): # SpotBugs only support Java
file_index[file_path.name].append(file_path)

def recursive_patch(data: Any) -> None: # noqa: ANN401
if isinstance(data, dict):
for key, value in data.items():
if key == "uri":
partial_filepath = Path(value)
if partial_filepath.parent not in partial_parents:
if next(
self.source_path.rglob(str(partial_filepath)), None
candidates = file_index.get(partial_filepath.name, [])

found = None
for candidate in candidates:
if (
candidate.parts[-len(partial_filepath.parts) :]
== partial_filepath.parts
):
filepath = next(
self.source_path.rglob(str(partial_filepath))
).relative_to(self.source_path)
partial_parents[partial_filepath.parent] = (
filepath.parent
)
else:
filepath = (
partial_parents[partial_filepath.parent]
/ partial_filepath.name
)
found = str(candidate.resolve())
break

data[key] = str(filepath)
data[key] = found if found else None
else:
recursive_patch(value)

Expand Down
2 changes: 2 additions & 0 deletions codesectools/sasts/tools/SpotBugs/sast.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ class SpotBugsSAST(PrebuiltSAST):
[
"spotbugs",
"-textui",
"-maxHeap",
"4096",
"-nested:true",
"-progress",
"-sarif=spotbugs.sarif",
Expand Down
2 changes: 1 addition & 1 deletion codesectools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,4 +259,4 @@ def shorten_path(path: str, max_len: int = 20) -> str:
return str(shortened_path)


CPU_COUNT = os.cpu_count()
CPU_COUNT = os.cpu_count() or 2
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "CodeSecTools"
version = "0.15.1"
version = "0.15.2"
description = "A framework for code security that provides abstractions for static analysis tools and datasets to support their integration, testing, and evaluation."
readme = "README.md"
license = "AGPL-3.0-only"
Expand Down
6 changes: 4 additions & 2 deletions tests/test_all_sasts.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from codesectools.sasts import SASTS_ALL
from codesectools.sasts.all.cli import build_cli
from codesectools.sasts.all.sast import AllSAST
from codesectools.utils import run_command
from codesectools.utils import CPU_COUNT, run_command

all_sast = AllSAST()

Expand Down Expand Up @@ -46,7 +46,9 @@ def test_analyze(monkeypatch: pytest.MonkeyPatch) -> None:
git.Repo.clone_from("https://github.com/appsecco/dvja.git", "/tmp/dvja")
monkeypatch.chdir("/tmp/dvja")

retcode, stdout = run_command("mvn clean compile".split(" "), cwd=Path("/tmp/dvja"))
retcode, stdout = run_command(
f"mvn clean compile -T {CPU_COUNT // 2}".split(" "), cwd=Path("/tmp/dvja")
)
assert retcode == 0

result = runner.invoke(
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.