Skip to content

Commit 14a43da

Browse files
committed
feat: upgrade to SDK 2.1.8 with lazy loading and improved committer handling
- Upgrade socket-sdk-python dependency to version 2.1.8 to support lazy file loading capabilities
- Enable lazy file loading in fullscans.post() with use_lazy_loading=True and max_open_files=50 to prevent "Too many open files" errors when processing large numbers of manifest files
- Remove custom lazy_file_loader module as this functionality is now handled by the SDK
- Fix committer display format by implementing proper priority order:
  1. CLI --committers argument (highest priority)
  2. CI/CD SCM username (GITHUB_ACTOR, GITLAB_USER_LOGIN, BITBUCKET_STEP_TRIGGERER_UUID)
  3. Git username extracted from email patterns (e.g., GitHub noreply emails)
  4. Git email address
  5. Git author name (fallback)
- Add get_formatted_committer() method to Git class to properly format committer strings instead of displaying raw git.Actor objects
- Include license alerts in diff processing by removing licenseSpdxDisj filter condition
- Change ulimit warning messages from log.warning to log.debug to reduce noise
- Update create_full_scan() method signature to accept file paths directly instead of pre-processed file objects
- Remove deprecated load_files_for_sending() method as lazy loading is now handled by the SDK

This update improves performance for large repositories, provides better committer identification in CI/CD environments, and ensures license violations are properly reported.
1 parent 095b0cc commit 14a43da

File tree

6 files changed

+88
-41
lines changed

6 files changed

+88
-41
lines changed

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
66

77
[project]
88
name = "socketsecurity"
9-
version = "2.1.35"
9+
version = "2.1.36"
1010
requires-python = ">= 3.10"
1111
license = {"file" = "LICENSE"}
1212
dependencies = [
@@ -16,7 +16,7 @@ dependencies = [
1616
'GitPython',
1717
'packaging',
1818
'python-dotenv',
19-
'socket-sdk-python>=2.1.5,<3'
19+
'socket-sdk-python>=2.1.8,<3'
2020
]
2121
readme = "README.md"
2222
description = "Socket Security CLI for CI/CD"

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ requests==2.32.4
5959
# via socketsecurity
6060
smmap==5.0.2
6161
# via gitdb
62-
socket-sdk-python==2.1.5
62+
socket-sdk-python==2.1.8
6363
# via socketsecurity
6464
typing-extensions==4.12.2
6565
# via socket-sdk-python

socketsecurity/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
__author__ = 'socket.dev'
2-
__version__ = '2.1.35'
2+
__version__ = '2.1.36'

socketsecurity/core/__init__.py

Lines changed: 22 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
from .socket_config import SocketConfig
3131
from .utils import socket_globs
3232
from .resource_utils import check_file_count_against_ulimit
33-
from .lazy_file_loader import load_files_for_sending_lazy
3433
import importlib
3534
logging_std = importlib.import_module("logging")
3635

@@ -338,10 +337,10 @@ def find_files(self, path: str) -> List[str]:
338337
ulimit_check = check_file_count_against_ulimit(file_count)
339338
if ulimit_check["can_check"]:
340339
if ulimit_check["would_exceed"]:
341-
log.warning(f"Found {file_count} manifest files, which may exceed the file descriptor limit (ulimit -n = {ulimit_check['soft_limit']})")
342-
log.warning(f"Available file descriptors: {ulimit_check['available_fds']} (after {ulimit_check['buffer_size']} buffer)")
343-
log.warning(f"Recommendation: {ulimit_check['recommendation']}")
344-
log.warning("This may cause 'Too many open files' errors during processing")
340+
log.debug(f"Found {file_count} manifest files, which may exceed the file descriptor limit (ulimit -n = {ulimit_check['soft_limit']})")
341+
log.debug(f"Available file descriptors: {ulimit_check['available_fds']} (after {ulimit_check['buffer_size']} buffer)")
342+
log.debug(f"Recommendation: {ulimit_check['recommendation']}")
343+
log.debug("This may cause 'Too many open files' errors during processing")
345344
else:
346345
log.debug(f"File count ({file_count}) is within file descriptor limit ({ulimit_check['soft_limit']})")
347346
else:
@@ -441,30 +440,12 @@ def empty_head_scan_file() -> list[tuple[str, tuple[str, Union[BinaryIO, BytesIO
441440
empty_full_scan_file = [(empty_filename, (empty_filename, empty_file_obj))]
442441
return empty_full_scan_file
443442

444-
@staticmethod
445-
def load_files_for_sending(files: List[str], workspace: str) -> List[Tuple[str, Tuple[str, BinaryIO]]]:
446-
"""
447-
Prepares files for sending to the Socket API using lazy loading.
448-
449-
This version uses lazy file loading to prevent "Too many open files" errors
450-
when processing large numbers of manifest files.
451-
452-
Args:
453-
files: List of file paths from find_files()
454-
workspace: Base directory path to make paths relative to
455-
456-
Returns:
457-
List of tuples formatted for requests multipart upload:
458-
[(field_name, (filename, file_object)), ...]
459-
"""
460-
return load_files_for_sending_lazy(files, workspace)
461-
462-
def create_full_scan(self, files: list[tuple[str, tuple[str, BytesIO]]], params: FullScanParams) -> FullScan:
443+
def create_full_scan(self, files: List[str], params: FullScanParams) -> FullScan:
463444
"""
464445
Creates a new full scan via the Socket API.
465446
466447
Args:
467-
files: List of files to scan
448+
files: List of file paths to scan
468449
params: Parameters for the full scan
469450
470451
Returns:
@@ -473,7 +454,7 @@ def create_full_scan(self, files: list[tuple[str, tuple[str, BytesIO]]], params:
473454
log.info("Creating new full scan")
474455
create_full_start = time.time()
475456

476-
res = self.sdk.fullscans.post(files, params, use_types=True)
457+
res = self.sdk.fullscans.post(files, params, use_types=True, use_lazy_loading=True, max_open_files=50)
477458
if not res.success:
478459
log.error(f"Error creating full scan: {res.message}, status: {res.status}")
479460
raise Exception(f"Error creating full scan: {res.message}, status: {res.status}")
@@ -525,14 +506,13 @@ def create_full_scan_with_report_url(
525506
if save_manifest_tar_path and files:
526507
self.save_manifest_tar(files, save_manifest_tar_path, path)
527508

528-
files_for_sending = self.load_files_for_sending(files, path)
529509
if not files:
530510
return diff
531511

532512
try:
533513
# Create new scan
534514
new_scan_start = time.time()
535-
new_full_scan = self.create_full_scan(files_for_sending, params)
515+
new_full_scan = self.create_full_scan(files, params)
536516
new_scan_end = time.time()
537517
log.info(f"Total time to create new full scan: {new_scan_end - new_scan_start:.2f}")
538518
except APIFailure as e:
@@ -779,7 +759,15 @@ def get_added_and_removed_packages(
779759
log.info(f"Comparing scans - Head scan ID: {head_full_scan_id}, New scan ID: {new_full_scan_id}")
780760
diff_start = time.time()
781761
try:
782-
diff_report = self.sdk.fullscans.stream_diff(self.config.org_slug, head_full_scan_id, new_full_scan_id, use_types=True).data
762+
diff_report = (
763+
self.sdk.fullscans.stream_diff
764+
(
765+
self.config.org_slug,
766+
head_full_scan_id,
767+
new_full_scan_id,
768+
use_types=True
769+
).data
770+
)
783771
except APIFailure as e:
784772
log.error(f"API Error: {e}")
785773
sys.exit(1)
@@ -877,7 +865,6 @@ def create_new_diff(
877865
if save_manifest_tar_path and files:
878866
self.save_manifest_tar(files, save_manifest_tar_path, path)
879867

880-
files_for_sending = self.load_files_for_sending(files, path)
881868
if not files:
882869
return Diff(id="NO_DIFF_RAN", diff_url="", report_url="")
883870

@@ -901,7 +888,7 @@ def create_new_diff(
901888
# Create new scan
902889
try:
903890
new_scan_start = time.time()
904-
new_full_scan = self.create_full_scan(files_for_sending, params)
891+
new_full_scan = self.create_full_scan(files, params)
905892
new_scan_end = time.time()
906893
log.info(f"Total time to create new full scan: {new_scan_end - new_scan_start:.2f}")
907894
except APIFailure as e:
@@ -1156,11 +1143,10 @@ def add_package_alerts_to_collection(self, package: Package, alerts_collection:
11561143
action = self.config.security_policy[alert.type]['action']
11571144
setattr(issue_alert, action, True)
11581145

1159-
if issue_alert.type != 'licenseSpdxDisj':
1160-
if issue_alert.key not in alerts_collection:
1161-
alerts_collection[issue_alert.key] = [issue_alert]
1162-
else:
1163-
alerts_collection[issue_alert.key].append(issue_alert)
1146+
if issue_alert.key not in alerts_collection:
1147+
alerts_collection[issue_alert.key] = [issue_alert]
1148+
else:
1149+
alerts_collection[issue_alert.key].append(issue_alert)
11641150

11651151
return alerts_collection
11661152

socketsecurity/core/git_interface.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,67 @@ def commit_str(self) -> str:
319319
"""Return commit SHA as a string"""
320320
return self.commit.hexsha
321321

322+
def get_formatted_committer(self) -> str:
323+
"""
324+
Get the committer in the preferred order:
325+
1. CLI --committers (handled in socketcli.py)
326+
2. CI/CD SCM username (GitHub/GitLab/BitBucket environment variables)
327+
3. Git username (extracted from email patterns like GitHub noreply)
328+
4. Git email address
329+
5. Git author name (fallback)
330+
331+
Returns:
332+
Formatted committer string
333+
"""
334+
# Check for CI/CD environment usernames first
335+
# GitHub Actions
336+
github_actor = os.getenv('GITHUB_ACTOR')
337+
if github_actor:
338+
log.debug(f"Using GitHub actor as committer: {github_actor}")
339+
return github_actor
340+
341+
# GitLab CI
342+
gitlab_user_login = os.getenv('GITLAB_USER_LOGIN')
343+
if gitlab_user_login:
344+
log.debug(f"Using GitLab user login as committer: {gitlab_user_login}")
345+
return gitlab_user_login
346+
347+
# Bitbucket Pipelines
348+
bitbucket_step_triggerer_uuid = os.getenv('BITBUCKET_STEP_TRIGGERER_UUID')
349+
if bitbucket_step_triggerer_uuid:
350+
log.debug(f"Using Bitbucket step triggerer as committer: {bitbucket_step_triggerer_uuid}")
351+
return bitbucket_step_triggerer_uuid
352+
353+
# Fall back to commit author/committer details
354+
# Priority 3: Try to extract git username from email patterns first
355+
if self.author and self.author.email and self.author.email.strip():
356+
email = self.author.email.strip()
357+
358+
# If it's a GitHub noreply email, try to extract username
359+
if email.endswith('@users.noreply.github.com'):
360+
# Pattern: number+username@users.noreply.github.com
361+
email_parts = email.split('@')[0]
362+
if '+' in email_parts:
363+
username = email_parts.split('+')[1]
364+
log.debug(f"Extracted GitHub username from noreply email: {username}")
365+
return username
366+
367+
# Priority 4: Use email if available
368+
if self.author and self.author.email and self.author.email.strip():
369+
email = self.author.email.strip()
370+
log.debug(f"Using commit author email as committer: {email}")
371+
return email
372+
373+
# Priority 5: Fall back to author name as last resort
374+
if self.author and self.author.name and self.author.name.strip():
375+
name = self.author.name.strip()
376+
log.debug(f"Using commit author name as fallback committer: {name}")
377+
return name
378+
379+
# Ultimate fallback
380+
log.debug("Using fallback committer: unknown")
381+
return "unknown"
382+
322383
def get_default_branch_name(self) -> str:
323384
"""
324385
Get the default branch name from the remote origin.

socketsecurity/socketcli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def main_code():
125125
if not config.branch:
126126
config.branch = git_repo.branch
127127
if not config.committers:
128-
config.committers = [git_repo.author]
128+
config.committers = [git_repo.get_formatted_committer()]
129129
if not config.commit_message:
130130
config.commit_message = git_repo.commit_message
131131
except InvalidGitRepositoryError:

0 commit comments

Comments
 (0)