Skip to content

Commit 8fbc35a

Browse files
dacoburn and flowstate authored Feb 26, 2025
Fixes for diff logic (#57)
--------- Co-authored-by: Eric Hibbs <eric@socket.dev>
1 parent 7b7089e commit 8fbc35a

File tree

11 files changed

+167
-68
lines changed

11 files changed

+167
-68
lines changed
 

‎.gitignore

+2-1
Original file line number | Diff line number | Diff line change
@@ -23,4 +23,5 @@ file_generator.py
2323
.coverage
2424
.env.local
2525
Pipfile
26-
test/
26+
test/
27+
logs

‎Pipfile.lock

+20
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎README.md

+2
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ socketcli [-h] [--api-token API_TOKEN] [--repo REPO] [--integration {api,github,
1010
[--target-path TARGET_PATH] [--sbom-file SBOM_FILE] [--files FILES] [--default-branch] [--pending-head]
1111
[--generate-license] [--enable-debug] [--enable-json] [--enable-sarif] [--disable-overview] [--disable-security-issue]
1212
[--allow-unverified] [--ignore-commit-files] [--disable-blocking] [--scm SCM] [--timeout TIMEOUT]
13+
[--exclude-license-details]
1314
````
1415

1516
If you don't want to provide the Socket API Token every time then you can use the environment variable `SOCKET_SECURITY_API_KEY`
@@ -58,6 +59,7 @@ If you don't want to provide the Socket API Token every time then you can use th
5859
| --enable-json | False | False | Output in JSON format |
5960
| --enable-sarif | False | False | Enable SARIF output of results instead of table or JSON format|
6061
| --disable-overview | False | False | Disable overview output |
62+
| --exclude-license-details | False | False | Exclude license details from the diff report (boosts performance for large repos) |
6163

6264
#### Security Configuration
6365
| Parameter | Required | Default | Description |

‎pyproject.toml

+2-2
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,8 @@ dependencies = [
1212
'prettytable',
1313
'GitPython',
1414
'packaging',
15-
'python-dotenv',
16-
'socket-sdk-python>=2.0.5'
15+
'python-dotenv',
16+
'socket-sdk-python>=2.0.7'
1717
]
1818
readme = "README.md"
1919
description = "Socket Security CLI for CI/CD"

‎socketsecurity/__init__.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
11
__author__ = 'socket.dev'
2-
__version__ = '2.0.6'
2+
__version__ = '2.0.7'

‎socketsecurity/config.py

+8
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ class CliConfig:
3333
integration_org_slug: Optional[str] = None
3434
pending_head: bool = False
3535
timeout: Optional[int] = 1200
36+
exclude_license_details: bool = False
3637
@classmethod
3738
def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
3839
parser = create_argument_parser()
@@ -71,6 +72,7 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
7172
'integration_type': args.integration,
7273
'pending_head': args.pending_head,
7374
'timeout': args.timeout,
75+
'exclude_license_details': args.exclude_license_details,
7476
}
7577

7678
if args.owner:
@@ -283,6 +285,12 @@ def create_argument_parser() -> argparse.ArgumentParser:
283285
action="store_true",
284286
help=argparse.SUPPRESS
285287
)
288+
output_group.add_argument(
289+
"--exclude-license-details",
290+
dest="exclude_license_details",
291+
action="store_true",
292+
help="Exclude license details from the diff report (boosts performance for large repos)"
293+
)
286294

287295
# Security Configuration
288296
security_group = parser.add_argument_group('Security Configuration')

‎socketsecurity/core/__init__.py

+90-58
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,14 @@
1-
import base64
2-
import json
31
import logging
42
import time
3+
import sys
54
from dataclasses import asdict
65
from glob import glob
76
from pathlib import PurePath
8-
from typing import BinaryIO, Dict, List, Optional, Tuple
9-
7+
from typing import BinaryIO, Dict, List, Tuple
108
from socketdev import socketdev
119
from socketdev.fullscans import (
1210
FullScanParams,
13-
SocketArtifact,
14-
DiffArtifact,
11+
SocketArtifact
1512
)
1613
from socketdev.org import Organization
1714
from socketdev.repos import RepositoryInfo
@@ -27,8 +24,9 @@
2724
Purl,
2825
)
2926
from socketsecurity.core.exceptions import (
30-
APIResourceNotFound,
27+
APIResourceNotFound
3128
)
29+
from socketdev.exceptions import APIFailure
3230
from socketsecurity.core.licenses import Licenses
3331

3432
from .socket_config import SocketConfig
@@ -148,21 +146,24 @@ def find_files(path: str) -> List[str]:
148146
for file_name in patterns:
149147
pattern = Core.to_case_insensitive_regex(patterns[file_name]["pattern"])
150148
file_path = f"{path}/**/{pattern}"
151-
log.debug(f"Globbing {file_path}")
149+
#log.debug(f"Globbing {file_path}")
152150
glob_start = time.time()
153151
glob_files = glob(file_path, recursive=True)
154152
for glob_file in glob_files:
155153
if glob_file not in files:
156154
files.add(glob_file)
157155
glob_end = time.time()
158156
glob_total_time = glob_end - glob_start
159-
log.debug(f"Glob for pattern {file_path} took {glob_total_time:.2f} seconds")
157+
#log.debug(f"Glob for pattern {file_path} took {glob_total_time:.2f} seconds")
160158

161159
log.debug("Finished Find Files")
162160
end_time = time.time()
163161
total_time = end_time - start_time
164-
log.info(f"Found {len(files)} in {total_time:.2f} seconds")
165-
log.debug(f"Files found: {list(files)}")
162+
files_list = list(files)
163+
if len(files_list) > 5:
164+
log.debug(f"{len(files_list)} Files found ({total_time:.2f}s): {', '.join(files_list[:5])}, ...")
165+
else:
166+
log.debug(f"{len(files_list)} Files found ({total_time:.2f}s): {', '.join(files_list)}")
166167
return list(files)
167168

168169
@staticmethod
@@ -216,7 +217,7 @@ def load_files_for_sending(files: List[str], workspace: str) -> List[Tuple[str,
216217

217218
return send_files
218219

219-
def create_full_scan(self, files: List[str], params: FullScanParams) -> FullScan:
220+
def create_full_scan(self, files: List[str], params: FullScanParams, has_head_scan: bool = False) -> FullScan:
220221
"""
221222
Creates a new full scan via the Socket API.
222223
@@ -236,10 +237,10 @@ def create_full_scan(self, files: List[str], params: FullScanParams) -> FullScan
236237
raise Exception(f"Error creating full scan: {res.message}, status: {res.status}")
237238

238239
full_scan = FullScan(**asdict(res.data))
239-
240-
full_scan_artifacts_dict = self.get_sbom_data(full_scan.id)
241-
full_scan.sbom_artifacts = self.get_sbom_data_list(full_scan_artifacts_dict)
242-
full_scan.packages = self.create_packages_dict(full_scan.sbom_artifacts)
240+
if not has_head_scan:
241+
full_scan_artifacts_dict = self.get_sbom_data(full_scan.id)
242+
full_scan.sbom_artifacts = self.get_sbom_data_list(full_scan_artifacts_dict)
243+
full_scan.packages = self.create_packages_dict(full_scan.sbom_artifacts)
243244

244245
create_full_end = time.time()
245246
total_time = create_full_end - create_full_start
@@ -317,24 +318,37 @@ def get_package_license_text(self, package: Package) -> str:
317318

318319
return ""
319320

320-
def get_repo_info(self, repo_slug: str) -> RepositoryInfo:
321+
def get_repo_info(self, repo_slug: str, default_branch: str = "socket-default-branch") -> RepositoryInfo:
321322
"""
322323
Gets repository information from the Socket API.
323324
324325
Args:
325326
repo_slug: Repository slug to get info for
327+
default_branch: Default branch string to use if the repo doesn't exist
326328
327329
Returns:
328330
RepositoryInfo object
329331
330332
Raises:
331333
Exception: If API request fails
332334
"""
333-
response = self.sdk.repos.repo(self.config.org_slug, repo_slug)
334-
if not response.success:
335-
log.error(f"Failed to get repository: {response.status}")
336-
log.error(response.message)
337-
raise Exception(f"Failed to get repository info: {response.status}, message: {response.message}")
335+
try:
336+
response = self.sdk.repos.repo(self.config.org_slug, repo_slug)
337+
if not response.success:
338+
log.error(f"Failed to get repository: {response.status}")
339+
log.error(response.message)
340+
# raise Exception(f"Failed to get repository info: {response.status}, message: {response.message}")
341+
except APIFailure:
342+
log.warning(f"Failed to get repository {repo_slug}, attempting to create it")
343+
create_response = self.sdk.repos.post(self.config.org_slug, name=repo_slug, default_branch=default_branch)
344+
if not create_response.success:
345+
log.error(f"Failed to create repository: {create_response.status}")
346+
log.error(create_response.message)
347+
raise Exception(
348+
f"Failed to create repository: {create_response.status}, message: {create_response.message}"
349+
)
350+
else:
351+
return create_response.data
338352
return response.data
339353

340354
def get_head_scan_for_repo(self, repo_slug: str) -> str:
@@ -350,24 +364,36 @@ def get_head_scan_for_repo(self, repo_slug: str) -> str:
350364
repo_info = self.get_repo_info(repo_slug)
351365
return repo_info.head_full_scan_id if repo_info.head_full_scan_id else None
352366

353-
def get_added_and_removed_packages(self, head_full_scan: Optional[FullScan], new_full_scan: FullScan) -> Tuple[Dict[str, Package], Dict[str, Package]]:
367+
@staticmethod
368+
def update_package_values(pkg: Package) -> Package:
369+
pkg.purl = f"{pkg.name}@{pkg.version}"
370+
pkg.url = f"https://socket.dev/{pkg.type}/package"
371+
if pkg.namespace:
372+
pkg.purl = f"{pkg.namespace}/{pkg.purl}"
373+
pkg.url += f"/{pkg.namespace}"
374+
pkg.url += f"/{pkg.name}/overview/{pkg.version}"
375+
return pkg
376+
377+
def get_added_and_removed_packages(self, head_full_scan_id: str, new_full_scan: FullScan) -> Tuple[Dict[str, Package], Dict[str, Package]]:
354378
"""
355379
Get packages that were added and removed between scans.
356380
357381
Args:
358382
head_full_scan: Previous scan (may be None if first scan)
359-
new_full_scan: New scan just created
383+
head_full_scan_id: New scan just created
360384
361385
Returns:
362386
Tuple of (added_packages, removed_packages) dictionaries
363387
"""
364-
if head_full_scan is None:
388+
if head_full_scan_id is None:
365389
log.info(f"No head scan found. New scan ID: {new_full_scan.id}")
366390
return new_full_scan.packages, {}
367391

368-
log.info(f"Comparing scans - Head scan ID: {head_full_scan.id}, New scan ID: {new_full_scan.id}")
369-
diff_report = self.sdk.fullscans.stream_diff(self.config.org_slug, head_full_scan.id, new_full_scan.id).data
370-
392+
log.info(f"Comparing scans - Head scan ID: {head_full_scan_id}, New scan ID: {new_full_scan.id}")
393+
diff_start = time.time()
394+
diff_report = self.sdk.fullscans.stream_diff(self.config.org_slug, head_full_scan_id, new_full_scan.id).data
395+
diff_end = time.time()
396+
log.info(f"Diff Report Gathered in {diff_end - diff_start:.2f} seconds")
371397
log.info(f"Diff report artifact counts:")
372398
log.info(f"Added: {len(diff_report.artifacts.added)}")
373399
log.info(f"Removed: {len(diff_report.artifacts.removed)}")
@@ -384,32 +410,24 @@ def get_added_and_removed_packages(self, head_full_scan: Optional[FullScan], new
384410
for artifact in added_artifacts:
385411
try:
386412
pkg = Package.from_diff_artifact(asdict(artifact))
413+
pkg = Core.update_package_values(pkg)
387414
added_packages[artifact.id] = pkg
388415
except KeyError:
389416
log.error(f"KeyError: Could not create package from added artifact {artifact.id}")
390417
log.error(f"Artifact details - name: {artifact.name}, version: {artifact.version}")
391-
matches = [p for p in new_full_scan.packages.values() if p.name == artifact.name and p.version == artifact.version]
392-
if matches:
393-
log.error(f"Found {len(matches)} packages with matching name/version:")
394-
for m in matches:
395-
log.error(f" ID: {m.id}, name: {m.name}, version: {m.version}")
396-
else:
397-
log.error("No matching packages found in new_full_scan")
418+
log.error("No matching packages found in new_full_scan")
398419

399420
for artifact in removed_artifacts:
400421
try:
401422
pkg = Package.from_diff_artifact(asdict(artifact))
423+
pkg = Core.update_package_values(pkg)
424+
if pkg.namespace:
425+
pkg.purl += f"{pkg.namespace}/{pkg.purl}"
402426
removed_packages[artifact.id] = pkg
403427
except KeyError:
404428
log.error(f"KeyError: Could not create package from removed artifact {artifact.id}")
405429
log.error(f"Artifact details - name: {artifact.name}, version: {artifact.version}")
406-
matches = [p for p in head_full_scan.packages.values() if p.name == artifact.name and p.version == artifact.version]
407-
if matches:
408-
log.error(f"Found {len(matches)} packages with matching name/version:")
409-
for m in matches:
410-
log.error(f" ID: {m.id}, name: {m.name}, version: {m.version}")
411-
else:
412-
log.error("No matching packages found in head_full_scan")
430+
log.error("No matching packages found in head_full_scan")
413431

414432
return added_packages, removed_packages
415433

@@ -435,36 +453,49 @@ def create_new_diff(
435453
files = self.find_files(path)
436454
files_for_sending = self.load_files_for_sending(files, path)
437455

438-
log.debug(f"files: {files} found at path {path}")
439456
if not files:
440457
return Diff(id="no_diff_id")
441458

442-
head_full_scan_id = None
443-
444459
try:
445460
# Get head scan ID
446461
head_full_scan_id = self.get_head_scan_for_repo(params.repo)
462+
has_head_scan = True
447463
except APIResourceNotFound:
448464
head_full_scan_id = None
465+
has_head_scan = False
449466

450-
# Create new scan
451-
new_scan_start = time.time()
452-
new_full_scan = self.create_full_scan(files_for_sending, params)
453-
new_scan_end = time.time()
454-
log.info(f"Total time to create new full scan: {new_scan_end - new_scan_start:.2f}")
455-
456-
457-
head_full_scan = None
458-
if head_full_scan_id:
459-
head_full_scan = self.get_full_scan(head_full_scan_id)
467+
# Create new scan
468+
try:
469+
new_scan_start = time.time()
470+
new_full_scan = self.create_full_scan(files_for_sending, params, has_head_scan)
471+
new_scan_end = time.time()
472+
log.info(f"Total time to create new full scan: {new_scan_end - new_scan_start:.2f}")
473+
except APIFailure as e:
474+
log.error(f"API Error: {e}")
475+
sys.exit(1)
476+
except Exception as e:
477+
log.error(f"Unexpected error while creating new scan: {e}")
478+
sys.exit(1)
460479

461-
added_packages, removed_packages = self.get_added_and_removed_packages(head_full_scan, new_full_scan)
480+
try:
481+
added_packages, removed_packages = self.get_added_and_removed_packages(head_full_scan_id, new_full_scan)
482+
except APIFailure as e:
483+
log.error(f"API Error: {e}")
484+
sys.exit(1)
485+
except Exception as e:
486+
log.error(f"Unexpected error while comparing packages: {e}")
487+
sys.exit(1)
462488

463489
diff = self.create_diff_report(added_packages, removed_packages)
464490

465491
base_socket = "https://socket.dev/dashboard/org"
466492
diff.id = new_full_scan.id
467-
diff.report_url = f"{base_socket}/{self.config.org_slug}/sbom/{diff.id}"
493+
494+
report_url = f"{base_socket}/{self.config.org_slug}/sbom/{diff.id}"
495+
if not params.include_license_details:
496+
report_url += "?include_license_details=false"
497+
diff.report_url = report_url
498+
468499
if head_full_scan_id is not None:
469500
diff.diff_url = f"{base_socket}/{self.config.org_slug}/diff/{diff.id}/{head_full_scan_id}"
470501
else:
@@ -609,7 +640,8 @@ def get_source_data(package: Package, packages: dict) -> list:
609640
source = (top_purl, manifests)
610641
introduced_by.append(source)
611642
else:
612-
log.debug(f"Unable to get top level package info for {top_id}")
643+
pass
644+
# log.debug(f"Unable to get top level package info for {top_id}")
613645
return introduced_by
614646

615647
@staticmethod

0 commit comments

Comments
 (0)