Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions cli/docs/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ const (
// Unique curation flags
CurationOutput = "curation-format"
DockerImageName = "image"
HuggingFaceModel = "hugging-face-model"
SolutionPath = "solution-path"
IncludeCachedPackages = "include-cached-packages"
LegacyPeerDeps = "legacy-peer-deps"
Expand Down Expand Up @@ -228,7 +229,7 @@ var commandFlags = map[string][]string{
StaticSca, XrayLibPluginBinaryCustomPath, AnalyzerManagerCustomPath, AddSastRules,
},
CurationAudit: {
CurationOutput, WorkingDirs, Threads, RequirementsFile, InsecureTls, useWrapperAudit, UseIncludedBuilds, SolutionPath, DockerImageName, IncludeCachedPackages, MvnIncludePluginDeps, LegacyPeerDeps, RunNative,
CurationOutput, WorkingDirs, Threads, RequirementsFile, InsecureTls, useWrapperAudit, UseIncludedBuilds, SolutionPath, DockerImageName, HuggingFaceModel, IncludeCachedPackages, MvnIncludePluginDeps, LegacyPeerDeps, RunNative,
},
GitCountContributors: {
InputFile, ScmType, ScmApiUrl, Token, Owner, RepoName, Months, DetailedSummary, InsecureTls, GitThreads, CacheValidity,
Expand Down Expand Up @@ -370,7 +371,8 @@ var flagsMap = map[string]components.Flag{
UseConfigProfile: components.NewBoolFlag(UseConfigProfile, "Set to false to override config profile for the audit.", components.WithBoolDefaultValue(true), components.SetHiddenBoolFlag()),

// Docker flags
DockerImageName: components.NewStringFlag(DockerImageName, "Specifies the Docker image name to audit. Uses the same format as the Docker CLI, including Artifactory-hosted images."),
DockerImageName: components.NewStringFlag(DockerImageName, "Specifies the Docker image name to audit. Uses the same format as the Docker CLI, including Artifactory-hosted images."),
HuggingFaceModel: components.NewStringFlag(HuggingFaceModel, "Specifies one or more Hugging Face models or datasets to audit, comma-separated, in the format '<model-id>:<revision>' (e.g. 'mcpotato/42-eicar-street:main,bert-base-uncased'). The revision is optional and defaults to 'main' when omitted. These models are audited in addition to any auto-discovered from source. The Artifactory repository is read from the HF_ENDPOINT environment variable."),

// Git flags
InputFile: components.NewStringFlag(InputFile, "Path to an input file in YAML format contains multiple git providers. With this option, all other scm flags will be ignored and only git servers mentioned in the file will be examined.."),
Expand Down
3 changes: 2 additions & 1 deletion cli/docs/scan/curation/help.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ When to use:

Prerequisites:
- A configured JFrog Platform server (jf c add) with JFrog Curation entitlement.
- Project must use a supported package manager (npm, yarn, pip, maven, gradle, nuget, go) resolved through a curation-configured remote.
- Project must use a supported package manager (npm, yarn, pip, maven, gradle, nuget, go) resolved through a curation-configured remote. Docker images and Hugging Face models/datasets are audited via dedicated flags.
- The package manager and its lockfile must be present in the working directory.

Common patterns:
Expand All @@ -23,6 +23,7 @@ Common patterns:
$ jf curation-audit --format=json --threads=4
$ jf curation-audit --requirements-file=requirements-dev.txt
$ jf curation-audit --docker-image=my-image:tag
$ HF_ENDPOINT=https://my.jfrog.io/artifactory/api/huggingfaceml/my-hf-repo jf curation-audit --hugging-face-model=org/model:main

Gotchas:
- The user/token must be entitled for Curation; otherwise the command exits with an entitlement notice.
Expand Down
1 change: 1 addition & 0 deletions cli/scancommands.go
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,7 @@ func getCurationCommand(c *components.Context) (*curation.CurationAuditCommand,
SetPipRequirementsFile(c.GetStringFlagValue(flags.RequirementsFile)).
SetSolutionFilePath(c.GetStringFlagValue(flags.SolutionPath))
curationAuditCommand.SetDockerImageName(c.GetStringFlagValue(flags.DockerImageName))
curationAuditCommand.SetHuggingFaceModel(c.GetStringFlagValue(flags.HuggingFaceModel))
curationAuditCommand.SetIncludeCachedPackages(c.GetBoolFlagValue(flags.IncludeCachedPackages))
curationAuditCommand.SetMvnIncludePluginDeps(c.GetBoolFlagValue(flags.MvnIncludePluginDeps))
curationAuditCommand.SetLegacyPeerDeps(c.GetBoolFlagValue(flags.LegacyPeerDeps))
Expand Down
158 changes: 157 additions & 1 deletion commands/curation/curationaudit.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"encoding/json"
"errors"
"fmt"
"io/fs"
"net/http"
"os"
"path/filepath"
Expand Down Expand Up @@ -39,6 +40,7 @@ import (
"github.com/jfrog/jfrog-cli-security/sca/bom/buildinfo"
"github.com/jfrog/jfrog-cli-security/sca/bom/buildinfo/technologies"
"github.com/jfrog/jfrog-cli-security/sca/bom/buildinfo/technologies/docker"
"github.com/jfrog/jfrog-cli-security/sca/bom/buildinfo/technologies/huggingface"
npmtech "github.com/jfrog/jfrog-cli-security/sca/bom/buildinfo/technologies/npm"
pnpmtech "github.com/jfrog/jfrog-cli-security/sca/bom/buildinfo/technologies/pnpm"
"github.com/jfrog/jfrog-cli-security/sca/bom/buildinfo/technologies/python"
Expand Down Expand Up @@ -93,6 +95,11 @@ const (
"are blocked by the curation policy. Details of the policy violations are shown in the table below.\n" +
"Dependency analysis cannot proceed until these issues are addressed.\n" +
"Once you apply a waiver or switch to an approved version and re-run the audit, additional results will be available."

// hfUnresolvedReportKey is the results-map key for a Hugging Face scan that found
// only unresolved (dynamic) references and no statically-resolvable models, so it
// has warnings to surface but no curation table of its own.
hfUnresolvedReportKey = "huggingface (unresolved references)"
)

var CurationOutputFormats = []string{string(outFormat.Table), string(outFormat.Json)}
Expand Down Expand Up @@ -120,7 +127,8 @@ var supportedTech = map[techutils.Technology]func(ca *CurationAuditCommand) (boo
techutils.Gem: func(ca *CurationAuditCommand) (bool, error) {
return ca.checkSupportByVersionOrEnv(techutils.Gem, MinArtiGradleGemSupport)
},
techutils.Docker: func(ca *CurationAuditCommand) (bool, error) { return true, nil },
techutils.Docker: func(ca *CurationAuditCommand) (bool, error) { return true, nil },
techutils.HuggingFaceMl: func(ca *CurationAuditCommand) (bool, error) { return true, nil },
techutils.Poetry: func(ca *CurationAuditCommand) (bool, error) {
return ca.checkSupportByVersionOrEnv(techutils.Poetry, MinArtiPassThroughSupport)
},
Expand Down Expand Up @@ -248,6 +256,7 @@ type CurationAuditCommand struct {
OriginPath string
parallelRequests int
dockerImageName string
huggingFaceModel string
includeCachedPackages bool
mvnIncludePluginDeps bool
audit.AuditParamsInterface
Expand All @@ -261,6 +270,11 @@ type CurationReport struct {
// was produced via the metadata-API fallback. The partial-report warning
// is printed after the spinner stops so it is not swallowed by the spinner.
isPartial bool
// warnings carries non-fatal, user-facing messages produced while building
// the dependency tree (e.g. Hugging Face references that could not be
// statically resolved). They are printed after the curation table so the
// coverage gap stays visible instead of being buried in BOM-build output.
warnings []string
}

type WaiverResponse struct {
Expand Down Expand Up @@ -300,6 +314,15 @@ func (ca *CurationAuditCommand) SetDockerImageName(dockerImageName string) *Cura
return ca
}

// HuggingFaceModel returns the Hugging Face model reference(s) stored on the
// command by SetHuggingFaceModel. An empty string means no explicit reference
// was provided.
func (ca *CurationAuditCommand) HuggingFaceModel() string {
return ca.huggingFaceModel
}

// SetHuggingFaceModel stores the given Hugging Face model reference(s) on the
// command as-is (no parsing or validation happens here) and returns the
// receiver so that setter calls can be chained.
func (ca *CurationAuditCommand) SetHuggingFaceModel(huggingFaceModel string) *CurationAuditCommand {
ca.huggingFaceModel = huggingFaceModel
return ca
}

func (ca *CurationAuditCommand) SetIncludeCachedPackages(includeCachedPackages bool) *CurationAuditCommand {
ca.includeCachedPackages = includeCachedPackages
return ca
Expand Down Expand Up @@ -362,6 +385,13 @@ func (ca *CurationAuditCommand) Run() (err error) {
for projectPath, packagesStatus := range results {
err = errors.Join(err, printResult(ca.OutputFormat(), projectPath, packagesStatus.packagesStatus))

// Surface tree-build warnings (e.g. Hugging Face references that could not be
// statically resolved) after the table, so the coverage gap is the last thing
// the user sees rather than being buried in the BOM-build output above.
for _, w := range packagesStatus.warnings {
log.Warn(w)
}

for _, ps := range packagesStatus.packagesStatus {
if ps.WaiverAllowed && !utils.IsCI() {
// If at least one package allows waiver requests, we will ask the user if they want to request a waiver
Expand Down Expand Up @@ -469,6 +499,19 @@ func (ca *CurationAuditCommand) doCurateAudit(results map[string]*CurationReport
log.Debug(fmt.Sprintf("Docker image name '%s' was provided, running Docker curation audit.", ca.DockerImageName()))
techs = []string{techutils.Docker.String()}
}
// --hugging-face-model: explicit spot-check — run HF only (skip pip/npm/etc. so
// the user gets a fast single-model verdict without waiting for full dep resolution).
// Auto-discovery (HF_ENDPOINT set + .py files present): additive — HF runs alongside
// the detected package managers so a full audit still covers both surfaces.
if ca.HuggingFaceModel() != "" {
log.Debug(fmt.Sprintf("Hugging Face models '%s' were provided explicitly — running HF-only audit.", ca.HuggingFaceModel()))
techs = []string{techutils.HuggingFaceMl.String()}
} else if os.Getenv("HF_ENDPOINT") != "" && hasPythonFiles(ca.OriginPath) {
// Auto-discovery: attempt an HF source scan when HF_ENDPOINT is configured and
// .py/.ipynb files are present. BuildDependencyTree returns gracefully if no
// HF call sites are found.
techs = appendIfMissing(techs, techutils.HuggingFaceMl.String())
}
// Resolve npm→yarn when the project was configured with 'jf yarn-config' (yarn.yaml exists)
// but has no yarn.lock/.yarnrc.yml so the file-based detector picked npm instead.
for i, tech := range techs {
Expand Down Expand Up @@ -500,6 +543,44 @@ func (ca *CurationAuditCommand) doCurateAudit(results map[string]*CurationReport
return nil
}

// appendIfMissing returns slice with value added at the end, unless value is
// already one of its elements, in which case slice is returned untouched.
// It is used to keep the technology list duplicate-free when a technology is
// both auto-detected and requested explicitly.
func appendIfMissing(slice []string, value string) []string {
	alreadyPresent := false
	for i := 0; i < len(slice); i++ {
		if slice[i] == value {
			alreadyPresent = true
			break
		}
	}
	if alreadyPresent {
		return slice
	}
	return append(slice, value)
}

// hasPythonFiles reports whether dir (or any of its sub-directories) contains
// at least one .py or .ipynb file, indicating the project may have Hugging
// Face model references to discover. An empty dir means the current directory.
//
// Sub-directories named .git, .venv, venv, __pycache__ and node_modules are
// not descended into. The exclusion is applied only below dir: the directory
// the caller explicitly asked about is always scanned, even if it happens to
// carry one of those names.
//
// The check is best-effort: entries that cannot be read are skipped, and a
// directory that cannot be walked at all simply yields false.
func hasPythonFiles(dir string) bool {
	if dir == "" {
		dir = "."
	}
	found := false
	// The walk error is intentionally discarded: this is only a hint used to
	// decide whether a Hugging Face source scan is worth attempting.
	_ = filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// Unreadable entry (d may be nil here) — ignore it and keep looking.
			return nil
		}
		if d.IsDir() {
			// WalkDir passes the root with path == dir; never prune the root itself.
			if path == dir {
				return nil
			}
			switch d.Name() {
			case ".git", ".venv", "venv", "__pycache__", "node_modules":
				return filepath.SkipDir
			}
			return nil
		}
		switch strings.ToLower(filepath.Ext(path)) {
		case ".py", ".ipynb":
			found = true
			// One match is enough — stop walking instead of visiting the rest of the tree.
			return filepath.SkipAll
		}
		return nil
	})
	return found
}

// resolveNpmYarnTech upgrades npm→yarn when the project has yarn.yaml but no npm.yaml —
// the developer ran 'jf yarn-config' but the file-system detector fell back to npm.
func resolveNpmYarnTech(tech string) string {
Expand Down Expand Up @@ -596,6 +677,9 @@ func (ca *CurationAuditCommand) getBuildInfoParamsByTech() (technologies.BuildIn
PipRequirementsFile: ca.PipRequirementsFile(),
// Docker params
DockerImageName: ca.DockerImageName(),
// Hugging Face params
HuggingFaceModel: ca.HuggingFaceModel(),
WorkingDirectory: ca.OriginPath,
// NuGet params
SolutionFilePath: ca.SolutionFilePath(),
}, err
Expand Down Expand Up @@ -639,6 +723,17 @@ func (ca *CurationAuditCommand) auditTree(tech techutils.Technology, results map
}
// Validate the graph isn't empty.
if len(depTreeResult.FullDepTrees) == 0 {
// For HuggingFace auto-discovery, no models found is a normal outcome
// (the project has .py files but no HF call sites) — not an error.
if tech == techutils.HuggingFaceMl {
log.Debug("Hugging Face: no model references discovered in source — skipping HF curation probe")
// There may still be unresolved (dynamic) references worth surfacing
// even when nothing statically resolved to a curation probe.
if len(depTreeResult.Warnings) > 0 {
results[hfUnresolvedReportKey] = &CurationReport{warnings: depTreeResult.Warnings}
}
return nil
}
return errorutils.CheckErrorf("found no dependencies for the audited project using '%v' as the package manager", tech.String())
}
rtManager, serverDetails, err := ca.getRtManagerAndAuth(tech)
Expand Down Expand Up @@ -707,6 +802,7 @@ func (ca *CurationAuditCommand) auditTree(tech techutils.Technology, results map
packagesStatus: packagesStatus,
// We subtract 1 because the root node is not a package.
totalNumberOfPackages: len(depTreeResult.FlatTree.Nodes) - 1,
warnings: depTreeResult.Warnings,
}
return err
}
Expand Down Expand Up @@ -925,6 +1021,17 @@ func (ca *CurationAuditCommand) SetRepo(tech techutils.Technology) error {
return nil
}

// Hugging Face resolves its Artifactory repo via huggingface.GetHuggingFaceRepositoryConfig
// (per the flag help text, from the HF_ENDPOINT environment variable), not from a
// 'jf <tech>-config' yaml file.
if tech == techutils.HuggingFaceMl {
repoConfig, err := huggingface.GetHuggingFaceRepositoryConfig()
if err != nil {
return err
}
ca.setPackageManagerConfig(repoConfig)
return nil
}

// When --run-native is set for npm, read the Artifactory URL and repo name from the
// project's .npmrc via native npm config — no jf npm-config/npm.yaml required.
if ca.RunNative() && tech == techutils.Npm {
Expand Down Expand Up @@ -1184,6 +1291,14 @@ func (nc *treeAnalyzer) fetchNodeStatus(node xrayUtils.GraphNode, p *sync.Map) e
}
return nil
}
// Hugging Face: a 404 means the model/dataset+revision is not resolvable through the
// proxy (e.g. uncached with on-demand repositories disabled, an unknown revision, or a
// dataset the catalog does not track). That is not a curation block and must not fail
// the whole audit — treat it as "not blocked" and move on, like the NuGet 404 skip below.
if resp != nil && resp.StatusCode == http.StatusNotFound && nc.tech == techutils.HuggingFaceMl {
log.Debug(fmt.Sprintf("Hugging Face: %s:%s not resolvable at %s (HTTP 404) — skipping", name, version, packageUrl))
continue
}
if err != nil {
if resp != nil && resp.StatusCode >= 400 {
return errorutils.CheckErrorf(errorTemplateHeadRequest, packageUrl, name, version, resp.StatusCode, err)
Expand Down Expand Up @@ -1617,6 +1732,9 @@ func getUrlNameAndVersionByTech(tech techutils.Technology, node *xrayUtils.Graph
case techutils.Docker:
downloadUrls, name, version = getDockerNameAndVersion(node.Id, artiUrl, repo)
return
case techutils.HuggingFaceMl:
downloadUrls, name, version = getHuggingFaceNameAndVersion(node.Id, artiUrl, repo)
return
}
return
}
Expand Down Expand Up @@ -1862,6 +1980,44 @@ func getDockerNameAndVersion(id, artiUrl, repo string) (downloadUrls []string, n
return
}

// getHuggingFaceNameAndVersion extracts the model id and revision from a node id of the
// form "huggingfaceml://<repo_id>:<revision>" and builds the model-info probe URL.
//
// The probe targets the model metadata endpoint, which the curation service blocks
// (HEAD returns 403) for a malicious revision — independent of any specific file:
//
//	{artiUrl}/api/huggingfaceml/{repo}/api/models/{repo_id}/revision/{revision}
//
// Parameters:
//   - id: the graph node id; the huggingface.HuggingFacePackagePrefix scheme and the
//     optional huggingface.DatasetNodeMarker are stripped before parsing.
//   - artiUrl: Artifactory base URL; a trailing "/" is tolerated.
//   - repo: Artifactory repository name.
//
// Returns the probe URL (a single-element slice, or nil when artiUrl or repo is
// empty), the repo id as name, and the revision as version. When the id carries no
// revision — or an empty one, as in "org/model:" — version falls back to
// huggingface.DefaultRevision. An empty id yields all-zero results.
func getHuggingFaceNameAndVersion(id, artiUrl, repo string) (downloadUrls []string, name, version string) {
	if id == "" {
		return
	}
	id = strings.TrimPrefix(id, huggingface.HuggingFacePackagePrefix)

	// Datasets are probed via api/datasets/ instead of api/models/. The repo type is
	// carried by an optional "dataset|" marker placed right after the scheme prefix.
	repoTypePath := "api/models"
	if strings.HasPrefix(id, huggingface.DatasetNodeMarker) {
		repoTypePath = "api/datasets"
		id = strings.TrimPrefix(id, huggingface.DatasetNodeMarker)
	}

	// The repo id (e.g. "mcpotato/42-eicar-street") contains '/' but never ':'; the
	// revision suffix never contains '/', so split on the last ':'.
	name, version = id, huggingface.DefaultRevision
	if idx := strings.LastIndex(id, ":"); idx > 0 && !strings.Contains(id[idx+1:], "/") {
		name = id[:idx]
		// Keep the default for a dangling ':' — an empty revision would produce a probe
		// URL ending in "/revision/", which cannot resolve to a real revision.
		if revision := id[idx+1:]; revision != "" {
			version = revision
		}
	}

	if artiUrl != "" && repo != "" {
		downloadUrls = []string{fmt.Sprintf("%s/api/huggingfaceml/%s/%s/%s/revision/%s",
			strings.TrimSuffix(artiUrl, "/"), repo, repoTypePath, name, version)}
	}
	return
}

func GetCurationOutputFormat(formatFlagVal string) (format outFormat.OutputFormat, err error) {
// Default print format is table.
format = outFormat.Table
Expand Down
Loading
Loading