Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cmd/atelet/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"path/filepath"
"strconv"
"strings"
"sync"

"cloud.google.com/go/storage"
"github.com/agent-substrate/substrate/cmd/atelet/internal/ategcs"
Expand Down Expand Up @@ -187,6 +188,9 @@ type AteomHerder struct {
pullCache *memorypullcache.MemoryPullCache
anonGCSClient ategcs.ObjectStorage
gcsClient ategcs.ObjectStorage

urlHashMu sync.Mutex
urlHashCache map[string]string
}

var _ ateletpb.AteomHerderServer = (*AteomHerder)(nil)
Expand All @@ -204,6 +208,7 @@ func NewService(
pullCache: pullCache,
anonGCSClient: anonGCSClient,
gcsClient: gcsClient,
urlHashCache: make(map[string]string),
}
return wms
}
Expand Down
66 changes: 58 additions & 8 deletions cmd/atelet/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,8 +259,8 @@ func TestFetchAssetRejectsBadHash(t *testing.T) {
t.Fatalf("planting cache file: %v", err)
}

s := &AteomHerder{}
if _, err := s.fetchAsset(context.Background(), assetEntry{SHA256: badHash}); err == nil {
s := &AteomHerder{urlHashCache: make(map[string]string)}
if _, _, err := s.fetchAsset(context.Background(), assetEntry{SHA256: badHash}); err == nil {
t.Error("fetchAsset returned a cache hit for an invalid hash; validation must run before the os.Stat early return")
}
}
Expand Down Expand Up @@ -292,11 +292,14 @@ func TestFetchAssetStreaming(t *testing.T) {

t.Run("good asset is cached", func(t *testing.T) {
ateompath.StaticFilesDir = t.TempDir()
s := &AteomHerder{anonGCSClient: fakeObjectStorage{data: content}}
path, err := s.fetchAsset(context.Background(), assetEntry{URL: url, SHA256: goodHash})
s := &AteomHerder{anonGCSClient: fakeObjectStorage{data: content}, urlHashCache: make(map[string]string)}
path, resolvedHash, err := s.fetchAsset(context.Background(), assetEntry{URL: url, SHA256: goodHash})
if err != nil {
t.Fatalf("fetchAsset: %v", err)
}
if resolvedHash != goodHash {
t.Errorf("resolvedHash = %q, want %q", resolvedHash, goodHash)
}
got, err := os.ReadFile(path)
if err != nil {
t.Fatalf("reading cached asset: %v", err)
Expand All @@ -309,8 +312,8 @@ func TestFetchAssetStreaming(t *testing.T) {
t.Run("over-cap asset rejected, cache not written", func(t *testing.T) {
ateompath.StaticFilesDir = t.TempDir()
maxAssetBytes = 4 // content is longer than this
s := &AteomHerder{anonGCSClient: fakeObjectStorage{data: content}}
if _, err := s.fetchAsset(context.Background(), assetEntry{URL: url, SHA256: goodHash}); err == nil {
s := &AteomHerder{anonGCSClient: fakeObjectStorage{data: content}, urlHashCache: make(map[string]string)}
if _, _, err := s.fetchAsset(context.Background(), assetEntry{URL: url, SHA256: goodHash}); err == nil {
t.Fatal("fetchAsset accepted an over-cap asset")
}
if _, err := os.Stat(ateompath.RunSCBinaryPath(goodHash)); !errors.Is(err, os.ErrNotExist) {
Expand All @@ -322,14 +325,61 @@ func TestFetchAssetStreaming(t *testing.T) {
ateompath.StaticFilesDir = t.TempDir()
maxAssetBytes = origCap
wrongHash := strings.Repeat("a", 64) // valid 64-hex format, wrong value
s := &AteomHerder{anonGCSClient: fakeObjectStorage{data: content}}
if _, err := s.fetchAsset(context.Background(), assetEntry{URL: url, SHA256: wrongHash}); err == nil {
s := &AteomHerder{anonGCSClient: fakeObjectStorage{data: content}, urlHashCache: make(map[string]string)}
if _, _, err := s.fetchAsset(context.Background(), assetEntry{URL: url, SHA256: wrongHash}); err == nil {
t.Fatal("fetchAsset accepted a hash mismatch")
}
if _, err := os.Stat(ateompath.RunSCBinaryPath(wrongHash)); !errors.Is(err, os.ErrNotExist) {
t.Errorf("mismatched download left a file at the cache path (stat err = %v)", err)
}
})

t.Run("empty sha256 downloads and computes hash", func(t *testing.T) {
ateompath.StaticFilesDir = t.TempDir()
maxAssetBytes = origCap
s := &AteomHerder{anonGCSClient: fakeObjectStorage{data: content}, urlHashCache: make(map[string]string)}
path, resolvedHash, err := s.fetchAsset(context.Background(), assetEntry{URL: url, SHA256: ""})
if err != nil {
t.Fatalf("fetchAsset: %v", err)
}
if resolvedHash != goodHash {
t.Errorf("resolvedHash = %q, want %q", resolvedHash, goodHash)
}
got, err := os.ReadFile(path)
if err != nil {
t.Fatalf("reading cached asset: %v", err)
}
if !bytes.Equal(got, content) {
t.Errorf("cached bytes = %q, want %q", got, content)
}
})

t.Run("empty sha256 uses in-memory cache on second call", func(t *testing.T) {
ateompath.StaticFilesDir = t.TempDir()
maxAssetBytes = origCap
s := &AteomHerder{anonGCSClient: fakeObjectStorage{data: content}, urlHashCache: make(map[string]string)}
_, _, err := s.fetchAsset(context.Background(), assetEntry{URL: url, SHA256: ""})
if err != nil {
t.Fatalf("first fetchAsset: %v", err)
}
// Replace the GCS client with one that errors — the in-memory cache
// should prevent a second download.
s.anonGCSClient = fakeObjectStorage{err: fmt.Errorf("should not be called")}
path2, resolvedHash2, err := s.fetchAsset(context.Background(), assetEntry{URL: url, SHA256: ""})
if err != nil {
t.Fatalf("second fetchAsset should hit cache: %v", err)
}
if resolvedHash2 != goodHash {
t.Errorf("resolvedHash = %q, want %q", resolvedHash2, goodHash)
}
got, err := os.ReadFile(path2)
if err != nil {
t.Fatalf("reading cached asset: %v", err)
}
if !bytes.Equal(got, content) {
t.Errorf("cached bytes = %q, want %q", got, content)
}
})
}

// TestRPCBoundariesReject confirms each of the three RPCs validates path inputs
Expand Down
154 changes: 128 additions & 26 deletions cmd/atelet/sandbox_assets.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,81 +86,183 @@ func recordFromRequest(sa *ateletpb.SandboxAssets) (*sandboxAssetsRecord, error)
// local path. For gVisor this is the single "runsc" asset, passed to ateom as
// RunscPath. Binaries are content-addressed and cached, so re-fetching at
// Checkpoint/Restore is a no-op once present.
//
// When the asset's SHA256 is empty (the SandboxConfig omitted it), the binary
// is downloaded and hashed on the fly; the resolved hash is written back into
// rec so that writeSandboxRecord persists the real hash for checkpoint/restore.
func (s *AteomHerder) ensureSandboxBinary(ctx context.Context, rec *sandboxAssetsRecord) (string, error) {
	// The static-files cache directory may not exist yet on a fresh node.
	if err := os.MkdirAll(ateompath.StaticFilesDir, 0o700); err != nil {
		return "", fmt.Errorf("while creating static files dir: %w", err)
	}
	// gVisor uses a single "runsc" asset.
	entry, ok := rec.Assets["runsc"]
	if !ok {
		return "", status.Errorf(codes.InvalidArgument, "sandbox assets for class %q missing required %q file", rec.SandboxClass, "runsc")
	}
	// fetchAsset returns the local path together with the hash it resolved.
	path, resolvedHash, err := s.fetchAsset(ctx, entry)
	if err != nil {
		return "", err
	}
	// Record the resolved hash on rec when it differs from what the entry
	// carried (e.g. the entry's SHA256 was empty). entry was read out of the
	// map, so the updated value has to be stored back explicitly.
	if entry.SHA256 != resolvedHash {
		entry.SHA256 = resolvedHash
		rec.Assets["runsc"] = entry
	}
	return path, nil
}

// fetchAsset downloads one content-addressed asset (verifying its sha256) into
// the shared static-files cache and returns its local path. On a cache hit it
// returns immediately.
func (s *AteomHerder) fetchAsset(ctx context.Context, entry assetEntry) (string, error) {
// fetchAsset downloads one content-addressed asset into the shared static-files
// cache and returns its local path and resolved SHA256. When entry.SHA256 is
// provided, the download is verified against the expected hash. When empty, the
// hash is computed on the fly and an in-memory URL→hash cache avoids redundant
// downloads within the same atelet process lifetime.
func (s *AteomHerder) fetchAsset(ctx context.Context, entry assetEntry) (string, string, error) {
if err := resources.ValidateRunscHash(entry.SHA256); err != nil {
return "", status.Error(codes.InvalidArgument, err.Error())
return "", "", status.Error(codes.InvalidArgument, err.Error())
}

if entry.SHA256 != "" {
return s.fetchAssetPinned(ctx, entry)
}
return s.fetchAssetUnpinned(ctx, entry)
}

// fetchAssetPinned handles the case where the expected SHA256 is known: check
// the disk cache, download on miss, and verify the hash.
func (s *AteomHerder) fetchAssetPinned(ctx context.Context, entry assetEntry) (string, string, error) {
localPath := ateompath.RunSCBinaryPath(entry.SHA256)
_, err := os.Stat(localPath)
if err == nil { // EQUALS nil
return localPath, nil
if err == nil {
return localPath, entry.SHA256, nil
} else if !errors.Is(err, os.ErrNotExist) {
return "", fmt.Errorf("while stat-ing local file: %w", err)
return "", "", fmt.Errorf("while stat-ing local file: %w", err)
}

// gVisor's runsc lives in the public gs://gvisor bucket, so the anonymous
// client suffices. TODO: drive authenticated asset fetches from atelet
// configuration for assets in private buckets.
rc, err := ategcs.Open(ctx, s.anonGCSClient, entry.URL)
wantSum, err := hex.DecodeString(entry.SHA256)
if err != nil {
return "", "", fmt.Errorf("while parsing sha256 hash: %w", err)
}

gotHash, err := s.downloadAsset(ctx, entry.URL, localPath, wantSum)
if err != nil {
return "", fmt.Errorf("while fetching %v: %w", entry.URL, err)
return "", "", err
}
return localPath, gotHash, nil
}

// fetchAssetUnpinned resolves an asset for which no SHA256 was supplied.
//
// It first asks the in-memory URL→hash cache whether this URL has been resolved
// before; if so, and the file named by that hash can be stat-ed, that file is
// returned without downloading. Otherwise the asset is downloaded via
// downloadAndCache, the computed hash is remembered for the URL, and the new
// path and hash are returned.
func (s *AteomHerder) fetchAssetUnpinned(ctx context.Context, entry assetEntry) (string, string, error) {
	// Hold the mutex only for the map read; filesystem and network work happen
	// outside the lock.
	lookup := func() string {
		s.urlHashMu.Lock()
		known := s.urlHashCache[entry.URL]
		s.urlHashMu.Unlock()
		return known
	}

	if known := lookup(); known != "" {
		cachedPath := ateompath.RunSCBinaryPath(known)
		_, statErr := os.Stat(cachedPath)
		if statErr == nil {
			return cachedPath, known, nil
		}
		// Any stat failure falls through to a fresh download.
	}

	fetchedPath, fetchedHash, err := s.downloadAndCache(ctx, entry.URL)
	if err != nil {
		return "", "", err
	}

	// Remember which hash this URL resolved to for later calls.
	s.urlHashMu.Lock()
	s.urlHashCache[entry.URL] = fetchedHash
	s.urlHashMu.Unlock()

	return fetchedPath, fetchedHash, nil
}

// downloadAndCache downloads an asset to a temp file while computing its SHA256,
// then places it in the content-addressed cache. If a file with the computed
// hash already exists on disk, the download is discarded and the existing file
// is returned.
//
// It returns the cache path and the lower-case hex SHA256 of the downloaded
// bytes. It fails if the asset cannot be opened or copied, if it is larger than
// maxAssetBytes, or if the temp file cannot be created, chmod-ed, closed, or
// renamed into place.
func (s *AteomHerder) downloadAndCache(ctx context.Context, url string) (string, string, error) {
	// The asset is read through the anonymous GCS client.
	rc, err := ategcs.Open(ctx, s.anonGCSClient, url)
	if err != nil {
		return "", "", fmt.Errorf("while fetching %v: %w", url, err)
	}
	defer rc.Close()

	// The final name is not known until the hash is computed, so download into
	// a temp file inside the cache directory and rename afterwards.
	tmpFile, err := os.CreateTemp(ateompath.StaticFilesDir, "runsc-download-")
	if err != nil {
		return "", "", fmt.Errorf("while creating temp file: %w", err)
	}
	tmpName := tmpFile.Name()
	defer os.Remove(tmpName) // cleans up on every early return; fails harmlessly after rename
	defer tmpFile.Close()

	// Stream to disk, hashing as we go; +1 lets an over-cap asset trip n > cap.
	hasher := sha256.New()
	n, err := io.Copy(io.MultiWriter(tmpFile, hasher), io.LimitReader(rc, maxAssetBytes+1))
	if err != nil {
		return "", "", fmt.Errorf("while downloading %v: %w", url, err)
	}
	if n > maxAssetBytes {
		return "", "", fmt.Errorf("asset %v exceeds %d-byte cap", url, maxAssetBytes)
	}

	computedHash := hex.EncodeToString(hasher.Sum(nil))
	localPath := ateompath.RunSCBinaryPath(computedHash)

	// Same content already cached: keep the existing file and let the deferred
	// Remove discard the temp download.
	if _, err := os.Stat(localPath); err == nil {
		return localPath, computedHash, nil
	}

	if err := tmpFile.Chmod(0o755); err != nil {
		return "", "", fmt.Errorf("while setting file mode: %w", err)
	}
	if err := tmpFile.Close(); err != nil { // flush before rename
		return "", "", fmt.Errorf("while closing temp file: %w", err)
	}
	if err := os.Rename(tmpName, localPath); err != nil {
		return "", "", fmt.Errorf("while renaming temp file to target: %w", err)
	}

	return localPath, computedHash, nil
}

// downloadAsset downloads a URL to localPath, verifying the content against
// wantSum. Returns the hex-encoded hash of the downloaded content.
func (s *AteomHerder) downloadAsset(ctx context.Context, url, localPath string, wantSum []byte) (string, error) {
rc, err := ategcs.Open(ctx, s.anonGCSClient, url)
if err != nil {
return "", fmt.Errorf("while fetching %v: %w", url, err)
}
defer rc.Close()

tmpFile, err := os.CreateTemp(filepath.Dir(localPath), filepath.Base(localPath)+"-download-")
if err != nil {
return "", fmt.Errorf("while creating temp file: %w", err)
}
tmpName := tmpFile.Name()
defer os.Remove(tmpName) // partial-download cleanup; no-op after rename
defer os.Remove(tmpName)
defer tmpFile.Close()

// Stream to disk, hashing as we go; +1 lets an over-cap asset trip n > cap.
// Verify-after-copy keeps a bad download at the temp path, never the cache.
hasher := sha256.New()
n, err := io.Copy(io.MultiWriter(tmpFile, hasher), io.LimitReader(rc, maxAssetBytes+1))
if err != nil {
return "", fmt.Errorf("while downloading %v: %w", entry.URL, err)
return "", fmt.Errorf("while downloading %v: %w", url, err)
}
if n > maxAssetBytes {
return "", fmt.Errorf("asset %v exceeds %d-byte cap", entry.URL, maxAssetBytes)
return "", fmt.Errorf("asset %v exceeds %d-byte cap", url, maxAssetBytes)
}
if got := hasher.Sum(nil); !bytes.Equal(got, wantSum) {
return "", fmt.Errorf("sha256 mismatch; got=%x want=%s", got, entry.SHA256)
got := hasher.Sum(nil)
if !bytes.Equal(got, wantSum) {
return "", fmt.Errorf("sha256 mismatch; got=%x want=%x", got, wantSum)
}

if err := tmpFile.Chmod(0o755); err != nil {
return "", fmt.Errorf("while setting file mode: %w", err)
}
if err := tmpFile.Close(); err != nil { // flush before rename
if err := tmpFile.Close(); err != nil {
return "", fmt.Errorf("while closing temp file: %w", err)
}
if err := os.Rename(tmpName, localPath); err != nil {
return "", fmt.Errorf("while renaming temp file to target: %w", err)
}

return localPath, nil
return hex.EncodeToString(got), nil
}

// writeSandboxRecord persists the actor's running sandbox assets on-node so a
Expand Down
3 changes: 3 additions & 0 deletions internal/resources/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ func ValidateContainerNames(names []string) error {
// point the cache-hit early return (and the download target) at an arbitrary
// binary outside the static-files dir.
func ValidateRunscHash(sha256Hash string) error {
if sha256Hash == "" {
return nil
}
if len(sha256Hash) != 64 {
return fmt.Errorf("invalid runsc sha256 hash: want 64 hex chars, got %d", len(sha256Hash))
}
Expand Down
2 changes: 1 addition & 1 deletion internal/resources/validate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ func TestValidateRunscHash(t *testing.T) {
}{
{"valid lowercase", valid, false},
{"valid uppercase", strings.ToUpper(valid), false},
{"empty", "", true},
{"empty", "", false},
{"too short", "abc123", true},
{"too long", valid + "00", true},
{"separator", strings.Repeat("a", 60) + "/../", true},
Expand Down
6 changes: 2 additions & 4 deletions manifests/ate-install/sandboxconfig-gvisor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ spec:
assets:
amd64:
runsc:
url: "gs://gvisor/releases/nightly/2026-05-19/x86_64/runsc"
sha256: "a397be1abc2420d26bce6c70e6e2ff96c73aaaab929756c56f5e2089ea842b63"
url: "gs://gvisor/releases/release/latest/x86_64/runsc"
arm64:
runsc:
url: "gs://gvisor/releases/nightly/2026-05-19/aarch64/runsc"
sha256: "1ba2366ae2efceba166046f51a4104f9261c9cb72c6db8f5b3fe2dc57dea86b9"
url: "gs://gvisor/releases/release/latest/aarch64/runsc"
8 changes: 5 additions & 3 deletions pkg/api/v1alpha1/sandboxconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@ type AssetFile struct {

// SHA256 is the lower-case hex SHA256 of the asset. It both names the cached
// file (preventing collisions) and verifies the download's integrity.
// When omitted, atelet downloads the asset, computes the hash on the fly,
// and uses it for caching and the snapshot manifest; in that case the
// download is not checked against any expected value.
//
// +required
// +kubebuilder:validation:Pattern=`^[a-f0-9]{64}$`
SHA256 string `json:"sha256"`
// +optional
// +kubebuilder:validation:Pattern=`^([a-f0-9]{64})?$`
SHA256 string `json:"sha256,omitempty"`
}

// SandboxConfigSpec is the desired state of a SandboxConfig.
Expand Down
7 changes: 3 additions & 4 deletions pkg/api/v1alpha1/sandboxconfig_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,9 @@ func TestSandboxConfigValidation(t *testing.T) {
wantErr: true,
errMsg: "url",
}, {
name: "asset missing sha256",
sc: sandboxConfig("bad-no-sha", SandboxClassGvisor, map[string]map[string]AssetFile{"amd64": {"runsc": {URL: "gs://bucket/runsc"}}}),
wantErr: true,
errMsg: "sha256",
name: "valid gvisor with runsc, no sha256",
sc: sandboxConfig("ok-no-sha", SandboxClassGvisor, map[string]map[string]AssetFile{"amd64": {"runsc": {URL: "gs://bucket/runsc"}}}),
wantErr: false,
}, {
name: "asset sha256 not 64 hex",
sc: sandboxConfig("bad-sha", SandboxClassGvisor, map[string]map[string]AssetFile{"amd64": {"runsc": {URL: "gs://bucket/runsc", SHA256: "deadbeef"}}}),
Expand Down