diff --git a/internal/config/config.go b/internal/config/config.go
index 8e4427f..534b87c 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"log/slog"
 	"os"
+	"strconv"
 	"strings"
 )
 
@@ -178,6 +179,15 @@ type Config struct {
 	FlowSyntheticTier        string // FLOW_SYNTHETIC_TIER — seeded tier (default free)
 	FlowSyntheticDisabled    string // FLOW_SYNTHETIC_DISABLED — comma list of per-flow kill switches
 	FlowSyntheticJWTSecret   string // JWT_SECRET — shared with api; mints the synthetic session JWT
+
+	// Scale-to-zero idle-scaler (deploy_idle_scaler.go, Task #54). INERT unless
+	// DeployScaleToZeroEnabled is true — the master flag (shared name with the
+	// api's wake-path flag). When off, the idle-scaler sweep is a no-op (no k8s
+	// patch, no DB write). DeployScaleToZeroIdleMinutes is the no-activity
+	// threshold before an app is descheduled (default 30; values below 5 fall
+	// back to 30). Enabling is an operator action after a canary.
+	DeployScaleToZeroEnabled     bool // DEPLOY_SCALE_TO_ZERO_ENABLED — master flag (default false)
+	DeployScaleToZeroIdleMinutes int  // DEPLOY_SCALE_TO_ZERO_IDLE_MINUTES — idle threshold (default 30)
 }
 
 // ErrMissingConfig is returned when a required env var is absent.
@@ -294,6 +304,20 @@ func Load() *Config {
 		FlowSyntheticTier:        os.Getenv("FLOW_SYNTHETIC_TIER"),
 		FlowSyntheticDisabled:    os.Getenv("FLOW_SYNTHETIC_DISABLED"),
 		FlowSyntheticJWTSecret:   os.Getenv("JWT_SECRET"),
+
+		// Scale-to-zero idle-scaler (Task #54). Default OFF; idle threshold
+		// default 30 min (parsed below).
+		DeployScaleToZeroEnabled: os.Getenv("DEPLOY_SCALE_TO_ZERO_ENABLED") == "true",
+	}
+
+	// DEPLOY_SCALE_TO_ZERO_IDLE_MINUTES: minutes of no-activity before an app is
+	// descheduled. Default 30; an unset / unparseable / sub-5 value falls back
+	// to 30 so a misconfig can't make the scaler aggressively flap apps to sleep.
+	cfg.DeployScaleToZeroIdleMinutes = 30
+	if v := strings.TrimSpace(os.Getenv("DEPLOY_SCALE_TO_ZERO_IDLE_MINUTES")); v != "" {
+		if n, err := strconv.Atoi(v); err == nil && n >= 5 {
+			cfg.DeployScaleToZeroIdleMinutes = n
+		}
 	}
 
 	// Fall back to the shared object-store bucket when the operator hasn't
diff --git a/internal/config/config_test.go b/internal/config/config_test.go
index 947d852..99e461e 100644
--- a/internal/config/config_test.go
+++ b/internal/config/config_test.go
@@ -126,6 +126,36 @@ func TestLoad_Defaults(t *testing.T) {
 	}
 }
 
+// TestLoad_DeployScaleToZeroIdleMinutes exercises the env-parse branch for
+// DEPLOY_SCALE_TO_ZERO_IDLE_MINUTES: a valid value is honoured; an invalid /
+// sub-5 value floors to the 30-minute default.
+func TestLoad_DeployScaleToZeroIdleMinutes(t *testing.T) {
+	t.Run("valid override", func(t *testing.T) {
+		clearEnv(t)
+		t.Setenv("DATABASE_URL", "postgres://localhost/db")
+		t.Setenv("DEPLOY_SCALE_TO_ZERO_IDLE_MINUTES", "45")
+		if got := Load().DeployScaleToZeroIdleMinutes; got != 45 {
+			t.Errorf("DeployScaleToZeroIdleMinutes = %d; want 45", got)
+		}
+	})
+	t.Run("sub-5 floors to default", func(t *testing.T) {
+		clearEnv(t)
+		t.Setenv("DATABASE_URL", "postgres://localhost/db")
+		t.Setenv("DEPLOY_SCALE_TO_ZERO_IDLE_MINUTES", "3")
+		if got := Load().DeployScaleToZeroIdleMinutes; got != 30 {
+			t.Errorf("sub-5 DeployScaleToZeroIdleMinutes = %d; want floor 30", got)
+		}
+	})
+	t.Run("non-numeric floors to default", func(t *testing.T) {
+		clearEnv(t)
+		t.Setenv("DATABASE_URL", "postgres://localhost/db")
+		t.Setenv("DEPLOY_SCALE_TO_ZERO_IDLE_MINUTES", "abc")
+		if got := Load().DeployScaleToZeroIdleMinutes; got != 30 {
+			t.Errorf("non-numeric DeployScaleToZeroIdleMinutes = %d; want floor 30", got)
+		}
+	})
+}
+
 func TestLoad_PanicsWithoutDatabaseURL(t *testing.T) {
 	clearEnv(t)
 	defer func() {
diff --git a/internal/jobs/deploy_idle_scaler.go b/internal/jobs/deploy_idle_scaler.go
new file mode 100644
index 0000000..91e5659
--- /dev/null
+++ b/internal/jobs/deploy_idle_scaler.go
@@ -0,0 +1,372 @@
+package jobs
+
+// deploy_idle_scaler.go — scale-to-zero idle descheduler (Task #54).
+//
+// SIBLING TO deployment_expirer.go, NOT A REPLACEMENT
+//
+//   - deployment_expirer soft-deletes (status='expired') a deploy whose TTL
+//     elapsed. That is PERMANENT and tears the app down.
+//   - this idle-scaler patches an idle (but live) app's Deployment to
+//     replicas=0 — ~$0 compute, fully REVERSIBLE. The row stays 'healthy';
+//     only scaled_to_zero flips true. The api wake endpoint (or a redeploy)
+//     brings it back. Idle ≠ expired.
+//
+// THE IDLE SIGNAL (v1 — stated honestly)
+//
+// instanode.dev serves a deployed app via a k8s Ingress that routes straight
+// to the per-app Service; the api/worker processes are NOT in the request
+// path, and no nginx-ingress request-total scrape is wired into the worker
+// today. So the only reliable "activity" signal v1 has is the
+// deployments.last_activity_at column, which is stamped at create-time and
+// bumped on every deploy / redeploy / explicit wake (api migration 068).
+//
+// THEREFORE v1 idle = "no deploy / redeploy / wake for N minutes", NOT
+// "no HTTP traffic for N minutes". This is a deliberately conservative signal:
+// it will never deschedule an app the user is actively redeploying, and the
+// explicit-wake path makes a wrongly-slept app one POST away from awake. The
+// FOLLOW-UP to make this traffic-based is to scrape an nginx-ingress per-host
+// request counter (or have the ingress bump last_activity_at) — that lifts the
+// signal to true traffic-idle without changing this job's structure.
+//
+// FLAG-GATED, DEFAULT OFF
+//
+// The whole job is inert unless DEPLOY_SCALE_TO_ZERO_ENABLED is set. When off,
+// Work() logs at DEBUG and returns immediately — no k8s patch, no DB write.
+// Proven by TestDeployIdleScaler_FlagOffNoOp.
+//
+// FAIL-OPEN POSTURE
+//
+// Constructed with a deployScaleK8sProvider that may be nil (no cluster in CI /
+// docker-compose). Work() short-circuits with a WARN when k8s is nil — the
+// rest of the worker keeps running, identical to deploy_status_reconcile.
+//
+// PER-APP OPT-OUT
+//
+// always_on=true pins an app: the candidate SELECT excludes it, so a pinned
+// (Pro+/operator) app never sleeps. The scale-down UPDATE re-checks status,
+// scaled_to_zero and always_on (not last_activity_at), so a concurrent pin or
+// status change between SELECT and UPDATE reports 0 rows (skipped, not slept).
+
+import (
+	"context"
+	"database/sql"
+	"fmt"
+	"log/slog"
+	"strings"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/riverqueue/river"
+	appsv1 "k8s.io/api/apps/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+
+	"instant.dev/worker/internal/metrics"
+)
+
+const (
+	// deployIdleScalerInterval is how often the sweep runs. 2 min is frequent
+	// enough that an app sleeps promptly after crossing the idle threshold,
+	// while the threshold itself (default 30 min) is what actually governs
+	// when descheduling happens — the tick just polls.
+	deployIdleScalerInterval = 2 * time.Minute
+
+	// idleScalerBatchLimit caps how many apps one tick deschedules so a backlog
+	// (flag flipped on for the first time with a large idle fleet) spreads the
+	// k8s API load across ticks instead of one thundering burst.
+	idleScalerBatchLimit = 50
+
+	// idleScalerK8sTimeout caps a single ScaleDeployment call so one stuck
+	// namespace can't stall the batch.
+	idleScalerK8sTimeout = 5 * time.Second
+
+	// Status / naming constants — verbatim copies of the api's canonical set
+	// (the worker module does not import the api module; same convention as
+	// deploy_status_reconcile.go). If the api strings change, update both.
+	idleScalerStatusHealthy = "healthy"
+	idleScalerProviderPfx   = "app-"            // provider_id = "app-<appID>"
+	idleScalerNSPfx         = "instant-deploy-" // namespace = "instant-deploy-<appID>"
+)
+
+// DeployIdleScalerArgs is the periodic-job payload. Empty — every run is a
+// full candidate sweep.
+type DeployIdleScalerArgs struct{}
+
+// Kind implements river.JobArgs.
+func (DeployIdleScalerArgs) Kind() string { return "deploy_idle_scaler" }
+
+// deployScaleK8sProvider is the slice of k8s the idle-scaler needs: patch a
+// Deployment's replica count. Defined as an interface so the worker can pass
+// nil when no cluster is reachable and so tests inject a recording fake.
+type deployScaleK8sProvider interface {
+	// ScaleDeployment patches spec.replicas on the named Deployment. A NotFound
+	// Deployment MUST be returned as apierrors.IsNotFound so the caller can
+	// treat a torn-down app as "skip, not fail" instead of wedging the row.
+	ScaleDeployment(ctx context.Context, namespace, name string, replicas int32) error
+}
+
+// k8sDeployScaleClient is the concrete deployScaleK8sProvider, backed by a
+// kubernetes.Clientset. Mirrors k8sDeployStatusClient in
+// deploy_status_reconcile.go.
+type k8sDeployScaleClient struct {
+	cs kubernetes.Interface
+}
+
+// ScaleDeployment implements deployScaleK8sProvider via a read-modify-write
+// Update (the fake clientset used in tests does not support strategic-merge
+// Patch on subresources, and Update is the same idempotent shape the api's
+// compute.Scale uses).
+func (c *k8sDeployScaleClient) ScaleDeployment(ctx context.Context, namespace, name string, replicas int32) error {
+	d, err := c.cs.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{})
+	if err != nil {
+		return err // includes apierrors.IsNotFound — caller inspects it
+	}
+	if d.Spec.Replicas != nil && *d.Spec.Replicas == replicas {
+		return nil // already at target — idempotent no-op
+	}
+	r := replicas
+	d.Spec.Replicas = &r
+	_, err = c.cs.AppsV1().Deployments(namespace).Update(ctx, d, metav1.UpdateOptions{})
+	return err
+}
+
+// NewK8sDeployScaleClient builds a deployScaleK8sProvider sharing the supplied
+// clientset (callers pass the same kubernetes.Interface used for the status
+// reconciler so they share a connection pool).
+func NewK8sDeployScaleClient(cs kubernetes.Interface) deployScaleK8sProvider {
+	return &k8sDeployScaleClient{cs: cs}
+}
+
+// NewK8sDeployScaleClientFromCluster builds a scale client from in-cluster
+// config (kubeconfig fallback for local dev). Returns (nil, err) when no
+// cluster is reachable — StartWorkers logs and passes nil, so the idle-scaler
+// short-circuits with a WARN each tick (fail-open, identical to the status
+// reconciler). Reuses newDeployK8sClientset from deploy_status_reconcile.go.
+func NewK8sDeployScaleClientFromCluster() (deployScaleK8sProvider, error) {
+	cs, err := newDeployScaleClientset()
+	if err != nil {
+		return nil, err
+	}
+	return &k8sDeployScaleClient{cs: cs}, nil
+}
+
+// newDeployScaleClientset is a package-level indirection over
+// newDeployK8sClientset so tests can override the clientset builder to exercise
+// the success return of NewK8sDeployScaleClientFromCluster without a reachable
+// cluster.
+var newDeployScaleClientset = newDeployK8sClientset
+
+// compile-time assertion that the production client satisfies the interface.
+var _ deployScaleK8sProvider = (*k8sDeployScaleClient)(nil)
+
+// compile-time assertion appsv1 is used (Get returns *appsv1.Deployment).
+var _ = appsv1.Deployment{}
+
+// DeployIdleScaler is the River worker that deschedules idle deployments.
+type DeployIdleScaler struct {
+	river.WorkerDefaults[DeployIdleScalerArgs]
+	db          *sql.DB
+	k8s         deployScaleK8sProvider // may be nil → Work warn-logs each tick
+	enabled     bool                   // DEPLOY_SCALE_TO_ZERO_ENABLED
+	idleMinutes int                    // descheduling threshold
+}
+
+// NewDeployIdleScaler constructs the worker. Pass nil for k8sProvider when the
+// cluster is unreachable. enabled gates the entire job (default-off flag);
+// idleMinutes is the no-activity threshold (a value below 5 is replaced by 30).
+func NewDeployIdleScaler(db *sql.DB, k8sProvider deployScaleK8sProvider, enabled bool, idleMinutes int) *DeployIdleScaler {
+	if idleMinutes < 5 {
+		idleMinutes = 30
+	}
+	return &DeployIdleScaler{
+		db:          db,
+		k8s:         k8sProvider,
+		enabled:     enabled,
+		idleMinutes: idleMinutes,
+	}
+}
+
+// idleCandidate is the projection the scaler reads.
+type idleCandidate struct {
+	id         uuid.UUID
+	providerID string
+}
+
+// Work runs one idle sweep.
+func (w *DeployIdleScaler) Work(ctx context.Context, job *river.Job[DeployIdleScalerArgs]) error {
+	start := time.Now()
+
+	if !w.enabled {
+		// Flag OFF → fully inert. Idle-tick at DEBUG per worker convention 1.
+		slog.Debug("jobs.deploy_idle_scaler.disabled",
+			"note", "DEPLOY_SCALE_TO_ZERO_ENABLED unset; scale-to-zero is off",
+			"job_id", job.ID)
+		return nil
+	}
+
+	if w.k8s == nil {
+		slog.Warn("jobs.deploy_idle_scaler.skipped_no_k8s_client",
+			"reason", "k8s client init failed at startup; idle apps will not be descheduled until the worker restarts with a reachable cluster",
+			"job_id", job.ID)
+		return nil
+	}
+
+	candidates, err := w.listIdleCandidates(ctx)
+	if err != nil {
+		return fmt.Errorf("deploy_idle_scaler: list candidates: %w", err)
+	}
+
+	var scaledDown, skipped, failed int
+	for _, c := range candidates {
+		ns, name := namespaceAndNameFromProviderID(c.providerID)
+		if ns == "" {
+			// provider_id not in app-<appID> shape (e.g. a stack row) — not ours.
+			skipped++
+			continue
+		}
+
+		scaleCtx, cancel := context.WithTimeout(ctx, idleScalerK8sTimeout)
+		scaleErr := w.k8s.ScaleDeployment(scaleCtx, ns, name, 0)
+		cancel()
+		if scaleErr != nil {
+			if apierrors.IsNotFound(scaleErr) {
+				// Deployment torn down out from under us — skip, don't flip the
+				// row (status reconciler / orphan sweep will reconcile it).
+				skipped++
+				continue
+			}
+			slog.Warn("jobs.deploy_idle_scaler.scale_failed",
+				"id", c.id, "namespace", ns, "error", scaleErr)
+			metrics.DeployScaledToZeroTotal.WithLabelValues("scale_failed").Inc()
+			failed++
+			continue
+		}
+
+		// DB half: CAS-flip scaled_to_zero=true. The UPDATE re-checks status,
+		// scaled_to_zero and always_on, so a row pinned or no longer 'healthy'
+		// by now is left alone (k8s is already at 0; a wake re-scales to 1).
+		// NOTE(review): last_activity_at is NOT re-checked, so a wake/redeploy
+		// that only bumps it still matches — confirm that cannot strand a row.
+		n, dbErr := w.markScaledToZero(ctx, c.id)
+		if dbErr != nil {
+			slog.Error("jobs.deploy_idle_scaler.db_flip_failed",
+				"id", c.id, "error", dbErr)
+			metrics.DeployScaledToZeroTotal.WithLabelValues("scale_failed").Inc()
+			failed++
+			continue
+		}
+		if n == 0 {
+			skipped++
+			continue
+		}
+		metrics.DeployScaledToZeroTotal.WithLabelValues("scaled_down").Inc()
+		slog.Info("jobs.deploy_idle_scaler.scaled_down",
+			"id", c.id, "namespace", ns,
+			"idle_threshold_min", w.idleMinutes)
+		scaledDown++
+	}
+
+	// Sample the asleep-fleet gauge regardless of whether we scaled anything
+	// this tick (so the tile stays accurate even on quiet ticks).
+	if asleep, gErr := w.countAsleep(ctx); gErr == nil {
+		metrics.DeployIdleApps.Set(float64(asleep))
+	} else {
+		slog.Warn("jobs.deploy_idle_scaler.gauge_sample_failed", "error", gErr)
+	}
+
+	if scaledDown == 0 && failed == 0 {
+		// Idle tick (nothing descheduled, nothing failed) → DEBUG per convention.
+		slog.Debug("jobs.deploy_idle_scaler.completed",
+			"candidates", len(candidates), "scaled_down", 0, "skipped", skipped,
+			"duration_ms", time.Since(start).Milliseconds(), "job_id", job.ID)
+		return nil
+	}
+	slog.Info("jobs.deploy_idle_scaler.completed",
+		"candidates", len(candidates), "scaled_down", scaledDown,
+		"skipped", skipped, "failed", failed,
+		"duration_ms", time.Since(start).Milliseconds(), "job_id", job.ID)
+	return nil
+}
+
+// listIdleCandidates returns healthy, not-already-zeroed, not-pinned
+// deployments whose last_activity_at is older than the idle threshold. NULL
+// last_activity_at rows (legacy, pre-068-backfill edge) are NOT selected —
+// migration 068 backfills them, but a defensive NULL-excluding predicate means
+// a row with no activity stamp is never descheduled blind.
+func (w *DeployIdleScaler) listIdleCandidates(ctx context.Context) ([]idleCandidate, error) {
+	cutoff := time.Now().UTC().Add(-time.Duration(w.idleMinutes) * time.Minute)
+	rows, err := w.db.QueryContext(ctx, `
+		SELECT id, COALESCE(provider_id, '')
+		FROM deployments
+		WHERE status = $1
+		  AND scaled_to_zero = false
+		  AND always_on = false
+		  AND last_activity_at IS NOT NULL
+		  AND last_activity_at < $2
+		  AND provider_id IS NOT NULL
+		  AND provider_id <> ''
+		ORDER BY last_activity_at ASC
+		LIMIT $3
+	`, idleScalerStatusHealthy, cutoff, idleScalerBatchLimit)
+	if err != nil {
+		return nil, fmt.Errorf("listIdleCandidates: query: %w", err)
+	}
+	defer func() { _ = rows.Close() }()
+
+	var out []idleCandidate
+	for rows.Next() {
+		var c idleCandidate
+		if err := rows.Scan(&c.id, &c.providerID); err != nil {
+			return nil, fmt.Errorf("listIdleCandidates: scan: %w", err)
+		}
+		out = append(out, c)
+	}
+	return out, rows.Err()
+}
+
+// markScaledToZero flips scaled_to_zero=true, guarded on status, scaled_to_zero
+// and always_on (last_activity_at is not re-checked). Returns rows affected
+// (0 = raced into non-eligible state; skip), or the Exec/RowsAffected error.
+func (w *DeployIdleScaler) markScaledToZero(ctx context.Context, id uuid.UUID) (int64, error) {
+	res, err := w.db.ExecContext(ctx, `
+		UPDATE deployments
+		SET scaled_to_zero = true, updated_at = now()
+		WHERE id = $1
+		  AND status = $2
+		  AND scaled_to_zero = false
+		  AND always_on = false
+	`, id, idleScalerStatusHealthy)
+	if err != nil {
+		return 0, fmt.Errorf("markScaledToZero: %w", err)
+	}
+	n, err := res.RowsAffected() // surfaced: a driver error must not read as "0 rows, raced"
+	return n, err
+}
+
+// countAsleep returns how many deployments are currently scaled_to_zero — the
+// value published to the instant_deploy_idle_apps gauge.
+func (w *DeployIdleScaler) countAsleep(ctx context.Context) (int, error) {
+	var n int
+	err := w.db.QueryRowContext(ctx, `
+		SELECT count(*) FROM deployments WHERE scaled_to_zero = true
+	`).Scan(&n)
+	if err != nil {
+		return 0, fmt.Errorf("countAsleep: %w", err)
+	}
+	return n, nil
+}
+
+// namespaceAndNameFromProviderID derives the per-deployment namespace +
+// Deployment name from provider_id = "app-<appID>". Returns ("","") for a
+// provider_id not in that shape (e.g. a stack row) so the caller skips it.
+func namespaceAndNameFromProviderID(providerID string) (namespace, name string) {
+	if !strings.HasPrefix(providerID, idleScalerProviderPfx) {
+		return "", ""
+	}
+	appID := strings.TrimPrefix(providerID, idleScalerProviderPfx)
+	if appID == "" {
+		return "", ""
+	}
+	return idleScalerNSPfx + appID, providerID
+}
diff --git a/internal/jobs/deploy_idle_scaler_test.go b/internal/jobs/deploy_idle_scaler_test.go
new file mode 100644
index 0000000..7d53d2c
--- /dev/null
+++ b/internal/jobs/deploy_idle_scaler_test.go
@@ -0,0 +1,498 @@
+package jobs
+
+// deploy_idle_scaler_test.go — coverage for the scale-to-zero idle-scaler
+// (Task #54). SQL via sqlmock; k8s via a recording fake scale provider.
+//
+// Properties pinned:
+//   - flag OFF  → Work is a total no-op: NO SQL query is issued, NO scale call.
+//   - k8s nil   → Work short-circuits with no SQL query (fail-open).
+//   - happy path→ idle candidate is scaled to 0 (recorded) + DB CAS-flipped +
+//                 the scaled_down counter + idle-apps gauge update.
+//   - CAS race  → UPDATE returns 0 rows → counted as skipped, NOT scaled_down.
+//   - NotFound  → torn-down Deployment is skipped (no DB flip, no failure).
+//   - scale err → non-NotFound k8s error increments scale_failed, row untouched.
+//   - constructor floors a sub-5 idle threshold to 30.
+//   - namespaceAndNameFromProviderID derives ns/name and rejects bad shapes.
+
+import (
+	"context"
+	"errors"
+	"os"
+	"sync"
+	"testing"
+
+	sqlmock "github.com/DATA-DOG/go-sqlmock"
+	"github.com/google/uuid"
+	"github.com/prometheus/client_golang/prometheus/testutil"
+	"github.com/riverqueue/river"
+	"github.com/riverqueue/river/rivertype"
+	appsv1 "k8s.io/api/apps/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/client-go/kubernetes"
+	clientfake "k8s.io/client-go/kubernetes/fake"
+	"k8s.io/client-go/rest"
+	"k8s.io/client-go/tools/clientcmd"
+
+	"instant.dev/worker/internal/metrics"
+)
+
+func idleScalerJob() *river.Job[DeployIdleScalerArgs] {
+	return &river.Job[DeployIdleScalerArgs]{JobRow: &rivertype.JobRow{ID: 7}}
+}
+
+// recordingScaleProvider records every ScaleDeployment call and can be told to
+// return a fixed error (e.g. a synthetic NotFound or transport failure).
+type recordingScaleProvider struct {
+	mu       sync.Mutex
+	calls    []string // "ns/name" per call (the replicas argument is not recorded)
+	err      error
+	notFound bool
+}
+
+func (r *recordingScaleProvider) ScaleDeployment(_ context.Context, ns, name string, replicas int32) error {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.calls = append(r.calls, ns+"/"+name)
+	if r.notFound {
+		return apierrors.NewNotFound(schema.GroupResource{Resource: "deployments"}, name)
+	}
+	return r.err
+}
+
+func (r *recordingScaleProvider) callCount() int {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	return len(r.calls)
+}
+
+// TestDeployIdleScaler_FlagOffNoOp proves the job is fully inert when the flag
+// is off: no sqlmock expectation is registered, so any query would error out
+// of Work and fail the nil-error check, and the recording provider's call
+// count must stay 0 (Work never reaches the scale layer).
+func TestDeployIdleScaler_FlagOffNoOp(t *testing.T) {
+	db, mock, err := sqlmock.New()
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer db.Close()
+
+	prov := &recordingScaleProvider{err: errors.New("must not be called when flag off")}
+	w := NewDeployIdleScaler(db, prov, false /* enabled */, 30)
+
+	if err := w.Work(context.Background(), idleScalerJob()); err != nil {
+		t.Fatalf("flag-off Work should be nil, got: %v", err)
+	}
+	if prov.callCount() != 0 {
+		t.Errorf("flag-off must not call ScaleDeployment; got %d calls", prov.callCount())
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("flag-off must issue no SQL: %v", err)
+	}
+}
+
+// TestDeployIdleScaler_NilK8sNoOp proves a nil k8s client short-circuits before
+// any SQL is issued (fail-open at startup).
+func TestDeployIdleScaler_NilK8sNoOp(t *testing.T) {
+	db, mock, err := sqlmock.New()
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer db.Close()
+
+	w := NewDeployIdleScaler(db, nil /* k8s */, true /* enabled */, 30)
+	if err := w.Work(context.Background(), idleScalerJob()); err != nil {
+		t.Fatalf("nil-k8s Work should be nil, got: %v", err)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("nil-k8s must issue no SQL: %v", err)
+	}
+}
+
+// TestDeployIdleScaler_ScalesDownIdleApp covers the happy path: one idle
+// candidate → scaled to 0 + DB CAS flip + gauge sample.
+func TestDeployIdleScaler_ScalesDownIdleApp(t *testing.T) {
+	db, mock, err := sqlmock.New()
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer db.Close()
+
+	id := uuid.New()
+	mock.ExpectQuery(`SELECT id, COALESCE\(provider_id`).
+		WillReturnRows(sqlmock.NewRows([]string{"id", "provider_id"}).
+			AddRow(id, "app-abc123"))
+	mock.ExpectExec(`UPDATE deployments`).
+		WithArgs(id, "healthy").
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectQuery(`SELECT count\(\*\) FROM deployments WHERE scaled_to_zero = true`).
+		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
+
+	prov := &recordingScaleProvider{}
+	w := NewDeployIdleScaler(db, prov, true, 30)
+
+	before := testutil.ToFloat64(metrics.DeployScaledToZeroTotal.WithLabelValues("scaled_down"))
+	if err := w.Work(context.Background(), idleScalerJob()); err != nil {
+		t.Fatalf("Work: %v", err)
+	}
+	if prov.callCount() != 1 {
+		t.Fatalf("expected 1 ScaleDeployment call, got %d", prov.callCount())
+	}
+	if prov.calls[0] != "instant-deploy-abc123/app-abc123" {
+		t.Errorf("scaled wrong target: %q", prov.calls[0])
+	}
+	after := testutil.ToFloat64(metrics.DeployScaledToZeroTotal.WithLabelValues("scaled_down"))
+	if after != before+1 {
+		t.Errorf("scaled_down counter: before=%v after=%v", before, after)
+	}
+	if g := testutil.ToFloat64(metrics.DeployIdleApps); g != 1 {
+		t.Errorf("idle-apps gauge = %v; want 1", g)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet SQL expectations: %v", err)
+	}
+}
+
+// TestDeployIdleScaler_CASRaceSkips covers the case where the row was already
+// woken/pinned/redeployed between SELECT and UPDATE — UPDATE returns 0 rows,
+// so the scaled_down counter must NOT increment.
+func TestDeployIdleScaler_CASRaceSkips(t *testing.T) {
+	db, mock, err := sqlmock.New()
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer db.Close()
+
+	id := uuid.New()
+	mock.ExpectQuery(`SELECT id, COALESCE\(provider_id`).
+		WillReturnRows(sqlmock.NewRows([]string{"id", "provider_id"}).
+			AddRow(id, "app-raced"))
+	mock.ExpectExec(`UPDATE deployments`).
+		WithArgs(id, "healthy").
+		WillReturnResult(sqlmock.NewResult(0, 0)) // 0 rows = raced
+	mock.ExpectQuery(`SELECT count\(\*\) FROM deployments WHERE scaled_to_zero = true`).
+		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
+
+	prov := &recordingScaleProvider{}
+	w := NewDeployIdleScaler(db, prov, true, 30)
+
+	before := testutil.ToFloat64(metrics.DeployScaledToZeroTotal.WithLabelValues("scaled_down"))
+	if err := w.Work(context.Background(), idleScalerJob()); err != nil {
+		t.Fatalf("Work: %v", err)
+	}
+	after := testutil.ToFloat64(metrics.DeployScaledToZeroTotal.WithLabelValues("scaled_down"))
+	if after != before {
+		t.Errorf("CAS-raced row must NOT increment scaled_down: before=%v after=%v", before, after)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet SQL expectations: %v", err)
+	}
+}
+
+// TestDeployIdleScaler_NotFoundSkips: a torn-down Deployment (NotFound) is
+// skipped — no DB flip, no failure counter.
+func TestDeployIdleScaler_NotFoundSkips(t *testing.T) {
+	db, mock, err := sqlmock.New()
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer db.Close()
+
+	id := uuid.New()
+	mock.ExpectQuery(`SELECT id, COALESCE\(provider_id`).
+		WillReturnRows(sqlmock.NewRows([]string{"id", "provider_id"}).
+			AddRow(id, "app-gone"))
+	// No UPDATE expected — NotFound short-circuits before the DB flip.
+	mock.ExpectQuery(`SELECT count\(\*\) FROM deployments WHERE scaled_to_zero = true`).
+		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
+
+	prov := &recordingScaleProvider{notFound: true}
+	w := NewDeployIdleScaler(db, prov, true, 30)
+
+	beforeFail := testutil.ToFloat64(metrics.DeployScaledToZeroTotal.WithLabelValues("scale_failed"))
+	if err := w.Work(context.Background(), idleScalerJob()); err != nil {
+		t.Fatalf("Work: %v", err)
+	}
+	afterFail := testutil.ToFloat64(metrics.DeployScaledToZeroTotal.WithLabelValues("scale_failed"))
+	if afterFail != beforeFail {
+		t.Errorf("NotFound must NOT increment scale_failed: before=%v after=%v", beforeFail, afterFail)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet SQL expectations: %v", err)
+	}
+}
+
+// TestDeployIdleScaler_ScaleErrorCounts: a non-NotFound k8s error increments
+// scale_failed and leaves the row untouched (no UPDATE).
+func TestDeployIdleScaler_ScaleErrorCounts(t *testing.T) {
+	db, mock, err := sqlmock.New()
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer db.Close()
+
+	id := uuid.New()
+	mock.ExpectQuery(`SELECT id, COALESCE\(provider_id`).
+		WillReturnRows(sqlmock.NewRows([]string{"id", "provider_id"}).
+			AddRow(id, "app-boom"))
+	mock.ExpectQuery(`SELECT count\(\*\) FROM deployments WHERE scaled_to_zero = true`).
+		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
+
+	prov := &recordingScaleProvider{err: errors.New("k8s boom")}
+	w := NewDeployIdleScaler(db, prov, true, 30)
+
+	before := testutil.ToFloat64(metrics.DeployScaledToZeroTotal.WithLabelValues("scale_failed"))
+	if err := w.Work(context.Background(), idleScalerJob()); err != nil {
+		t.Fatalf("Work: %v", err)
+	}
+	after := testutil.ToFloat64(metrics.DeployScaledToZeroTotal.WithLabelValues("scale_failed"))
+	if after != before+1 {
+		t.Errorf("k8s error must increment scale_failed: before=%v after=%v", before, after)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet SQL expectations: %v", err)
+	}
+}
+
+// TestNewDeployIdleScaler_FloorsIdleMinutes: a sub-5 threshold floors to 30 so
+// a misconfig can't make the scaler aggressively flap apps to sleep.
+func TestNewDeployIdleScaler_FloorsIdleMinutes(t *testing.T) {
+	w := NewDeployIdleScaler(nil, nil, true, 1)
+	if w.idleMinutes != 30 {
+		t.Errorf("sub-5 idleMinutes should floor to 30; got %d", w.idleMinutes)
+	}
+	w2 := NewDeployIdleScaler(nil, nil, true, 45)
+	if w2.idleMinutes != 45 {
+		t.Errorf("valid idleMinutes should pass through; got %d", w2.idleMinutes)
+	}
+}
+
+// TestNamespaceAndNameFromProviderID covers the derivation + bad-shape rejection.
+func TestNamespaceAndNameFromProviderID(t *testing.T) {
+	cases := []struct {
+		providerID string
+		wantNS     string
+		wantName   string
+	}{
+		{"app-abc", "instant-deploy-abc", "app-abc"},
+		{"instant-stack-xyz", "", ""}, // stack row — not ours
+		{"app-", "", ""},              // empty appID
+		{"", "", ""},
+	}
+	for _, c := range cases {
+		ns, name := namespaceAndNameFromProviderID(c.providerID)
+		if ns != c.wantNS || name != c.wantName {
+			t.Errorf("namespaceAndNameFromProviderID(%q) = (%q,%q); want (%q,%q)",
+				c.providerID, ns, name, c.wantNS, c.wantName)
+		}
+	}
+}
+
+// TestDeployIdleScalerArgs_Kind pins the River job kind.
+func TestDeployIdleScalerArgs_Kind(t *testing.T) {
+	if (DeployIdleScalerArgs{}).Kind() != "deploy_idle_scaler" {
+		t.Errorf("Kind() = %q; want deploy_idle_scaler", (DeployIdleScalerArgs{}).Kind())
+	}
+}
+
+// TestNewK8sDeployScaleClientFromCluster_NoConfig exercises the cluster
+// constructor's error path when neither in-cluster config nor a kubeconfig is
+// reachable. Gated like TestNewDeployK8sClientset_NoConfig so it does not pick
+// up a developer's ~/.kube/config.
+func TestNewK8sDeployScaleClientFromCluster_NoConfig(t *testing.T) {
+	if _, err := os.Stat(clientcmd.RecommendedHomeFile); err == nil {
+		t.Skip("kubeconfig present on host — error path not reachable here")
+	}
+	if os.Getenv("KUBERNETES_SERVICE_HOST") != "" {
+		t.Skip("running in-cluster — in-cluster config will succeed")
+	}
+	if _, err := NewK8sDeployScaleClientFromCluster(); err == nil {
+		t.Error("expected error with no in-cluster config and no kubeconfig")
+	}
+}
+
+// TestNewK8sDeployScaleClientFromCluster_Success overrides the clientset builder
+// seam so the success return is exercised without a reachable cluster. A
+// rest.Config pointed at an unroutable host builds a *kubernetes.Clientset
+// without connecting, proving the constructor wraps it into a non-nil provider.
+func TestNewK8sDeployScaleClientFromCluster_Success(t *testing.T) {
+	orig := newDeployScaleClientset
+	t.Cleanup(func() { newDeployScaleClientset = orig })
+	newDeployScaleClientset = func() (*kubernetes.Clientset, error) {
+		return kubernetes.NewForConfig(&rest.Config{Host: "http://localhost:1"})
+	}
+	prov, err := NewK8sDeployScaleClientFromCluster()
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if prov == nil {
+		t.Fatal("expected non-nil scale provider")
+	}
+}
+
+// TestBuildIdleScaleK8s exercises both outcomes of the helper StartWorkers uses to wire the
+// idle-scaler's k8s client: a working builder yields a provider, while a builder error
+// yields nil (fail-open) instead of being propagated.
+func TestBuildIdleScaleK8s(t *testing.T) {
+	saved := newDeployScaleClientset
+	t.Cleanup(func() { newDeployScaleClientset = saved })
+	workingBuilder := func() (*kubernetes.Clientset, error) {
+		return kubernetes.NewForConfig(&rest.Config{Host: "http://localhost:1"})
+	}
+	failingBuilder := func() (*kubernetes.Clientset, error) {
+		return nil, errors.New("no cluster")
+	}
+	t.Run("success returns provider", func(t *testing.T) {
+		newDeployScaleClientset = workingBuilder
+		if buildIdleScaleK8s() == nil {
+			t.Fatal("expected non-nil provider on success")
+		}
+	})
+	t.Run("builder error returns nil", func(t *testing.T) {
+		newDeployScaleClientset = failingBuilder
+		if provider := buildIdleScaleK8s(); provider != nil {
+			t.Fatalf("expected nil provider on builder error; got %v", provider)
+		}
+	})
+}
+
+// TestDeployIdleScaler_ListQueryError: when the candidate SELECT errors, Work must
+// return that failure as a job error (River retries).
+func TestDeployIdleScaler_ListQueryError(t *testing.T) {
+	conn, sqlMock, err := sqlmock.New()
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer conn.Close()
+	queryFailure := errors.New("db down")
+	sqlMock.ExpectQuery(`SELECT id, COALESCE\(provider_id`).WillReturnError(queryFailure)
+	scaler := NewDeployIdleScaler(conn, &recordingScaleProvider{}, true, 30)
+	if workErr := scaler.Work(context.Background(), idleScalerJob()); workErr == nil {
+		t.Error("list query error should fail the job")
+	}
+}
+
+// TestDeployIdleScaler_ScanError: a candidate row whose id is not a UUID makes
+// rows.Scan fail inside listIdleCandidates, which must surface as a job error.
+func TestDeployIdleScaler_ScanError(t *testing.T) {
+	conn, sqlMock, err := sqlmock.New()
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer conn.Close()
+	badRows := sqlmock.NewRows([]string{"id", "provider_id"}).AddRow("not-a-uuid", "app-x")
+	sqlMock.ExpectQuery(`SELECT id, COALESCE\(provider_id`).WillReturnRows(badRows)
+	scaler := NewDeployIdleScaler(conn, &recordingScaleProvider{}, true, 30)
+	workErr := scaler.Work(context.Background(), idleScalerJob())
+	if workErr == nil {
+		t.Error("scan error should fail the job")
+	}
+}
+
+// TestDeployIdleScaler_SkipsForeignProviderID: a candidate whose provider_id does not
+// have the app-<appID> shape (e.g. a stack row that slipped past the SQL filter) must
+// be skipped: Work succeeds and the scale provider records zero calls.
+func TestDeployIdleScaler_SkipsForeignProviderID(t *testing.T) {
+	conn, sqlMock, err := sqlmock.New()
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer conn.Close()
+	candidates := sqlmock.NewRows([]string{"id", "provider_id"}).AddRow(uuid.New(), "instant-stack-xyz")
+	asleep := sqlmock.NewRows([]string{"count"}).AddRow(0)
+	sqlMock.ExpectQuery(`SELECT id, COALESCE\(provider_id`).WillReturnRows(candidates)
+	sqlMock.ExpectQuery(`SELECT count\(\*\) FROM deployments WHERE scaled_to_zero = true`).WillReturnRows(asleep)
+	recorder := &recordingScaleProvider{}
+	scaler := NewDeployIdleScaler(conn, recorder, true, 30)
+	if workErr := scaler.Work(context.Background(), idleScalerJob()); workErr != nil {
+		t.Fatalf("Work: %v", workErr)
+	}
+	calls := recorder.callCount()
+	if calls != 0 {
+		t.Errorf("foreign provider_id must not be scaled; got %d calls", calls)
+	}
+}
+
+// TestDeployIdleScaler_DBFlipError: if the scaled_to_zero UPDATE fails after the scale
+// call, Work still succeeds and the scale_failed counter goes up by exactly one.
+func TestDeployIdleScaler_DBFlipError(t *testing.T) {
+	conn, sqlMock, err := sqlmock.New()
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer conn.Close()
+	deployID := uuid.New()
+	candidates := sqlmock.NewRows([]string{"id", "provider_id"}).AddRow(deployID, "app-dbflip")
+	asleep := sqlmock.NewRows([]string{"count"}).AddRow(0)
+	sqlMock.ExpectQuery(`SELECT id, COALESCE\(provider_id`).WillReturnRows(candidates)
+	sqlMock.ExpectExec(`UPDATE deployments`).
+		WithArgs(deployID, "healthy").
+		WillReturnError(errors.New("update exploded"))
+	sqlMock.ExpectQuery(`SELECT count\(\*\) FROM deployments WHERE scaled_to_zero = true`).WillReturnRows(asleep)
+	scaler := NewDeployIdleScaler(conn, &recordingScaleProvider{}, true, 30)
+
+	scaleFailed := metrics.DeployScaledToZeroTotal.WithLabelValues("scale_failed")
+	before := testutil.ToFloat64(scaleFailed)
+	if workErr := scaler.Work(context.Background(), idleScalerJob()); workErr != nil {
+		t.Fatalf("Work: %v", workErr)
+	}
+	after := testutil.ToFloat64(scaleFailed)
+	if after != before+1 {
+		t.Errorf("db-flip error must increment scale_failed: before=%v after=%v", before, after)
+	}
+}
+
+// TestDeployIdleScaler_GaugeSampleError: when the countAsleep query fails after the
+// scale-down and DB flip have gone through, Work must still return nil.
+func TestDeployIdleScaler_GaugeSampleError(t *testing.T) {
+	conn, sqlMock, err := sqlmock.New()
+	if err != nil {
+		t.Fatalf("sqlmock.New: %v", err)
+	}
+	defer conn.Close()
+	deployID := uuid.New()
+	candidates := sqlmock.NewRows([]string{"id", "provider_id"}).AddRow(deployID, "app-g")
+	sqlMock.ExpectQuery(`SELECT id, COALESCE\(provider_id`).WillReturnRows(candidates)
+	sqlMock.ExpectExec(`UPDATE deployments`).
+		WithArgs(deployID, "healthy").
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	sqlMock.ExpectQuery(`SELECT count\(\*\) FROM deployments WHERE scaled_to_zero = true`).
+		WillReturnError(errors.New("count failed"))
+	scaler := NewDeployIdleScaler(conn, &recordingScaleProvider{}, true, 30)
+	if workErr := scaler.Work(context.Background(), idleScalerJob()); workErr != nil {
+		t.Fatalf("gauge-sample error must not fail the job, got: %v", workErr)
+	}
+}
+
+// TestK8sDeployScaleClient_ScaleDeployment runs the production scale client against a fake clientset:
+// scale a seeded Deployment to 0 (read back; a Get error aborts), repeat as a no-op, then expect NotFound.
+func TestK8sDeployScaleClient_ScaleDeployment(t *testing.T) {
+	one := int32(1)
+	cs := clientfake.NewSimpleClientset(&appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{Name: "app-x", Namespace: "instant-deploy-x"},
+		Spec:       appsv1.DeploymentSpec{Replicas: &one},
+	})
+	c := NewK8sDeployScaleClient(cs)
+	// Scale down to 0.
+	if err := c.ScaleDeployment(context.Background(), "instant-deploy-x", "app-x", 0); err != nil {
+		t.Fatalf("ScaleDeployment(0): %v", err)
+	}
+	got, err := cs.AppsV1().Deployments("instant-deploy-x").Get(context.Background(), "app-x", metav1.GetOptions{})
+	if err != nil {
+		t.Fatalf("Get after scale-down: %v", err)
+	}
+	if r := got.Spec.Replicas; r == nil {
+		t.Error("replicas after scale-down = nil; want 0")
+	} else if *r != 0 {
+		t.Errorf("replicas after scale-down = %d; want 0", *r)
+	}
+	// Already at 0 → idempotent no-op (no error).
+	if err := c.ScaleDeployment(context.Background(), "instant-deploy-x", "app-x", 0); err != nil {
+		t.Errorf("ScaleDeployment(0) idempotent should be nil: %v", err)
+	}
+	// Missing Deployment → NotFound surfaced (caller maps to skip).
+	if err := c.ScaleDeployment(context.Background(), "instant-deploy-missing", "app-missing", 0); !apierrors.IsNotFound(err) {
+		t.Errorf("ScaleDeployment on missing Deployment should return NotFound, got: %v", err)
+	}
+}
diff --git a/internal/jobs/workers.go b/internal/jobs/workers.go
index 7387140..197eb88 100644
--- a/internal/jobs/workers.go
+++ b/internal/jobs/workers.go
@@ -317,6 +317,23 @@ func newMinioAdminClient(cfg *config.Config) (*madmin.AdminClient, error) {
 	})
 }
 
+// buildIdleScaleK8s builds the k8s scale client for the scale-to-zero idle-scaler
+// via NewK8sDeployScaleClientFromCluster. If that fails (no reachable cluster, as
+// in CI / docker-compose) it logs a warning and returns nil rather than an error,
+// keeping StartWorkers fail-open so the remaining periodic jobs still run. It is
+// a standalone function (not inlined in StartWorkers) so both branches can be
+// unit-tested without a live River DB.
+func buildIdleScaleK8s() deployScaleK8sProvider {
+	client, err := NewK8sDeployScaleClientFromCluster()
+	if err == nil {
+		return client
+	}
+	slog.Warn("workers.deploy_idle_scaler.k8s_client_unavailable",
+		"error", err,
+		"note", "idle-scaler will short-circuit each tick until the worker restarts with a reachable cluster")
+	return nil
+}
+
 func StartWorkers(ctx context.Context, db *sql.DB, rdb *redis.Client, cfg *config.Config, provClient *provisioner.Client, planRegistry PlanRegistry, backupPlans BackupPlanRegistry, deployStatusK8s deployStatusK8sProvider, deployAutopsyK8s deployAutopsyK8sProvider, nrApp *newrelic.Application) *Workers {
 	// rdb is used by LoopsEventForwarderWorker (cursor storage). Other
 	// workers access redis indirectly via the platform DB.
@@ -506,6 +523,15 @@ func StartWorkers(ctx context.Context, db *sql.DB, rdb *redis.Client, cfg *confi
 	statusReconciler := NewDeployStatusReconciler(db, deployStatusK8s).
 		WithAutopsyK8s(deployAutopsyK8s)
 	river.AddWorker(workers, WithObservability(statusReconciler, nrApp))
+	// Scale-to-zero idle-scaler (Task #54). INERT unless
+	// DEPLOY_SCALE_TO_ZERO_ENABLED is set (default off). Builds its own scale
+	// client from cluster config; nil when unreachable (CI / docker-compose) →
+	// the worker warn-logs each tick and other periodic jobs keep running. See
+	// deploy_idle_scaler.go for the idle-signal + cold-start design notes.
+	idleScaleK8s := buildIdleScaleK8s()
+	river.AddWorker(workers, WithObservability(
+		NewDeployIdleScaler(db, idleScaleK8s, cfg.DeployScaleToZeroEnabled, cfg.DeployScaleToZeroIdleMinutes),
+		nrApp))
 	// Event-email forwarder — drains audit_log rows into the configured
 	// provider every 60s for lifecycle email triggering. The provider is
 	// always non-nil (NoopProvider when EMAIL_PROVIDER is unset). See
@@ -1126,6 +1152,18 @@ func buildPeriodicJobs(cfg *config.Config) []*river.PeriodicJob {
 			},
 			&river.PeriodicJobOpts{RunOnStart: true},
 		),
+		// Scale-to-zero idle-scaler (Task #54) — every 2 min. INERT unless
+		// DEPLOY_SCALE_TO_ZERO_ENABLED (the worker is registered regardless; its
+		// Work() short-circuits when the flag is off). RunOnStart=false: there's
+		// no backlog to drain on boot and we don't want a worker restart to
+		// immediately deschedule apps before the first idle window elapses.
+		river.NewPeriodicJob(
+			river.PeriodicInterval(deployIdleScalerInterval),
+			func() (river.JobArgs, *river.InsertOpts) {
+				return DeployIdleScalerArgs{}, reconcileInsertOpts(deployIdleScalerInterval)
+			},
+			&river.PeriodicJobOpts{RunOnStart: false},
+		),
 		// Magic-link reconciler — every 60s. RunOnStart=true so a worker
 		// restart immediately drains rows whose first send failed while
 		// the worker was down (we have a 15-min TTL window to retry, so
diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go
index 8ed991d..d400534 100644
--- a/internal/metrics/metrics.go
+++ b/internal/metrics/metrics.go
@@ -123,6 +123,39 @@ var (
 		Help: "Deployments soft-deleted (status='expired') by the expirer worker.",
 	})
 
+	// ── scale-to-zero (deploy_idle_scaler.go, Task #54) ──────────────────────
+	//
+	// DeployScaledToZeroTotal increments once per scale action, labelled by
+	// outcome:
+	//   outcome="scaled_down"  — an idle app was descheduled to replicas=0
+	//                            (k8s patch + DB flip both succeeded). The
+	//                            happy "we saved compute" path.
+	//   outcome="woke_up"      — reserved for a worker-initiated wake (the api
+	//                            wake endpoint owns the user-initiated path);
+	//                            present so the dashboard series exists.
+	//   outcome="wake_failed"  — a wake/scale-up attempt failed (k8s error).
+	//                            P1 if > 0: a user's app may be stuck asleep.
+	//   outcome="scale_failed" — a scale-DOWN failed: the k8s patch errored, or the
+	//                            DB flip after it did; retried next tick. P2 observ.
+	//
+	// NR alert: deploy-scale-to-zero-fail.json (wake_failed > 0 → P1;
+	// scale_failed sustained → P2). Prom rule: DeployScaleToZeroFailures.
+	// Dashboard tile: infra/newrelic/dashboards/instanode-reliability.json.
+	// Catalog: infra/observability/METRICS-CATALOG.md (lazy *Vec — label
+	// families primed in metrics_test.go so /metrics exposes them from start).
+	DeployScaledToZeroTotal = promauto.NewCounterVec(prometheus.CounterOpts{
+		Name: "instant_deploy_scaled_to_zero_total",
+		Help: "Scale-to-zero idle-scaler actions by outcome (scaled_down | woke_up | wake_failed | scale_failed).",
+	}, []string{"outcome"})
+
+	// DeployIdleApps is the gauge of apps observed asleep (scaled_to_zero=true)
+	// at the end of each idle-scaler tick. Tracks the descheduled fleet size —
+	// the headline "how much compute scale-to-zero is reclaiming" signal.
+	DeployIdleApps = promauto.NewGauge(prometheus.GaugeOpts{
+		Name: "instant_deploy_idle_apps",
+		Help: "Deployments currently scaled to zero (asleep), sampled each idle-scaler tick.",
+	})
+
 	// DeployRemindersSentTotal counts reminder emails actually dispatched
 	// to a real owner email (post-CAS, post-email-send).
 	DeployRemindersSentTotal = promauto.NewCounter(prometheus.CounterOpts{
diff --git a/internal/metrics/metrics_test.go b/internal/metrics/metrics_test.go
index 60fcfdb..6d6f596 100644
--- a/internal/metrics/metrics_test.go
+++ b/internal/metrics/metrics_test.go
@@ -108,6 +108,16 @@ func TestAllMetrics_AreRegistered(t *testing.T) {
 	DeployAutopsyCapturedTotal.WithLabelValues("logs_unavailable").Add(0)
 	DeployAutopsyCapturedTotal.WithLabelValues("already_present").Add(0)
 	DeployAutopsyCapturedTotal.WithLabelValues("audit_emit_failed").Add(0)
+	// Prime all four scale-to-zero outcome label values so /metrics exposes the
+	// series from process start (lazy *Vec otherwise leaves the dashboard tile
+	// empty until the first real scale action).
+	DeployScaledToZeroTotal.WithLabelValues("scaled_down").Add(0)
+	DeployScaledToZeroTotal.WithLabelValues("woke_up").Add(0)
+	DeployScaledToZeroTotal.WithLabelValues("wake_failed").Add(0)
+	DeployScaledToZeroTotal.WithLabelValues("scale_failed").Add(0)
+
+	// Plain gauge
+	DeployIdleApps.Set(0)
 
 	// Gauge vecs
 	ResourceDegradedGauge.WithLabelValues("postgres").Set(0)