diff --git a/cmd/lakebox/api.go b/cmd/lakebox/api.go index 2f19722541a..b231a2d683c 100644 --- a/cmd/lakebox/api.go +++ b/cmd/lakebox/api.go @@ -15,39 +15,31 @@ import ( // sandboxPath returns the URL path for a single sandbox resource. The ID is // path-escaped so a value like `foo;rm -rf /` lands on -// `/sandboxes/foo%3Brm%20-rf%20%2F` and gets a clean 400 from -// validate_sandbox_id on the server, rather than its unescaped `/` re-routing -// the request to the list endpoint (which silently returns an empty result the -// CLI then renders as an all-zero sandbox record). +// `/sandboxes/foo%3Brm%20-rf%20%2F` and gets a clean 400 from the server, +// rather than its unescaped `/` re-routing the request to the list endpoint +// (which silently returns an empty result the CLI then renders as an +// all-zero sandbox record). func sandboxPath(id string) string { return lakeboxAPIPath + "/" + url.PathEscape(id) } -// Sandboxes live under the `/sandboxes` sub-collection of the lakebox service -// namespace (see `lakebox.proto` `LakeboxService.CreateSandbox`). -const lakeboxAPIPath = "/api/2.0/lakebox/sandboxes" - -// SSH keys are nested under the lakebox service namespace alongside -// `sandboxes/` (see `LakeboxService.CreateSshKey`). -const lakeboxKeysAPIPath = "/api/2.0/lakebox/ssh-keys" +// Sub-collections under the lakebox service namespace. +const ( + lakeboxAPIPath = "/api/2.0/lakebox/sandboxes" + lakeboxKeysAPIPath = "/api/2.0/lakebox/ssh-keys" +) -// orgIDHeader is sent by multi-workspace gateways (e.g. dogfood staging) so -// the gateway can scope the credential to a specific workspace. Without it, -// requests fail with "Credential was not sent or was of an unsupported type -// for this API." +// orgIDHeader scopes the credential to a workspace on multi-workspace +// gateways. Without it, requests fail with "Credential was not sent or was +// of an unsupported type for this API." const orgIDHeader = "X-Databricks-Org-Id" -// maxNameBytes mirrors the manager-side `Sandbox.name` length cap. The -// server measures bytes, not characters, so a name made of emoji or other -// multi-byte UTF-8 hits the limit in fewer visible characters than the user -// expects. Mirroring the constant client-side lets us fail fast with the -// observed byte count instead of paying a round-trip for a 400. +// maxNameBytes mirrors the server-side `Sandbox.name` cap. The server +// measures bytes (not runes), so emoji hit the limit faster than expected; +// mirroring it client-side lets us fail fast with the observed byte count. const maxNameBytes = 256 -// validateName returns an error when `name` exceeds the wire limit. The -// error names the observed byte count so the user can recover in one shot -// (the original server message just said "exceeds 256 bytes" without saying -// by how much, which is unhelpful when emoji are in play). +// validateName rejects names that exceed the wire limit (counted in bytes). func validateName(name string) error { if n := len(name); n > maxNameBytes { return fmt.Errorf("--name is %d bytes; limit is %d (emoji and most non-ASCII characters count as 2-4 bytes each)", n, maxNameBytes) @@ -61,26 +53,20 @@ type lakeboxAPI struct { } // sandboxCreateBody is the inner `Sandbox` message in the create payload. -// Only `name` is caller-settable today; all other fields are server-chosen. +// Only `name` is caller-settable; the rest are server-chosen. type sandboxCreateBody struct { Name string `json:"name,omitempty"` } -// createRequest is the JSON body for POST /api/2.0/lakebox/sandboxes. -// `CreateSandboxRequest { Sandbox sandbox = 1 }` has `body: "*"`, so the -// wire body is the full request with a `sandbox` wrapper. +// createRequest is the wrapped POST body for sandbox creation. type createRequest struct { Sandbox sandboxCreateBody `json:"sandbox"` } -// createResponse is the JSON body returned by POST /api/2.0/lakebox/sandboxes. -// Mirrors the `Sandbox` proto message after JSON transcoding. -// -// `FQDN` is the manager's internal routing hostname — not user-actionable. -// `GatewayHost` is the public SSH gateway hostname for the workspace, -// stamped by the manager (universe#1966484) so the CLI no longer needs to -// hardcode regional defaults. Both are `omitempty` so old/new wire shapes -// round-trip cleanly. +// createResponse mirrors the Sandbox proto after JSON transcoding. FQDN is +// the manager's internal routing host (not user-actionable); GatewayHost is +// the public SSH gateway. Both are `omitempty` so old and new server +// versions round-trip cleanly. type createResponse struct { SandboxID string `json:"sandboxId"` Status string `json:"status"` @@ -88,16 +74,11 @@ type createResponse struct { GatewayHost string `json:"gatewayHost,omitempty"` } -// sandboxEntry is a single item in the list response. -// Mirrors the `Sandbox` proto message after JSON transcoding. -// -// IdleTimeout and NoAutostop correspond to the proto's `optional` fields; -// they're pointers so we can tell "field absent on the wire" (server has -// the global default) from "explicitly set to 0 / false." -// -// `IdleTimeout` is a `google.protobuf.Duration`. Proto3 JSON canonical -// form serializes Duration as a string with an `s` suffix (e.g. -// `"900s"`), so the Go field is `*string` and we parse on read. +// sandboxEntry mirrors the Sandbox proto after JSON transcoding. +// IdleTimeout and NoAutostop are pointer-typed so we can distinguish +// "field absent on the wire" (server uses its default) from "explicitly +// set to 0 / false". IdleTimeout is a proto3-canonical Duration string +// (see idleTimeoutSecs). type sandboxEntry struct { SandboxID string `json:"sandboxId"` Status string `json:"status"` @@ -129,21 +110,12 @@ func (e *sandboxEntry) idleTimeoutSecs() int64 { return int64(d.Seconds()) } -// autoStopLabel renders the auto-stop policy advertised by the manager -// for one sandbox into a short human-readable string. Mirrors the wire -// semantics from `lakebox/proto/lakebox.proto`: +// autoStopLabel renders the auto-stop policy for one sandbox: // - `no_autostop == true` → never auto-stops // - `idle_timeout` set and positive → that many seconds // - otherwise → no enforcement today; render as "never" // -// The "otherwise" branch used to render a hardcoded `10m` claiming to -// mirror a manager-side `watchdog_idle_grace_secs` fallback. That -// fallback does not exist in the current tree (only a stale comment in -// `lakebox/proto/lakebox.proto`); the ESM-side `LakeboxChecker` is also -// gated off via the `lakeboxCheckerEnabled` SAFE flag, so unset -// `idle_timeout` is functionally "never auto-stops" today. Once the -// manager enforces a real default, swap this branch back to a duration -// label. +// If the manager later enforces an idle-grace default, render it here. func (e *sandboxEntry) autoStopLabel() string { if e.NoAutostop != nil && *e.NoAutostop { return "never" @@ -156,7 +128,7 @@ func (e *sandboxEntry) autoStopLabel() string { // formatDurationSecs prints `secs` as a compact duration (e.g. `90s`, // `15m`, `2h`, `1h30m`). Falls back to seconds if it's not a clean -// minute/hour multiple. Avoids pulling in a dependency just for this. +// minute/hour multiple. func formatDurationSecs(secs int64) string { if secs < 60 { return fmt.Sprintf("%ds", secs) @@ -174,28 +146,17 @@ func formatDurationSecs(secs int64) string { } // listResponse is the JSON body returned by GET /api/2.0/lakebox/sandboxes. -// `nextPageToken` is empty on the final page (or when the result fits in one). type listResponse struct { Sandboxes []sandboxEntry `json:"sandboxes"` NextPageToken string `json:"nextPageToken,omitempty"` } -// listPageSize matches the manager-side default. Typical user fleets are -// well under this, so one round-trip covers them; the pagination loop in -// `list` handles the rare larger fleet. +// listPageSize matches the manager-side default. const listPageSize = 100 -// updateBody is the PATCH request body. The proto declares -// `UpdateSandboxRequest { Sandbox sandbox = 1 }` with `body: "sandbox"` -// in the (google.api.http) annotation, so the HTTP body is the inner -// `Sandbox` message directly — there is no `{"sandbox": {...}}` -// wrapping on the wire. -// -// Pointer fields encode the proto3 `optional` semantics — only the -// fields we explicitly set are emitted, leaving everything else -// server-untouched. `IdleTimeout` is a proto3-canonical Duration -// string (e.g. `"900s"`); the server-side wire type is -// `google.protobuf.Duration`. +// updateBody is the PATCH body; the server takes the inner `Sandbox` +// message directly with no `{"sandbox": ...}` wrapping. Pointer fields +// encode proto3 optional semantics (see sandboxEntry). type updateBody struct { SandboxID string `json:"sandbox_id"` Name *string `json:"name,omitempty"` @@ -209,6 +170,7 @@ type registerKeyRequest struct { Name string `json:"name,omitempty"` } +// newLakeboxAPI returns a lakeboxAPI bound to the workspace client's config. func newLakeboxAPI(w *databricks.WorkspaceClient) (*lakeboxAPI, error) { c, err := client.New(w.Config) if err != nil { @@ -218,10 +180,9 @@ func newLakeboxAPI(w *databricks.WorkspaceClient) (*lakeboxAPI, error) { } // headers attaches the workspace routing identifier so multi-workspace -// gateways (e.g. SPOG hosts) can scope the credential. Mirrors the pattern -// in libs/telemetry, libs/filer, and SDK-generated workspace services. The -// auth.WorkspaceIDNone sentinel ("none") is treated as unset so the literal -// string never goes on the wire. +// gateways (e.g. SPOG hosts) can scope the credential. The +// auth.WorkspaceIDNone sentinel ("none") is treated as unset so the +// literal string never goes on the wire. func (a *lakeboxAPI) headers() map[string]string { wsID := a.c.Config.WorkspaceID if wsID == "" || wsID == auth.WorkspaceIDNone { @@ -231,8 +192,7 @@ func (a *lakeboxAPI) headers() map[string]string { } // create calls POST /api/2.0/lakebox/sandboxes. An empty `name` is omitted -// from the wire payload so the server treats it as "unset" rather than -// "explicit empty string." +// so the server treats it as "unset" rather than "explicit empty string". func (a *lakeboxAPI) create(ctx context.Context, name string) (*createResponse, error) { body := createRequest{Sandbox: sandboxCreateBody{Name: name}} var resp createResponse @@ -244,7 +204,7 @@ func (a *lakeboxAPI) create(ctx context.Context, name string) (*createResponse, } // list calls GET /api/2.0/lakebox/sandboxes, following pagination until the -// server stops sending `next_page_token`. Returns the full set in one slice. +// server stops sending `next_page_token`. func (a *lakeboxAPI) list(ctx context.Context) ([]sandboxEntry, error) { var all []sandboxEntry pageToken := "" @@ -261,8 +221,7 @@ func (a *lakeboxAPI) list(ctx context.Context) ([]sandboxEntry, error) { } } -// listPage fetches a single page of sandboxes. An empty `pageToken` requests -// the first page; the server enforces ordering across pages. +// listPage fetches a single page of sandboxes. // // `query` is passed in slot 6 (`request`), not slot 5 (`queryParams`). On // GET, the SDK's makeRequestBody serializes `request` into the URL query @@ -294,9 +253,9 @@ func (a *lakeboxAPI) get(ctx context.Context, id string) (*sandboxEntry, error) } // update calls PATCH /api/2.0/lakebox/sandboxes/{id} with whichever of -// `idle_timeout` / `no_autostop` the caller chose to set. Fields left -// nil are omitted from the wire payload, so the server preserves their -// current values. Returns the refreshed `sandboxEntry`. +// `idle_timeout` / `no_autostop` the caller chose to set. Fields left nil +// are omitted from the wire payload, so the server preserves their current +// values. Returns the refreshed `sandboxEntry`. func (a *lakeboxAPI) update(ctx context.Context, id string, name *string, idleTimeoutSecs *int64, noAutostop *bool) (*sandboxEntry, error) { var idleTimeout *string if idleTimeoutSecs != nil { @@ -323,9 +282,7 @@ func (a *lakeboxAPI) delete(ctx context.Context, id string) error { } // stop calls POST /api/2.0/lakebox/sandboxes/{id}/stop and returns the -// refreshed sandbox. The proto's `StopSandboxRequest` carries `sandbox_id` -// (redundant with the URL path) under `body: "*"`, so we mirror it -// explicitly even though the transcoder fills the field from the path. +// refreshed sandbox. func (a *lakeboxAPI) stop(ctx context.Context, id string) (*sandboxEntry, error) { body := map[string]string{"sandbox_id": id} var resp sandboxEntry @@ -337,7 +294,7 @@ func (a *lakeboxAPI) stop(ctx context.Context, id string) (*sandboxEntry, error) } // start calls POST /api/2.0/lakebox/sandboxes/{id}/start and returns the -// refreshed sandbox. Mirror of `stop`; same body shape per `body: "*"`. +// refreshed sandbox. func (a *lakeboxAPI) start(ctx context.Context, id string) (*sandboxEntry, error) { body := map[string]string{"sandbox_id": id} var resp sandboxEntry @@ -349,15 +306,12 @@ func (a *lakeboxAPI) start(ctx context.Context, id string) (*sandboxEntry, error } // registerKey calls POST /api/2.0/lakebox/ssh-keys. An empty `name` is -// omitted from the wire payload so the server records "unset" rather than -// an explicit empty string. +// omitted so the server records "unset" rather than an explicit empty string. func (a *lakeboxAPI) registerKey(ctx context.Context, publicKey, name string) error { return a.c.Do(ctx, http.MethodPost, lakeboxKeysAPIPath, a.headers(), nil, registerKeyRequest{PublicKey: publicKey, Name: name}, nil) } -// sshKeyEntry is a single item in the ssh-key list response. Mirrors the -// `SshKey` proto message after JSON transcoding (`key_hash` → `keyHash`, -// timestamps as RFC 3339 strings). +// sshKeyEntry is a single item in the ssh-key list response. type sshKeyEntry struct { KeyHash string `json:"keyHash"` Name string `json:"name,omitempty"` @@ -366,8 +320,8 @@ type sshKeyEntry struct { } // listKeysResponse is the JSON body returned by GET /api/2.0/lakebox/ssh-keys. -// Per-user keys are hard-capped at 100 server-side, so the full set fits in -// one response — no pagination. +// Per-user keys are hard-capped server-side, so the full set fits in one +// response — no pagination. type listKeysResponse struct { SshKeys []sshKeyEntry `json:"sshKeys"` } diff --git a/cmd/lakebox/completion.go b/cmd/lakebox/completion.go index f05cc621884..29e9c074206 100644 --- a/cmd/lakebox/completion.go +++ b/cmd/lakebox/completion.go @@ -6,19 +6,9 @@ import ( "github.com/spf13/cobra" ) -// completeSandboxIDs is a Cobra ValidArgsFunction returning the caller's -// sandbox IDs (and display names, when distinct from the ID) for tab -// completion. Reads purely from the local cache populated by `lakebox -// list` / `create` / `status` / etc. — no API call on ``, so a flaky -// network or unrefreshed auth token never makes the shell hang. -// -// Cobra runs ValidArgsFunction in a separate process from the main -// command, so we still need to bootstrap the workspace client just to -// learn which profile we're under. The cache itself is profile-scoped. -// -// Best-effort: any failure (no profile resolvable, empty cache) returns -// no suggestions instead of an error so the shell stays usable and the -// user can still type the ID by hand. +// completeSandboxIDs returns sandbox IDs and (distinct) display names +// from the local cache for tab completion. Cache-only so an unrefreshed +// token never hangs the shell; any failure yields no suggestions. func completeSandboxIDs(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { if len(args) > 0 { return nil, cobra.ShellCompDirectiveNoFileComp @@ -31,9 +21,7 @@ func completeSandboxIDs(cmd *cobra.Command, args []string, toComplete string) ([ if len(sbs) == 0 { return nil, cobra.ShellCompDirectiveNoFileComp } - // Offer the ID always, plus the display name when the user actually - // set one (server defaults `name` to the sandbox ID, so don't echo - // the same string twice). + // Server defaults `name` to the ID, so only emit the name when it's distinct. suggestions := make([]string, 0, len(sbs)*2) for _, s := range sbs { suggestions = append(suggestions, s.ID) @@ -44,10 +32,8 @@ func completeSandboxIDs(cmd *cobra.Command, args []string, toComplete string) ([ return suggestions, cobra.ShellCompDirectiveNoFileComp } -// completeSSHKeyHashes is the equivalent for `ssh-key delete `, -// returning the hashes of registered keys. SSH-key hashes aren't cached -// locally (per-user cap is ~100 server-side and listing is cheap), so -// this path still calls the API. +// completeSSHKeyHashes returns registered key hashes for `ssh-key delete`. +// Hashes aren't cached locally, so this path calls the API. func completeSSHKeyHashes(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { if len(args) > 0 { return nil, cobra.ShellCompDirectiveNoFileComp diff --git a/cmd/lakebox/config.go b/cmd/lakebox/config.go index 6ca1d6350ce..efe0d51220b 100644 --- a/cmd/lakebox/config.go +++ b/cmd/lakebox/config.go @@ -11,9 +11,9 @@ import ( "github.com/spf13/cobra" ) -// MIN_IDLE_TIMEOUT_SECS / MAX_IDLE_TIMEOUT_SECS mirror the manager-side -// constants in lakebox/src/api/handlers/sandbox.rs. Pre-flighting client-side -// gives a clearer error than waiting for the server's INVALID_ARGUMENT. +// minIdleTimeoutSecs / maxIdleTimeoutSecs mirror the server-side bounds +// on `idle_timeout`. Pre-flighting client-side gives a clearer error +// than waiting for the server's INVALID_ARGUMENT. const ( minIdleTimeoutSecs = 60 maxIdleTimeoutSecs = 86_400 @@ -78,8 +78,6 @@ Examples: return err } - // Translate flag presence + value into the proto3 - // optional-field semantics the server expects. var idleSecs *int64 if cmd.Flags().Changed("idle-timeout") { secs, err := parseIdleTimeoutFlag(idleTimeoutFlag) @@ -157,10 +155,9 @@ func checkIdleSecs(secs int64) (int64, error) { return 0, nil // clear / revert to global default } if secs < minIdleTimeoutSecs || secs > maxIdleTimeoutSecs { - // Format both the bounds and the offending value as Go-style - // durations to match the input form the user typed and the - // flag's --help text (Anwell flagged the prior `86400s` / - // `90000s` echoes as confusing — same unit as input now). + // Echo bounds in Go duration form so they match the input the + // user typed (raw seconds like `86400s` reads as a different + // unit). return 0, fmt.Errorf( "idle-timeout must be 0 (clear) or between %s and %s, got %s", formatDurationSecs(minIdleTimeoutSecs), diff --git a/cmd/lakebox/delete.go b/cmd/lakebox/delete.go index 809dbda5fb2..f43c8eb7357 100644 --- a/cmd/lakebox/delete.go +++ b/cmd/lakebox/delete.go @@ -47,10 +47,8 @@ Examples: return err } - // Validate existence first so `delete ` fails clearly - // instead of returning a confident "✓ Removed" on a sandbox - // the server never had — the DELETE endpoint treats 404 as - // idempotent success on the wire. + // DELETE returns success on 404, so pre-check existence to + // surface typos clearly. entry, err := api.get(ctx, lakeboxID) if err != nil { if errors.Is(err, apierr.ErrNotFound) { diff --git a/cmd/lakebox/keyhash.go b/cmd/lakebox/keyhash.go index 7f4fcd0bd45..823eb1e1682 100644 --- a/cmd/lakebox/keyhash.go +++ b/cmd/lakebox/keyhash.go @@ -10,13 +10,7 @@ import ( // the first 16 bytes and hex-encoded; the OpenSSH comment (anything after // the second whitespace-separated token) is stripped before hashing, so // registering the same key under different comments yields the same hash. -// Inputs that don't have a second token are hashed as-is. -// -// Useful for matching a locally-known key against entries in a -// GET /ssh-keys listing without sending the key contents back to the -// server. func keyHash(publicKey string) string { - // Slice off the OpenSSH comment by stopping at the second space. end := len(publicKey) spaces := 0 for i, c := range publicKey { diff --git a/cmd/lakebox/lakebox.go b/cmd/lakebox/lakebox.go index e406fccbbd1..382d6865d45 100644 --- a/cmd/lakebox/lakebox.go +++ b/cmd/lakebox/lakebox.go @@ -4,6 +4,7 @@ import ( "github.com/spf13/cobra" ) +// New returns the root command for the lakebox subcommand. func New() *cobra.Command { cmd := &cobra.Command{ Use: "lakebox", diff --git a/cmd/lakebox/list.go b/cmd/lakebox/list.go index 83be0fba989..6e415241e95 100644 --- a/cmd/lakebox/list.go +++ b/cmd/lakebox/list.go @@ -96,23 +96,11 @@ Example: out := cmd.OutOrStdout() - // Compute column widths. AUTOSTOP holds short tokens like - // `never`, `15m`, `1h30m` — 8 chars covers them. - // - // NAME is *always* rendered, even when no sandbox has a - // custom --name set: yunquan flagged on the bug-bash form - // that the prior auto-hide made the table shape change - // between calls (NAME appears the moment you set --name on - // any one box and vanishes when you clear them all), which - // breaks scripts and muscle memory. Sandboxes without a - // custom name render as `-` in the NAME cell. - // - // All column widths are measured in *terminal cells*, not - // bytes or runes — emoji and CJK glyphs render as 2 cells - // despite being 1 rune / multi-byte, and using len() here - // (which counts bytes) misaligns the row whenever a `--name` - // includes wide characters. cmdio.Width gives the - // East-Asian-Width-corrected cell count. + // NAME is always rendered (even with no --name set on any + // row) so the table shape stays stable across calls; + // unnamed sandboxes render as `-`. Widths are measured in + // terminal cells via cmdio.Width so emoji / CJK names line + // up correctly. idCol := 10 autostopCol := 8 nameCol := 4 @@ -123,10 +111,8 @@ Example: if l := cmdio.Width(e.autoStopLabel()); l > autostopCol { autostopCol = l } - // Only let an actual custom name expand the column. A - // sandbox whose `name` happens to equal its `id` would - // otherwise drive the column to the ID's width — for no - // gain, since that row renders as `-`. + // A name equal to the id renders as `-`, so don't let + // it expand the column. if e.Name != "" && e.Name != e.SandboxID { if l := cmdio.Width(e.Name); l > nameCol { nameCol = l diff --git a/cmd/lakebox/register.go b/cmd/lakebox/register.go index 7bc083f149a..ca19a91865b 100644 --- a/cmd/lakebox/register.go +++ b/cmd/lakebox/register.go @@ -66,10 +66,7 @@ Examples: return fmt.Errorf("failed to read public key %s.pub: %w", keyPath, err) } - // Default the registered key's label to this machine's hostname so - // `lakebox ssh-key list` is meaningful when the user has keys from - // multiple machines. Failed hostname lookups fall through to the - // server's "unset" default rather than blocking registration. + // Default to hostname so `ssh-key list` is meaningful across machines. if name == "" { if host, err := os.Hostname(); err == nil { name = host @@ -84,19 +81,9 @@ Examples: } s.ok("SSH key registered") - // Write the shared `lakebox-gw` SSH-config alias so editor - // Remote-SSH ("Open in VS Code/Cursor") deep links and - // `ssh @lakebox-gw` from a plain shell both work - // without the user having to paste any config block. The - // alias name and shape are aligned with the workspace UI's - // "First time setup?" disclosure, so CLI users and - // pasted-snippet users converge on the same config. - // - // First register on this machine prompts for consent (the - // Include line we add to ~/.ssh/config is a permanent - // change to a user-managed file). Re-runs are silent — if - // the Include is already there, the user has opted in and - // we just refresh the managed file's contents. + // Write the shared `lakebox-gw` ~/.ssh/config alias so editor + // Remote-SSH and plain `ssh @lakebox-gw` both work without + // the user pasting any config block (see maybeWriteSSHConfig). if err := maybeWriteSSHConfig(ctx, keyPath, w.Config.Host); err != nil { warn(ctx, fmt.Sprintf("registered key, but failed to update ~/.ssh/config: %v", err)) } @@ -124,7 +111,7 @@ func lakeboxKeyPath(ctx context.Context) (string, error) { } // ensureLakeboxKey returns the path to the lakebox SSH key, generating it if -// it doesn't exist. Returns (path, wasGenerated, error). +// it doesn't exist. func ensureLakeboxKey(ctx context.Context) (string, bool, error) { keyPath, err := lakeboxKeyPath(ctx) if err != nil { @@ -135,7 +122,6 @@ func ensureLakeboxKey(ctx context.Context) (string, bool, error) { return keyPath, false, nil } - // Check that ssh-keygen is available before trying to generate. if _, err := exec.LookPath("ssh-keygen"); err != nil { return "", false, errors.New( "ssh-keygen not found in PATH.\n" + diff --git a/cmd/lakebox/resolve.go b/cmd/lakebox/resolve.go index 42da7503adb..d2e6afbd7aa 100644 --- a/cmd/lakebox/resolve.go +++ b/cmd/lakebox/resolve.go @@ -32,8 +32,6 @@ func resolveLocalID(ctx context.Context, profile, arg string) (string, error) { sbs := getSandboxes(ctx, profile) - // ID-first so that a sandbox whose --name happens to collide with - // another sandbox's ID never gets mistakenly resolved by name. for _, s := range sbs { if s.ID == arg { return arg, nil diff --git a/cmd/lakebox/ssh.go b/cmd/lakebox/ssh.go index b0a7ad6c654..73715bd2040 100644 --- a/cmd/lakebox/ssh.go +++ b/cmd/lakebox/ssh.go @@ -25,8 +25,7 @@ const ( // resolveGatewayHost picks the SSH gateway hostname based on the workspace host. // Staging workspaces (*.staging.cloud.databricks.com etc.) route through -// ue1.s.dbrx.dev; everything else uses uw2.dbrx.dev. Both are dev-tier -// listeners (`.dbrx.dev`); there is no prod listener yet. +// ue1.s.dbrx.dev; everything else uses uw2.dbrx.dev. func resolveGatewayHost(workspaceHost string) string { if strings.Contains(workspaceHost, ".staging.") { return stagingDefaultGatewayHost @@ -65,7 +64,6 @@ Examples: profile = w.Config.Host } - // Use the dedicated lakebox SSH key. keyPath, err := lakeboxKeyPath(ctx) if err != nil { return fmt.Errorf("failed to determine lakebox key path: %w", err) @@ -74,8 +72,6 @@ Examples: return fmt.Errorf("lakebox SSH key not found at %s — run 'databricks lakebox register' first", keyPath) } - // Parse args: everything before -- is the optional lakebox ID, - // everything after -- is passed through to ssh. var lakeboxID string var extraArgs []string @@ -91,8 +87,6 @@ Examples: extraArgs = args[dashAt:] } - // Resolve a user-typed name to its ID using the local cache. - // No-op when arg is already an ID or not in cache. if lakeboxID != "" { resolved, err := resolveLocalID(ctx, profile, lakeboxID) if err != nil { @@ -101,36 +95,19 @@ Examples: lakeboxID = resolved } - // sandboxGatewayHost captures the gateway hostname from any - // Sandbox response we touch in this command, so the resolution - // below can prefer it over the cached value. Stays "" when we - // never hit the API in this invocation (e.g. explicit-id ssh - // with a warm cache). - var sandboxGatewayHost string - - // sandboxStatus is the server-side state observed in this - // invocation, used to print an explicit "starting from stopped" - // notice before the connect spinner. Empty when we never hit - // the API. - var sandboxStatus string + // Captured from any Sandbox response we touch below; "" when + // we never hit the API in this invocation. + var ( + sandboxGatewayHost string + sandboxStatus string + ) api, err := newLakeboxAPI(w) if err != nil { return err } - // Verify the local key is still registered before opening - // the SSH socket. The gateway's publickey-callback already - // checks the key (see lakebox/src/ssh/handler.rs's - // verify_ssh_key), but SSH_MSG_USERAUTH_FAILURE has no - // free-form reason field and SSH_MSG_USERAUTH_BANNER is - // swallowed by many client wrappers — so the gateway can't - // communicate "key not registered" through the SSH channel. - // The HTTP API is the only out-of-band wire that can - // surface a structured "run register" pointer to the user. - // listKeys errors fall through with a warning so a - // transient API hiccup doesn't block a connection the - // gateway could still route. + // Surface deleted-key errors before ssh's opaque "Permission denied". if err := verifyKeyRegistered(ctx, api, keyPath); err != nil { return err } @@ -178,13 +155,8 @@ Examples: } } - // Explicitly start (and wait for) the sandbox if it isn't - // already Running. The gateway will auto-start a stopped - // sandbox on connect, but that path is opaque (ssh just - // hangs for minutes with no progress) and races the - // cold-start timeout. Driving the start ourselves gives - // the user a visible spinner with elapsed time and a - // deterministic timeout (see start.go). + // Drive start ourselves so the user sees a spinner instead + // of an opaque multi-minute hang inside ssh's connect path. if sandboxStatus != "" && !strings.EqualFold(sandboxStatus, "running") { final, err := ensureRunning(ctx, api, lakeboxID, sandboxStatus) if err != nil { @@ -214,12 +186,9 @@ Examples: _ = setGatewayHost(ctx, profile, sandboxGatewayHost) } - // Spinner runs until exec replaces this process (Linux/macOS) - // or until the ssh subprocess returns (Windows execv shim). - // We deliberately don't print "Connected" — at this point ssh - // hasn't actually handshaken yet, so any success affirmation - // here would be a lie that gets contradicted by ssh's own - // error output on the failure path. + // Don't print "Connected" here — ssh hasn't completed the + // handshake yet, so any success message would race ssh's + // own error output on the failure path. s := spin(ctx, "Connecting to "+cmdio.Bold(ctx, lakeboxID)+"…") defer s.Close() return execSSHDirect(lakeboxID, host, gatewayPort, keyPath, extraArgs) @@ -238,11 +207,11 @@ Examples: // SSH protocol's reply surface (USERAUTH_FAILURE has no free-form // reason; USERAUTH_BANNER is widely swallowed) flattens "unknown // key", "key registered but not authorized for this sandbox", and -// "ESM is down" into the same "Permission denied (publickey)". This -// out-of-band HTTP check surfaces the specific case in language the -// user can act on. listKeys errors fall through with a warning so a -// transient API hiccup doesn't block a connection the gateway could -// still route. +// "upstream service is down" into the same "Permission denied +// (publickey)". This out-of-band HTTP check surfaces the specific +// case in language the user can act on. listKeys errors fall +// through with a warning so a transient API hiccup doesn't block a +// connection the gateway could still route. func verifyKeyRegistered(ctx context.Context, api *lakeboxAPI, keyPath string) error { pub, err := os.ReadFile(keyPath + ".pub") if err != nil { @@ -263,10 +232,8 @@ func verifyKeyRegistered(ctx context.Context, api *lakeboxAPI, keyPath string) e return fmt.Errorf("your lakebox SSH key (%s) is not registered with this workspace — run `databricks lakebox register` to re-register it", want) } -// ensureRunning brings the named sandbox to Running before ssh hands -// off. Owns its own spinner lifecycle — caller must not have one open. -// Calls api.start when the sandbox is currently Stopped; falls through -// to a poll for already-transitioning states (Creating, Starting). +// ensureRunning brings the named sandbox to Running with its own +// spinner — caller must not have one open. func ensureRunning(ctx context.Context, api *lakeboxAPI, id, currentStatus string) (*sandboxEntry, error) { s := spin(ctx, "Starting "+cmdio.Bold(ctx, id)+"…") defer s.Close() @@ -320,7 +287,7 @@ func execSSHDirect(lakeboxID, host, port, keyPath string, extraArgs []string) er // `lakebox ssh -- bash -c 'echo hi'` // Cobra splits this into `["bash", "-c", "echo hi"]`. ssh's join // produces `bash -c echo hi` on the wire, which bash re-splits into -// `-c=echo` and `$0=hi` — bug F22. We fix that by shell-quoting +// `-c=echo` and `$0=hi` — losing the second word. We fix that by shell-quoting // each arg before append, so the remote sees `bash -c 'echo hi'`. // // The heuristic: if there's exactly one extra arg, pass it untouched; diff --git a/cmd/lakebox/ssh_test.go b/cmd/lakebox/ssh_test.go index a88ad47c67d..a2558794a25 100644 --- a/cmd/lakebox/ssh_test.go +++ b/cmd/lakebox/ssh_test.go @@ -50,7 +50,7 @@ func TestBuildSSHArgsQuoting(t *testing.T) { expected: []string{"ls", "-la", "/tmp"}, }, { - name: "bash -c '' — third arg gets quoted (this is F22)", + name: "bash -c '' — third arg gets quoted", // The whole point: without the quoting, the remote shell // re-splits "bash -c echo hi" and bash's -c eats just "echo". extraArgs: []string{"bash", "-c", "echo hi"}, diff --git a/cmd/lakebox/sshconfig.go b/cmd/lakebox/sshconfig.go index 9e57dc49b09..24b04d158fc 100644 --- a/cmd/lakebox/sshconfig.go +++ b/cmd/lakebox/sshconfig.go @@ -33,8 +33,7 @@ const ( ) // sshConfigPaths returns (managedFile, mainConfig) under the user's -// ~/.ssh directory. Side-effect free; safe to call before deciding -// whether to actually write anything. +// ~/.ssh directory. func sshConfigPaths(ctx context.Context) (string, string, error) { home, err := env.UserHomeDir(ctx) if err != nil { @@ -45,9 +44,7 @@ func sshConfigPaths(ctx context.Context) (string, string, error) { } // sshConfigAlreadyManaged reports whether ~/.ssh/config already -// contains the lakebox-managed Include block. Used by `register` to -// decide whether to prompt the user for consent (first time) or -// silently refresh the managed file (already opted in). +// contains the lakebox-managed Include block. func sshConfigAlreadyManaged(ctx context.Context) (bool, error) { _, mainPath, err := sshConfigPaths(ctx) if err != nil { @@ -66,8 +63,6 @@ func sshConfigAlreadyManaged(ctx context.Context) (bool, error) { // writeSSHConfig writes the lakebox-managed SSH config block to a // managed file and, if not already present, adds an Include directive // to the user's ~/.ssh/config pointing at that file. -// -// Returns (managedFilePath, mainConfigPath, error). func writeSSHConfig(ctx context.Context, keyPath, gatewayHost, gatewayPort string) (string, string, error) { home, err := env.UserHomeDir(ctx) if err != nil { @@ -114,11 +109,8 @@ Host %s `, sshConfigAlias, gatewayHost, gatewayPort, keyPath) } -// writeManagedConfig writes content to path with 0600 perms, atomically -// via tmp + rename so a crash mid-write can't leave a half-written -// file. Skips the write entirely when the existing content already -// matches, so repeated `lakebox register` runs don't churn the file's -// mtime. +// writeManagedConfig writes content to path atomically with 0600 +// perms. No-op when the file already matches, to avoid churning mtime. func writeManagedConfig(path, content string) error { if existing, err := os.ReadFile(path); err == nil && bytes.Equal(existing, []byte(content)) { return nil @@ -154,16 +146,12 @@ func ensureMainIncludesManaged(mainPath, managedPath string) error { return fmt.Errorf("reading %s: %w", mainPath, err) } - // Prepend our block. SSH processes the file top-down and uses the - // first value seen for each option; placing our Include first lets - // `lakebox-gw` always resolve to the managed values even if the - // user has a wildcard `Host *` block later. + // Prepend so our Include wins SSH's first-match-per-option + // semantics over any wildcard `Host *` block later in the file. var buf bytes.Buffer buf.WriteString(managedBlock) if len(existing) > 0 { - // Ensure visual separation between our block and the user's - // content. If the existing file already starts with a blank - // line, don't add another. + // Avoid double blank lines if the file already starts with one. if !strings.HasPrefix(string(existing), "\n") { buf.WriteString("\n") } @@ -194,15 +182,8 @@ func hasOurMarkedBlock(text string) bool { } // maybeWriteSSHConfig writes the lakebox-managed SSH config, prompting -// for consent the first time on this machine. Re-runs are silent: the -// Include line in ~/.ssh/config signals prior opt-in, and we just -// refresh the managed file's contents (e.g. if the gateway host has -// changed). -// -// In non-interactive contexts (no TTY, no `--yes`-style flag here), -// we skip the write rather than fail — `lakebox ssh` still works via -// argv-explicit flags, only IDE Remote-SSH from the workspace UI -// needs the config alias. +// for consent the first time on this machine. Re-runs silently refresh +// the managed file. Non-interactive contexts skip the write entirely. func maybeWriteSSHConfig(ctx context.Context, keyPath, workspaceHost string) error { already, err := sshConfigAlreadyManaged(ctx) if err != nil { diff --git a/cmd/lakebox/sshkey.go b/cmd/lakebox/sshkey.go index 4539441ce2a..3ad634f282e 100644 --- a/cmd/lakebox/sshkey.go +++ b/cmd/lakebox/sshkey.go @@ -127,8 +127,7 @@ Examples: const hashCol = 32 const timeCol = 20 - // Leading 4-char gutter reserves space for a per-row `*` marker on - // the key matching this machine; header and separator leave it blank. + // 4-char gutter holds a per-row `*` for the local key (blank in header). header := fmt.Sprintf("%-*s %-*s %-*s %s", nameCol, "NAME", hashCol, "KEY HASH", timeCol, "CREATED", "LAST USED") fmt.Fprintf(out, " %s\n", cmdio.Faint(ctx, header)) diff --git a/cmd/lakebox/start.go b/cmd/lakebox/start.go index dd6b7a5dd40..6abb6cd4861 100644 --- a/cmd/lakebox/start.go +++ b/cmd/lakebox/start.go @@ -12,14 +12,10 @@ import ( "github.com/spf13/cobra" ) -// Bounds for `start`'s "wait until Running" loop. The server's StartSandbox -// RPC returns immediately with status="Creating" (reused for cold start — -// see F10), so the CLI polls until it actually reaches Running. Matches -// `create`'s blocking semantics so scripts can chain start → ssh / start → -// config without racing the cold boot. The 10-minute timeout covers -// Mitch's observed cold-start range (5–13 minutes) for the common case; -// truly stuck sandboxes still surface as a timeout rather than hanging -// the script forever. +// Bounds for `start`'s "wait until Running" poll. StartSandbox returns +// immediately with status="Creating", so we poll until it actually +// reaches Running. 10 min covers the observed cold-start range +// (5–13 min); stuck sandboxes surface as a timeout, not a hang. const ( startPollInterval = 2 * time.Second startWaitTimeout = 10 * time.Minute diff --git a/cmd/lakebox/state.go b/cmd/lakebox/state.go index e30160563bc..e0a2d2505f5 100644 --- a/cmd/lakebox/state.go +++ b/cmd/lakebox/state.go @@ -39,6 +39,7 @@ type cachedSandbox struct { Name string `json:"name,omitempty"` } +// stateFilePath returns the on-disk location of the lakebox state file. func stateFilePath(ctx context.Context) (string, error) { home, err := env.UserHomeDir(ctx) if err != nil { @@ -47,6 +48,8 @@ func stateFilePath(ctx context.Context) (string, error) { return filepath.Join(home, ".databricks", "lakebox.json"), nil } +// loadState reads the lakebox state file, returning an empty state when +// the file doesn't exist yet. func loadState(ctx context.Context) (*stateFile, error) { path, err := stateFilePath(ctx) if err != nil { @@ -71,6 +74,7 @@ func loadState(ctx context.Context) (*stateFile, error) { return &state, nil } +// saveState writes the lakebox state file atomically. func saveState(ctx context.Context, state *stateFile) error { path, err := stateFilePath(ctx) if err != nil { @@ -224,7 +228,7 @@ func upsertSandbox(ctx context.Context, profile, id, name string) error { // the profile's `GatewayHosts` entry — there is nothing for the // gateway hostname to apply to until the user creates a new sandbox, // and leaving the entry behind accumulates orphan state across the -// lifecycle of a profile (per Mitch's "Delete cleanup" CUJ). +// lifecycle of a profile. func removeSandbox(ctx context.Context, profile, id string) error { state, err := loadState(ctx) if err != nil { diff --git a/cmd/lakebox/status.go b/cmd/lakebox/status.go index 97744e03032..0a88b0b91cf 100644 --- a/cmd/lakebox/status.go +++ b/cmd/lakebox/status.go @@ -19,8 +19,8 @@ func newStatusCommand() *cobra.Command { Long: `Show detailed status of a Lakebox environment. Example: - lakebox status happy-panda-1234 - lakebox status happy-panda-1234 --json`, + databricks lakebox status happy-panda-1234 + databricks lakebox status happy-panda-1234 --json`, Args: cobra.ExactArgs(1), PreRunE: root.MustWorkspaceClient, ValidArgsFunction: completeSandboxIDs, diff --git a/cmd/lakebox/ui.go b/cmd/lakebox/ui.go index 45aa730f79e..203809ec982 100644 --- a/cmd/lakebox/ui.go +++ b/cmd/lakebox/ui.go @@ -11,10 +11,7 @@ import ( ) // jsonOutput reports whether the user asked for JSON output, via either -// the lakebox-local `--json` flag or the framework-global `-o json`. The -// global flag is the standard shape across other `databricks` commands -// and the validator already rejects bogus values, but until this helper -// existed lakebox commands silently ignored it and emitted text anyway. +// the local `--json` flag or the framework-global `-o json`. func jsonOutput(cmd *cobra.Command, jsonFlag bool) bool { if jsonFlag { return true @@ -25,18 +22,15 @@ func jsonOutput(cmd *cobra.Command, jsonFlag bool) bool { return false } -// cmdioSpinner is the subset of *cmdio.spinner's method set we need. -// Defining the interface locally lets us hold the unexported type as a -// struct field; cmdio's spinner satisfies it structurally. +// cmdioSpinner is the subset of *cmdio.spinner's method set we need, +// defined locally so we can hold the unexported type as a field. type cmdioSpinner interface { Update(msg string) Close() } -// spinner wraps cmdio.NewSpinner with ok/fail markers. ok and fail close the -// underlying spinner and log a final ✓/✗ line; Close stops the spinner -// without printing. cmdio's Close is itself idempotent, so a `defer s.Close()` -// is safe alongside an ok/fail call on the success path. +// spinner wraps cmdio.NewSpinner with ok/fail markers. Close (idempotent +// in cmdio) is safe alongside ok/fail, so callers can `defer s.Close()`. type spinner struct { cmdioSpinner ctx context.Context diff --git a/internal/bugbash/install.sh b/internal/bugbash/install.sh deleted file mode 100755 index 4543393ac82..00000000000 --- a/internal/bugbash/install.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env bash -# Install the latest demo-lakebox snapshot of the databricks CLI to -# ~/.databricks-snapshot/bin/databricks. Re-run any time you want to -# pick up new commits on the branch — the latest successful CI -# release-build is always what you get. -# -# The install dir is NOT added to PATH by default to avoid shadowing -# any system `databricks` binary. The script prints activation options -# at the end; pick whichever fits your workflow. - -set -euo pipefail - -REPO="databricks/cli" -BRANCH="demo-lakebox" -INSTALL_DIR="$HOME/.databricks-snapshot/bin" - -OS="$(uname -s | tr 'A-Z' 'a-z')" -ARCH="$(uname -m)"; [[ $ARCH == x86_64 ]] && ARCH=amd64 -[[ $ARCH == aarch64 ]] && ARCH=arm64 -case "$OS" in linux|darwin) ;; *) echo "unsupported OS: $OS" >&2; exit 1 ;; esac -case "$ARCH" in amd64|arm64) ;; *) echo "unsupported arch: $ARCH" >&2; exit 1 ;; esac - -for tool in gh jq curl; do - command -v "$tool" >/dev/null 2>&1 || { echo "$tool not installed (try: brew install $tool)" >&2; exit 1; } -done - -mkdir -p "$INSTALL_DIR" - -echo "→ looking up latest successful release-build run on $REPO @ $BRANCH …" -rid=$(gh run list -b "$BRANCH" -w release-build -R "$REPO" \ - --json databaseId,conclusion --limit 5 \ - | jq -r 'limit(1; .[] | select(.conclusion=="success")) | .databaseId') -[[ -n $rid && $rid != null ]] || { echo "no successful release-build run found on $BRANCH" >&2; exit 1; } - -tmp="$(mktemp -d)" -trap 'rm -rf "$tmp"' EXIT -echo "→ downloading artifact from run $rid …" -( cd "$tmp" && gh run download "$rid" -R "$REPO" -n cli >/dev/null ) - -tar="$tmp/databricks_cli_${OS}_${ARCH}.tar.gz" -[[ -f $tar ]] || { echo "expected platform tarball not in artifact: $tar" >&2; exit 1; } -( cd "$tmp" && tar -xzf "$tar" ) -install -m 755 "$tmp/databricks" "$INSTALL_DIR/databricks" - -version=$("$INSTALL_DIR/databricks" --version) -echo -echo "✓ installed $version to $INSTALL_DIR/databricks (CI run $rid)" -echo -echo "The install dir is intentionally NOT on PATH. Activate it one of three ways:" -echo -echo " 1. Just this shell, alongside your normal databricks:" -echo " alias databricks=\"$INSTALL_DIR/databricks\"" -echo -echo " 2. All future shells (replaces system databricks for new terminals):" -echo " echo 'export PATH=\"\$HOME/.databricks-snapshot/bin:\$PATH\"' >> ~/.zshrc" -echo " source ~/.zshrc" -echo -echo " 3. Without shadowing — invoke by full path or a distinct name:" -echo " $INSTALL_DIR/databricks lakebox list" -echo " ln -s $INSTALL_DIR/databricks ~/.local/bin/databricks-snapshot # then run \`databricks-snapshot lakebox …\`"